summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPoornima G <pgurusid@redhat.com>2016-06-06 06:29:40 -0400
committerJeff Darcy <jdarcy@redhat.com>2016-06-16 04:57:42 -0700
commitb8ac20e888fbacad9d90cd8f1c6ff8579a5cefe9 (patch)
treef7befa6b0e065afb87d9876f731e963d065acd40
parentc04df79dc453ef5cb7b3a0ca8ba14598da6189ac (diff)
gfapi: Fix IO error caused when there are consecutive graph switches
Issue: Consider a simple situation, where glfs_init() is done, i.e. the initial graph is up. Now perform 2 volume sets that result in 2 client side graph changes. After this perform some IO; the IO fails with ENOTCONN. The only way to recover this client is, I guess, another graph switch or a restart. What actually is happening from the code perspective: The initial graph is, let's say, A, followed by 2 consecutive graph switches to B and C without any IO between those two switches. - graph_setup (A) as a result of GF_EVENT_CHILD_UP, and fs->next_subvol = A - glfs_init() results in fs->active_subvol = A, fs->next_subvol = NULL - graph_setup (B) as a result of GF_EVENT_CHILD_UP, and fs->next_subvol = B - graph_setup (C) as a result of GF_EVENT_CHILD_UP, and fs->next_subvol = C. It also sees that the previous graph B was never set as fs->active_subvol, i.e. no IO or anything happened on B, so it can safely send GF_EVENT_PARENT_DOWN (by calling glfs_subvol_done(B)). This parent down on B results in child_down(B), which is fine. But child_down also triggers graph_setup(B). - graph_setup(B) as a result of GF_EVENT_CHILD_DOWN, and fs->next_subvol = B, and GF_EVENT_PARENT_DOWN on C as explained above. This again leads to GF_EVENT_CHILD_DOWN on C. - graph_setup(C) as a result of GF_EVENT_CHILD_DOWN, and fs->next_subvol = C, and GF_EVENT_PARENT_DOWN on B as explained above. Thus both the graphs B and C are disconnected, and hence the ENOTCONN. Solution: Remove the call to graph_setup() when the event is GF_EVENT_CHILD_DOWN. I don't see any reason why graph_setup should be called when there is a child_down. Not sure what the original reason was to have graph_setup in child_down. git history shows the first patch itself had this call.
Change-Id: I9de86555f66cc94a05649ac863b40ed3426ffd4b BUG: 1343038 Signed-off-by: Poornima G <pgurusid@redhat.com> Reviewed-on: http://review.gluster.org/14656 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r--api/src/glfs-master.c1
-rw-r--r--tests/bugs/libgfapi/bug-1093594.c (renamed from tests/bugs/gfapi/bug-1093594.c)0
-rwxr-xr-xtests/bugs/libgfapi/bug-1093594.sh (renamed from tests/bugs/gfapi/bug-1093594.sh)0
-rw-r--r--tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c165
-rwxr-xr-xtests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh20
5 files changed, 185 insertions, 1 deletions
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
index ff8f68f452b..9f11a6a0c9c 100644
--- a/api/src/glfs-master.c
+++ b/api/src/glfs-master.c
@@ -105,7 +105,6 @@ notify (xlator_t *this, int event, void *data, ...)
pthread_cond_broadcast (&fs->child_down_cond);
}
pthread_mutex_unlock (&fs->mutex);
- graph_setup (fs, graph);
glfs_init_done (fs, 1);
break;
case GF_EVENT_CHILD_CONNECTING:
diff --git a/tests/bugs/gfapi/bug-1093594.c b/tests/bugs/libgfapi/bug-1093594.c
index 8f5aa9be66c..8f5aa9be66c 100644
--- a/tests/bugs/gfapi/bug-1093594.c
+++ b/tests/bugs/libgfapi/bug-1093594.c
diff --git a/tests/bugs/gfapi/bug-1093594.sh b/tests/bugs/libgfapi/bug-1093594.sh
index 444319b8e63..444319b8e63 100755
--- a/tests/bugs/gfapi/bug-1093594.sh
+++ b/tests/bugs/libgfapi/bug-1093594.sh
diff --git a/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c
new file mode 100644
index 00000000000..4cf849484a6
--- /dev/null
+++ b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c
@@ -0,0 +1,165 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define WRITE_SIZE (128)
+
+glfs_t *
+setup_new_client(char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new (volname);
+ if (!fs) {
+ fprintf (stderr, "\nglfs_new: returned NULL (%s)\n",
+ strerror (errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server (fs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ fprintf (stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror (errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging (fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf (stderr, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror (errno));
+ goto error;
+ }
+
+ ret = glfs_init (fs);
+ if (ret < 0) {
+ fprintf (stderr, "\nglfs_init failed with ret: %d (%s)\n",
+ ret, strerror (errno));
+ goto error;
+ }
+ return fs;
+error:
+ return NULL;
+}
+
+int
+write_something (glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char *buf = NULL;
+ int ret = 0;
+ int j = 0;
+
+ fd = glfs_creat (fs, "filename", O_RDWR, 0644);
+ if (!fd) {
+ fprintf (stderr, "%s: (%p) %s\n", "filename", fd,
+ strerror (errno));
+ return -1;
+ }
+
+ buf = (char *) malloc (WRITE_SIZE);
+ memset (buf, '-', WRITE_SIZE);
+
+ for (j = 0; j < 4; j++) {
+ ret = glfs_write (fd, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf (stderr, "Write(%s): %d (%s)\n", "filename", ret,
+ strerror (errno));
+ return ret;
+ }
+ glfs_lseek (fd, 0, SEEK_SET);
+ }
+ return 0;
+}
+
+static int
+volfile_change (const char *volname) {
+ int ret = 0;
+ char *cmd = NULL, *cmd1 = NULL;
+
+ ret = asprintf (&cmd, "gluster volume set %s quick-read on",
+ volname);
+ if (ret < 0) {
+ fprintf (stderr, "cannot construct cli command string (%s)",
+ strerror (errno));
+ return ret;
+ }
+
+ ret = asprintf (&cmd1, "gluster volume set %s quick-read off",
+ volname);
+ if (ret < 0) {
+ fprintf (stderr, "cannot construct cli command string (%s)",
+ strerror (errno));
+ return ret;
+ }
+
+ ret = system (cmd);
+ if (ret < 0) {
+ fprintf (stderr, "quick-read off on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system (cmd1);
+ if (ret < 0) {
+ fprintf (stderr, "quick-read on on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system (cmd);
+ if (ret < 0) {
+ fprintf (stderr, "quick-read off on (%s) failed", volname);
+ return ret;
+ }
+
+ free (cmd);
+ free (cmd1);
+ return ret;
+}
+
+int
+main (int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char buf[100];
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 3) {
+ fprintf (stderr,
+ "Expect following args %s <Vol> <log file location>\n"
+ , argv[0]);
+ return -1;
+ }
+
+ fs = setup_new_client (argv[1], argv[2]);
+ if (!fs)
+ goto error;
+
+ ret = volfile_change (argv[1]);
+ if (ret < 0)
+ goto error;
+
+ /* This is required as the volfile change takes a while to reach this
+ * gfapi client and for the client to process the graph change. Without
+ * this delay the issue cannot be reproduced, and so cannot be tested.
+ */
+ sleep (10);
+
+ ret = write_something (fs);
+ if (ret < 0)
+ goto error;
+
+ ret = glfs_fini (fs);
+ if (ret < 0) {
+ fprintf (stderr, "glfs_fini failed with ret: %d (%s)\n",
+ ret, strerror (errno));
+ goto error;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh
new file mode 100755
index 00000000000..43cad2b15ee
--- /dev/null
+++ b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/glfs_vol_set_IO_ERR.c -lgfapi
+TEST $(dirname $0)/glfs_vol_set_IO_ERR $V0 $logdir/glfs_vol_set_IO_ERR.log
+
+cleanup_tester $(dirname $0)/glfs_vol_set_IO_ERR
+cleanup;