glusterd: import volumes in separate synctask

With brick multiplexing, attaching a brick to an existing brick process requires the compatible brick to first finish its initialization and portmap sign-in; hence the thread might have to go to sleep and context switch the synctask to allow the brick process to communicate with glusterd. In the normal code path, this works fine as glusterd_restart_bricks () is launched through a separate synctask. In case there's a mismatch of the volume when glusterd restarts, glusterd_import_friend_volume () is invoked and then it tries to call glusterd_start_bricks () from the main thread, which may eventually land in a similar situation. Since this is not done through a separate synctask, the 1st brick will never get its turn to finish all of its handshaking and, as a consequence, all the other bricks will fail to get attached to it. Solution: Execute import volume and glusterd restart bricks in separate synctasks. Importing snaps also had to be done through a synctask, as the parent volume needs to be available for the snap import functionality to work. >mainline patch : https://review.gluster.org/#/c/19357/ https://review.gluster.org/#/c/19536/ https://review.gluster.org/#/c/19539/ Change-Id: I290b244d456afcc9b913ab30be4af040d340428c BUG: 1543706 Signed-off-by: Atin Mukherjee <amukherj@redhat.com> (cherry picked from commit cb0339f9229fc5c05d7ef4cfcc4ca9c4569f3755)
author: Atin Mukherjee <amukherj@redhat.com> 2018-02-08 09:09:00 +0530
committer: Shyamsundar Ranganathan <srangana@redhat.com> 2018-02-21 15:35:15 +0000
commit: 9b2995426ea206df9a4d8f14bbdb8e8baf73d91b (patch)
tree: 69f23e61cb27e7238256fa7ea73c949d5d86938f
parent: 325d714e40b273b99a63f58a4c6c83b7f1143ee5 (diff)
7 files changed, 356 insertions, 70 deletions
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
index c5a00881705..22f98d2b5a7 100644
--- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -92,20 +92,33 @@ EXPECT "0" mounted_snaps ${V1}
 # handled during handshake.
 
 activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
 kill_glusterd 2
+
 deactivate_snapshots
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
 TEST start_glusterd 2
 
 # Updates form friend should reflect as snap was deactivated while glusterd
 # process was inactive and mount point should also not exist.
 
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0}
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1}
 
 kill_glusterd 2
 activate_snapshots
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
 TEST start_glusterd 2
 
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
 # Updates form friend should reflect as snap was activated while glusterd
 # process was inactive and mount point should exist.
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 7227c6a7bef..11b16e216ec 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2412,6 +2412,7 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
 
 int
 glusterd_start_bricks (glusterd_volinfo_t *volinfo)
+
 {
         int                      ret            = -1;
         glusterd_brickinfo_t    *brickinfo      = NULL;
@@ -2440,14 +2441,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
                                 goto out;
                         }
                 }
-
-        }
-        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
-        if (ret) {
-                gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
-                        "Failed to write volinfo for volume %s",
-                        volinfo->volname);
-                goto out;
         }
         ret = 0;
 out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 7139a27ffb4..daf8f21cc6c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -274,8 +274,10 @@ glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
                 int32_t blk_count, double *throughput, double *time);
 gf_boolean_t
 glusterd_is_volume_started (glusterd_volinfo_t  *volinfo);
+
 int
 glusterd_start_bricks (glusterd_volinfo_t *volinfo);
+
 gf_boolean_t
 glusterd_are_all_volumes_stopped ();
 int
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 19a3cf7b10d..85e92c39141 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -1760,8 +1760,11 @@ out:
  * state, i.e either both would be hosting bricks or both would not be hosting
  * bricks, then a decision can't be taken and a peer-reject will happen.
  *
- * glusterd_compare_and_update_snap() implements the following algorithm to
- * perform the above task:
+ * glusterd_compare_snap () & glusterd_update_snaps_synctask () implement the
+ * following algorithm to perform the above task. Note that the former iterates
+ * over the snaps one at a time, recording the decision for each in the
+ * dictionary; glusterd_update_snaps_synctask () then goes over all the snaps
+ * and updates them in one go as part of a synctask.
  * Step  1: Start.
  * Step  2: Check if the peer is missing a delete or restore on the said snap.
  *          If yes, goto step 6.
@@ -1786,21 +1789,18 @@ out:
  *
  */
 int32_t
-glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
-                                  char *peername, uuid_t peerid)
+glusterd_compare_snap (dict_t *peer_data, int32_t snap_count,
+                       char *peername, uuid_t peerid)
 {
         char              buf[NAME_MAX]    = "";
         char              prefix[NAME_MAX] = "";
         char             *peer_snap_name   = NULL;
         char             *peer_snap_id     = NULL;
-        dict_t           *dict             = NULL;
         glusterd_snap_t  *snap             = NULL;
         gf_boolean_t      conflict         = _gf_false;
         gf_boolean_t      is_local         = _gf_false;
         gf_boolean_t      is_hosted        = _gf_false;
         gf_boolean_t      missed_delete    = _gf_false;
-        gf_boolean_t      remove_lvm       = _gf_true;
-
         int32_t           ret              = -1;
         int32_t           volcount         = 0;
         xlator_t         *this             = NULL;
@@ -1812,6 +1812,14 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
 
         snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
 
+        ret = dict_set_uint32 (peer_data, buf, 0);
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 0);
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 0);
+        snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 0);
+
         /* Fetch the peer's snapname */
         snprintf (buf, sizeof(buf), "%s.snapname", prefix);
         ret = dict_get_str (peer_data, buf, &peer_snap_name);
@@ -1868,7 +1876,10 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
                         /* Peer has snap with the same snapname
                         * and snap_id, which local node doesn't have.
                         */
-                        goto accept_peer_data;
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
+                                  prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 1);
+                        goto out;
                 }
                 /* Peer has snap with the same snapname
                  * and snap_id. Now check if peer has a
@@ -1895,12 +1906,18 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
                          * When removing data from local node, make sure
                          * we are not removing backend lvm of the snap.
                          */
-                        remove_lvm = _gf_false;
-                        goto remove_my_data;
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 0);
+                        snprintf (buf, sizeof(buf), "%s.remove_my_data",
+                                  prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 1);
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
+                                  prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 1);
                 } else {
                         ret = 0;
-                        goto out;
                 }
+                goto out;
         }
 
         /* There is a conflict. Check if the current node is
@@ -1952,50 +1969,176 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
          * And local node isn't. Hence remove local node's
          * data and accept peer data
          */
-
         gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting "
                 "snap(%s). Removing local data. Accepting peer data.",
                 peer_snap_name);
-        remove_lvm = _gf_true;
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 1);
+        snprintf (buf, sizeof(buf), "%s.remove_my_data",
+                  prefix);
+        ret = dict_set_uint32 (peer_data, buf, 1);
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 1);
 
-remove_my_data:
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
 
-        dict = dict_new();
-        if (!dict) {
-                gf_msg (this->name, GF_LOG_ERROR, 0,
-                        GD_MSG_DICT_CREATE_FAIL,
-                        "Unable to create dict");
-                ret = -1;
-                goto out;
+int32_t
+glusterd_update_snaps_synctask (void *opaque)
+{
+        int32_t           ret              = -1;
+        int32_t           snap_count       = 0;
+        int               i                = 1;
+        xlator_t         *this             = NULL;
+        dict_t           *peer_data        = NULL;
+        char              buf[NAME_MAX]    = "";
+        char              prefix[NAME_MAX] = "";
+        char             *peer_snap_name   = NULL;
+        char             *peer_snap_id     = NULL;
+        char             *peername         = NULL;
+        gf_boolean_t      remove_lvm       = _gf_false;
+        gf_boolean_t      remove_my_data   = _gf_false;
+        gf_boolean_t      accept_peer_data = _gf_false;
+        int32_t           val              = 0;
+        glusterd_snap_t  *snap             = NULL;
+        dict_t           *dict             = NULL;
+        glusterd_conf_t  *conf             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        peer_data = (dict_t *)opaque;
+        GF_ASSERT (peer_data);
+
+        synclock_lock (&conf->big_lock);
+
+        while (conf->restart_bricks) {
+                synclock_unlock (&conf->big_lock);
+                sleep (2);
+                synclock_lock (&conf->big_lock);
         }
+        conf->restart_bricks = _gf_true;
 
-        ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
-                                    _gf_false);
+        ret = dict_get_int32 (peer_data, "snap_count", &snap_count);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
-                        GD_MSG_SNAP_REMOVE_FAIL,
-                        "Failed to remove snap %s", snap->snapname);
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
                 goto out;
         }
-
-accept_peer_data:
-
-        /* Accept Peer Data */
-        ret = glusterd_import_friend_snap (peer_data, snap_count,
-                                           peer_snap_name, peer_snap_id);
+        ret = dict_get_str (peer_data, "peername", &peername);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
-                        GD_MSG_SNAP_IMPORT_FAIL,
-                        "Failed to import snap %s from peer %s",
-                        peer_snap_name, peername);
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch peername");
                 goto out;
         }
 
+        for (i = 1; i <= snap_count; i++) {
+                snprintf (prefix, sizeof(prefix), "snap%d", i);
+
+                /* Fetch the peer's snapname */
+                snprintf (buf, sizeof(buf), "%s.snapname", prefix);
+                ret = dict_get_str (peer_data, buf, &peer_snap_name);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch snapname from peer: %s",
+                                peername);
+                        goto out;
+                }
+
+                /* Fetch the peer's snap_id */
+                snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
+                ret = dict_get_str (peer_data, buf, &peer_snap_id);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch snap_id from peer: %s",
+                                peername);
+                        goto out;
+                }
+
+                /* remove_my_data */
+                snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
+                ret = dict_get_int32 (peer_data, buf, &val);
+                if (val)
+                        remove_my_data = _gf_true;
+                else
+                        remove_my_data = _gf_false;
+
+                if (remove_my_data) {
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+                        ret = dict_get_int32 (peer_data, buf, &val);
+                        if (val)
+                                remove_lvm = _gf_true;
+                        else
+                                remove_lvm = _gf_false;
+
+                        dict = dict_new();
+                        if (!dict) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_CREATE_FAIL,
+                                        "Unable to create dict");
+                                ret = -1;
+                                goto out;
+                        }
+                        snap = glusterd_find_snap_by_name (peer_snap_name);
+                        if (!snap) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MISSED_SNAP_PRESENT,
+                                        "Snapshot %s from peer %s missing on "
+                                        "localhost", peer_snap_name,
+                                        peername);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = glusterd_snap_remove (dict, snap, remove_lvm,
+                                                    _gf_false, _gf_false);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_REMOVE_FAIL,
+                                        "Failed to remove snap %s",
+                                        snap->snapname);
+                                goto out;
+                        }
+                        if (dict)
+                                dict_unref (dict);
+                }
+                snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+                ret = dict_get_int32 (peer_data, buf, &val);
+                if (val)
+                        accept_peer_data = _gf_true;
+                else
+                        accept_peer_data = _gf_false;
+
+                if (accept_peer_data) {
+                        /* Accept Peer Data */
+                        ret = glusterd_import_friend_snap (peer_data,
+                                                           i,
+                                                           peer_snap_name,
+                                                           peer_snap_id);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_IMPORT_FAIL,
+                                        "Failed to import snap %s from peer %s",
+                                        peer_snap_name, peername);
+                                goto out;
+                        }
+                }
+        }
+
 out:
+        if (peer_data)
+                dict_unref (peer_data);
         if (dict)
                 dict_unref (dict);
+        conf->restart_bricks = _gf_false;
 
-        gf_msg_trace (this->name, 0, "Returning %d", ret);
         return ret;
 }
 
@@ -2010,6 +2153,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
         int32_t          snap_count   = 0;
         int              i            = 1;
         xlator_t        *this         = NULL;
+        dict_t          *peer_data_copy = NULL;
 
         this = THIS;
         GF_ASSERT (this);
@@ -2025,8 +2169,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
 
         for (i = 1; i <= snap_count; i++) {
                 /* Compare one snapshot from peer_data at a time */
-                ret = glusterd_compare_and_update_snap (peer_data, i, peername,
-                                                        peerid);
+                ret = glusterd_compare_snap (peer_data, i, peername, peerid);
                 if (ret) {
                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                 GD_MSG_SNAPSHOT_OP_FAILED,
@@ -2035,6 +2178,18 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
                         goto out;
                 }
         }
+        /* Update the snaps at one go */
+        peer_data_copy = dict_copy_with_ref (peer_data, NULL);
+        ret = dict_set_str (peer_data_copy, "peername", peername);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set peername into the dict");
+                if (peer_data_copy)
+                        dict_unref (peer_data_copy);
+                goto out;
+        }
+        glusterd_launch_synctask (glusterd_update_snaps_synctask,
+                                  peer_data_copy);
 
 out:
         gf_msg_trace (this->name, 0, "Returning %d", ret);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 7d1fc33b05d..5b2a3dda6d1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3460,6 +3460,14 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
         *status = GLUSTERD_VOL_COMP_SCS;
 
 out:
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "volume%d.update", count);
+
+        if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
+                ret = dict_set_int32 (peer_data, key, 1);
+        } else {
+                ret = dict_set_int32 (peer_data, key, 0);
+        }
         if (*status == GLUSTERD_VOL_COMP_RJT) {
                 gf_event (EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
                           volinfo->volname);
@@ -3532,13 +3540,12 @@ glusterd_spawn_daemons (void *opaque)
         int             ret     = -1;
 
         synclock_lock (&conf->big_lock);
-        glusterd_restart_bricks (conf);
+        glusterd_restart_bricks ();
         glusterd_restart_gsyncds (conf);
         glusterd_restart_rebalance (conf);
         ret = glusterd_snapdsvc_restart ();
         ret = glusterd_tierdsvc_restart ();
         ret = glusterd_gfproxydsvc_restart ();
-
         return ret;
 }
 
@@ -4304,20 +4311,35 @@ out:
 int32_t
 glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
 {
-        int                  ret = 0;
-        glusterd_brickinfo_t *brickinfo = NULL;
+        int                      ret        = 0;
+        glusterd_brickinfo_t    *brickinfo  = NULL;
+        glusterd_brick_proc_t   *brick_proc = NULL;
+        int                      brick_count = 0;
+
         GF_ASSERT (volinfo);
 
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
                 if (glusterd_is_brick_started (brickinfo)) {
-                        ret = glusterd_brick_disconnect (brickinfo);
-                        if (ret) {
-                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
-                                        GD_MSD_BRICK_DISCONNECT_FAIL,
-                                        "Failed to "
-                                        "disconnect %s:%s", brickinfo->hostname,
-                                        brickinfo->path);
-                                break;
+                        /* If brick multiplexing is enabled then we can't
+                         * blindly set brickinfo->rpc to NULL as it might impact
+                         * the other attached bricks.
+                         */
+                        ret = glusterd_brick_proc_for_port (brickinfo->port,
+                                                            &brick_proc);
+                        if (!ret) {
+                                brick_count = brick_proc->brick_count;
+                        }
+                        if (!is_brick_mx_enabled () || brick_count == 0) {
+                                ret = glusterd_brick_disconnect (brickinfo);
+                                if (ret) {
+                                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                                GD_MSD_BRICK_DISCONNECT_FAIL,
+                                                "Failed to "
+                                                "disconnect %s:%s",
+                                                brickinfo->hostname,
+                                                brickinfo->path);
+                                        break;
+                                }
                         }
                 }
         }
@@ -4556,7 +4578,7 @@ out:
 }
 
 int32_t
-glusterd_import_friend_volume (dict_t *peer_data, size_t count)
+glusterd_import_friend_volume (dict_t *peer_data, int count)
 {
 
         int32_t                 ret = -1;
@@ -4565,6 +4587,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
         glusterd_volinfo_t      *old_volinfo = NULL;
         glusterd_volinfo_t      *new_volinfo = NULL;
         glusterd_svc_t          *svc         = NULL;
+        int32_t                  update      = 0;
+        char                     key[512]    = {0,};
 
         GF_ASSERT (peer_data);
 
@@ -4572,6 +4596,15 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
         GF_ASSERT (this);
         priv = this->private;
         GF_ASSERT (priv);
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "volume%d.update", count);
+        ret = dict_get_int32 (peer_data, key, &update);
+        if (ret || !update) {
+                /* if update is 0 that means the volume is not imported */
+                goto out;
+        }
+
         ret = glusterd_import_volinfo (peer_data, count,
                                        &new_volinfo, "volume");
         if (ret)
@@ -4585,6 +4618,14 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
 
         ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
         if (0 == ret) {
+                if (new_volinfo->version <= old_volinfo->version) {
+                        /* When this condition is true, it means that another
+                         * import-volume synctask thread has already brought
+                         * this volume up to date, so just ignore this volume
+                         * now.
+                         */
+                        goto out;
+                }
                 /* Ref count the old_volinfo such that deleting it doesn't crash
                  * if its been already in use by other thread
                  */
@@ -4615,7 +4656,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
                 }
         }
 
-        ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+        ret = glusterd_store_volinfo (new_volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_NONE);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
                         GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
@@ -4643,6 +4685,60 @@ out:
 }
 
 int32_t
+glusterd_import_friend_volumes_synctask (void *opaque)
+{
+        int32_t                 ret = -1;
+        int32_t                 count = 0;
+        int                     i = 1;
+        xlator_t                *this = NULL;
+        glusterd_conf_t         *conf = NULL;
+        dict_t *peer_data         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        peer_data = (dict_t *)opaque;
+        GF_ASSERT (peer_data);
+
+        ret = dict_get_int32 (peer_data, "count", &count);
+        if (ret)
+                goto out;
+
+        synclock_lock (&conf->big_lock);
+
+        /* We need to ensure that importing a volume shouldn't race with an
+         * other thread where as part of restarting glusterd, bricks are
+         * restarted (refer glusterd_restart_bricks ())
+         */
+        while (conf->restart_bricks) {
+                synclock_unlock (&conf->big_lock);
+                sleep (2);
+                synclock_lock (&conf->big_lock);
+        }
+        conf->restart_bricks = _gf_true;
+
+        while (i <= count) {
+                ret = glusterd_import_friend_volume (peer_data, i);
+                if (ret) {
+                        conf->restart_bricks = _gf_false;
+                        goto out;
+                }
+                i++;
+        }
+        glusterd_svcs_manager (NULL);
+        conf->restart_bricks = _gf_false;
+out:
+        if (peer_data)
+                dict_unref (peer_data);
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+int32_t
 glusterd_import_friend_volumes (dict_t *peer_data)
 {
         int32_t                 ret = -1;
@@ -4781,8 +4877,10 @@ glusterd_import_global_opts (dict_t *friend_data)
                  * recompute if quorum is met. If quorum is not met bricks are
                  * not started and those already running are stopped
                  */
-                if (old_quorum != new_quorum)
-                        glusterd_restart_bricks (conf);
+                if (old_quorum != new_quorum) {
+                        glusterd_launch_synctask (glusterd_restart_bricks,
+                                                  NULL);
+                }
         }
 
         ret = 0;
@@ -4802,6 +4900,7 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
         gf_boolean_t     update    = _gf_false;
         xlator_t        *this      = NULL;
         glusterd_conf_t *priv      = NULL;
+        dict_t          *peer_data_copy = NULL;
 
         this = THIS;
         GF_ASSERT (this);
@@ -4833,18 +4932,23 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
                         goto out;
                 }
                 if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
-                        ret = glusterd_import_friend_volume (peer_data, i);
-                        if (ret) {
-                                goto out;
-                        }
                         update = _gf_true;
-                        *status = GLUSTERD_VOL_COMP_NONE;
                 }
                 i++;
         }
 
         if (update) {
-                glusterd_svcs_manager (NULL);
+                /* Launch the import friend volume as a separate synctask as it
+                 * has to trigger start bricks where we may need to wait for the
+                 * first brick to come up before attaching the subsequent bricks
+                 * in case brick multiplexing is enabled
+                 */
+                peer_data_copy = dict_copy_with_ref (peer_data, NULL);
+                glusterd_launch_synctask
+                        (glusterd_import_friend_volumes_synctask,
+                         peer_data_copy);
+                if (ret)
+                        goto out;
         }
 
 out:
@@ -5994,7 +6098,7 @@ out:
 }
 
 int
-glusterd_restart_bricks (glusterd_conf_t *conf)
+glusterd_restart_bricks (void *opaque)
 {
         int                   ret            = 0;
         glusterd_volinfo_t   *volinfo        = NULL;
@@ -6002,6 +6106,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         glusterd_snap_t      *snap           = NULL;
         gf_boolean_t          start_svcs     = _gf_false;
         xlator_t             *this           = NULL;
+        glusterd_conf_t      *conf           = NULL;
         int                   active_count   = 0;
         int                   quorum_count   = 0;
         gf_boolean_t          node_quorum    = _gf_false;
@@ -6012,6 +6117,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         conf = this->private;
         GF_VALIDATE_OR_GOTO (this->name, conf, return_block);
 
+        /* We need to ensure that restarting the bricks during glusterd restart
+         * shouldn't race with the import volume thread (refer
+         * glusterd_compare_friend_data ())
+         */
+        while (conf->restart_bricks) {
+                synclock_unlock (&conf->big_lock);
+                sleep (2);
+                synclock_lock (&conf->big_lock);
+        }
+        conf->restart_bricks = _gf_true;
+
         ++(conf->blockers);
         ret = glusterd_get_quorum_cluster_counts (this, &active_count,
                                                   &quorum_count);
@@ -6022,8 +6138,9 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                 node_quorum = _gf_true;
 
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
-                if (volinfo->status != GLUSTERD_STATUS_STARTED)
+                if (volinfo->status != GLUSTERD_STATUS_STARTED) {
                         continue;
+                }
                 gf_msg_debug (this->name, 0, "starting the volume %s",
                         volinfo->volname);
 
@@ -6130,6 +6247,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
 out:
         --(conf->blockers);
         conf->restart_done = _gf_true;
+        conf->restart_bricks = _gf_false;
 
 return_block:
         return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 347a2282b89..4b5b443db0e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -244,6 +244,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
 int
 glusterd_remote_hostname_get (rpcsvc_request_t *req,
                               char *remote_host, int len);
+
+int32_t
+glusterd_import_friend_volumes_synctask (void *opaque);
+
 int32_t
 glusterd_import_friend_volumes (dict_t *peer_data);
 void
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index d56727744cb..e7a4c80a635 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -197,6 +197,7 @@ typedef struct {
         int32_t                    workers;
         uint32_t                   blockers;
         uint32_t                   mgmt_v3_lock_timeout;
+        gf_boolean_t               restart_bricks;
 } glusterd_conf_t;
 
 
@@ -1081,7 +1082,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
                                     dict_t  *volumes, int   count);
 
 int
-glusterd_restart_bricks (glusterd_conf_t *conf);
+glusterd_restart_bricks ();
 
 int32_t
 glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
author	Atin Mukherjee <amukherj@redhat.com>	2018-02-08 09:09:00 +0530
committer	Shyamsundar Ranganathan <srangana@redhat.com>	2018-02-21 15:35:15 +0000
commit	9b2995426ea206df9a4d8f14bbdb8e8baf73d91b (patch)
tree	69f23e61cb27e7238256fa7ea73c949d5d86938f
parent	325d714e40b273b99a63f58a4c6c83b7f1143ee5 (diff)