glusterd: stop stale bricks during handshaking in brick mux mode

This patch addresses two problems: 1. During friend handshaking, if a volume is imported due to a change in its version, the old bricks were not stopped, which could lead to a situation where bricks keep running with old volfiles. 2. While attaching the shd service in glusterd_attach_svc, the volume for which we are attempting the attach might have become stale and be in the process of deletion; hence, on every retry (when the rpc connection isn't ready yet), check for the existence of the volume and only then attempt the attach request. Fixes: bz#1733425 Change-Id: I6bac6b871f7e31cb5bf277db979289dec196a03e Signed-off-by: Atin Mukherjee <amukherj@redhat.com> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
author: Atin Mukherjee <amukherj@redhat.com> 2019-07-15 11:26:55 +0530
committer: Atin Mukherjee <amukherj@redhat.com> 2019-08-25 08:18:54 +0000
commit: f0be15cd534b91f88c0239ae850f22c5f2b90c0f (patch)
tree: e70bbad0eeb3f5343c126bdb3045b5914bb046de
parent: 093c66a0af496301e5d28d55f4fe57362cb399d5 (diff)
4 files changed, 55 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
index 5077ec1091f..eaed873d430 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -263,9 +263,6 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
     volinfo = data;
     GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
 
-    if (volinfo)
-        glusterd_volinfo_ref(volinfo);
-
     if (volinfo->is_snap_volume) {
         /* healing of a snap volume is not supported yet*/
         ret = 0;
@@ -280,9 +277,8 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
     conf->restart_shd = _gf_true;
     shd_restart = _gf_true;
 
-    ret = glusterd_shdsvc_create_volfile(volinfo);
-    if (ret)
-        goto out;
+    if (volinfo)
+        glusterd_volinfo_ref(volinfo);
 
     if (!glusterd_is_shd_compatible_volume(volinfo)) {
         ret = 0;
@@ -294,6 +290,9 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
         }
         goto out;
     }
+    ret = glusterd_shdsvc_create_volfile(volinfo);
+    if (ret)
+        goto out;
 
     ret = glusterd_shd_svc_mux_init(volinfo, svc);
     if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
index cf40a758082..3b965ea1ed8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -924,6 +924,22 @@ glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
 
     rpc = rpc_clnt_ref(svc->conn.rpc);
     for (tries = 15; tries > 0; --tries) {
+        /* There might be a case where the volume for which we're attempting
+         * to attach a shd svc has become stale and is in the process of
+         * deletion. Since the volinfo object was passed in before that
+         * sequence of operations happened, we might be operating on a
+         * stale volume. At every sync task switch we should therefore check
+         * for the existence of the volume.
+         */
+        if (!glusterd_volume_exists(volinfo->volname)) {
+            gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+                   "Volume %s "
+                   " is marked as stale, not attempting further shd svc attach "
+                   "attempts",
+                   volinfo->volname);
+            ret = 0;
+            goto out;
+        }
         if (rpc) {
             pthread_mutex_lock(&conf->attach_lock);
             {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index d91b672e47e..db17fe6d609 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1654,6 +1654,33 @@ glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo)
     return ret;
 }
 
+gf_boolean_t
+glusterd_volume_exists(const char *volname)
+{ /* Report whether a volume named volname is present in priv->volumes. */
+    glusterd_volinfo_t *tmp_volinfo = NULL;
+    gf_boolean_t volume_found = _gf_false; /* stays false if nothing matches */
+    xlator_t *this = NULL;
+    glusterd_conf_t *priv = NULL;
+    /* NOTE(review): no lock is taken around the list walk; confirm safe. */
+    GF_ASSERT(volname);
+    this = THIS;
+    GF_ASSERT(this);
+    /* The volume list hangs off the xlator's private glusterd_conf_t. */
+    priv = this->private;
+    GF_ASSERT(priv);
+    /* Linear scan; an exact strcmp() match on volname ends the search. */
+    cds_list_for_each_entry(tmp_volinfo, &priv->volumes, vol_list)
+    {
+        if (!strcmp(tmp_volinfo->volname, volname)) {
+            gf_msg_debug(this->name, 0, "Volume %s found", volname);
+            volume_found = _gf_true;
+            break;
+        }
+    }
+    /* _gf_true on a match, _gf_false if the list was exhausted. */
+    return volume_found;
+}
+
 int32_t
 glusterd_service_stop(const char *service, char *pidfile, int sig,
                       gf_boolean_t force_kill)
@@ -4714,10 +4741,11 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo,
             old_brickinfo->uuid, old_brickinfo->hostname, old_brickinfo->path,
             new_volinfo, &new_brickinfo);
         /* If the brick is stale, i.e it's not a part of the new volume
-         * or if it's part of the new volume and is pending a snap,
-         * then stop the brick process
+         * or if it's part of the new volume and is pending a snap, or if brick
+         * multiplexing is enabled, then stop the brick process
          */
-        if (ret || (new_brickinfo->snap_status == -1)) {
+        if (ret || (new_brickinfo->snap_status == -1) ||
+            is_brick_mx_enabled()) {
             /*TODO: may need to switch to 'atomic' flavour of
              * brick_stop, once we make peer rpc program also
              * synctask enabled*/
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index ed117e02267..c6a7545f4eb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -166,6 +166,9 @@ glusterd_brickinfo_new_from_brick(char *brick, glusterd_brickinfo_t **brickinfo,
 int32_t
 glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo);
 
+gf_boolean_t
+glusterd_volume_exists(const char *volname);
+
 int
 glusterd_volinfo_find_by_volume_id(uuid_t volume_id,
                                    glusterd_volinfo_t **volinfo);
author	Atin Mukherjee <amukherj@redhat.com>	2019-07-15 11:26:55 +0530
committer	Atin Mukherjee <amukherj@redhat.com>	2019-08-25 08:18:54 +0000
commit	f0be15cd534b91f88c0239ae850f22c5f2b90c0f (patch)
tree	e70bbad0eeb3f5343c126bdb3045b5914bb046de
parent	093c66a0af496301e5d28d55f4fe57362cb399d5 (diff)