From dc1d549d0e776ecd4c8381577eeed990250a7f6a Mon Sep 17 00:00:00 2001
From: Pranith Kumar K
Date: Wed, 13 May 2020 21:53:23 +0530
Subject: mgmt/glusterd: Stop old shd before increasing replica count

Problem:
In an add-brick operation that increases the replica count, SHD was
restarted only after the pending xattrs had been set on the new bricks
and the bricks had been added. Before that restart, the old SHD could
scan the root directory, see that no heal is needed, and delete the
index entry for the root directory, leading to no heals until a lookup
is executed on the mount.

Fix:
Stop SHD first, perform the pending-xattr setting and the addition of
the new bricks, and then restart SHD.

Fixes: #1240
Change-Id: I94fd7c6c909211b597185dfe097a559db6c0d00f
Signed-off-by: Pranith Kumar K
---
 .../gfid-mismatch-resolution-with-fav-child-policy.t |  1 -
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c       | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
index 12af0c85461..35e295dc170 100644
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
@@ -227,4 +227,3 @@ HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1)
 TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
 
 cleanup;
-#G_TESTDEF_TEST_STATUS_NETBSD7=1501390
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 3af2867b82a..00a26833d58 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -991,6 +991,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
     xlator_t *this = NULL;
     glusterd_conf_t *conf = NULL;
     gf_boolean_t is_valid_add_brick = _gf_false;
+    gf_boolean_t restart_shd = _gf_false;
     struct statvfs brickstat = {
         0,
     };
@@ -1147,6 +1148,15 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
     if (glusterd_is_volume_replicate(volinfo)) {
         if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) {
             is_valid_add_brick = _gf_true;
+            if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+                ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+                if (ret) {
+                    gf_msg("glusterd", GF_LOG_ERROR, 0,
+                           GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+                           "Failed to stop shd for %s.", volinfo->volname);
+                }
+                restart_shd = _gf_true;
+            }
             ret = generate_dummy_client_volfiles(volinfo);
             if (ret) {
                 gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -1221,6 +1231,14 @@ generate_volfiles:
 out:
     GF_FREE(free_ptr1);
     GF_FREE(free_ptr2);
+    if (restart_shd) {
+        if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+                                     PROC_START_NO_WAIT)) {
+            gf_msg("glusterd", GF_LOG_CRITICAL, 0,
+                   GD_MSG_GLUSTER_SERVICE_START_FAIL,
+                   "Failed to start shd for %s.", volinfo->volname);
+        }
+    }
     gf_msg_debug("glusterd", 0, "Returning %d", ret);
 
     return ret;
--
cgit
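
For readers skimming the change, the reordering can be reduced to the
following minimal standalone sketch. It is not glusterd code: the helpers
stop_shd(), start_shd() and set_pending_xattrs_and_add_bricks() are
hypothetical stand-ins, and only the control flow (stop first, remember to
restart, restart from the common exit path) mirrors what the patch does.

/* Minimal standalone sketch, not glusterd code: the helper names below are
 * hypothetical stand-ins, and only the control flow mirrors the patch. */
#include <stdbool.h>
#include <stdio.h>

static int stop_shd(void) { printf("shd stopped\n"); return 0; }
static int start_shd(void) { printf("shd (re)started\n"); return 0; }
static int set_pending_xattrs_and_add_bricks(void) { printf("bricks added\n"); return 0; }

static int
add_bricks_increasing_replica(bool volume_started)
{
    bool restart_shd = false;
    int ret;

    /* New order: stop the old shd *before* touching the bricks, so it cannot
     * scan the root directory and prune its index entry mid-operation. */
    if (volume_started) {
        if (stop_shd() != 0)
            fprintf(stderr, "failed to stop shd\n");
        restart_shd = true; /* restart later even if the stop reported an error */
    }

    /* Set pending xattrs on the new bricks and add them to the volume. */
    ret = set_pending_xattrs_and_add_bricks();

    /* Restart shd on both the success and the failure path, as the patch
     * does from the function's out: cleanup label. */
    if (restart_shd && start_shd() != 0)
        fprintf(stderr, "failed to restart shd\n");

    return ret;
}

int
main(void)
{
    return add_bricks_increasing_replica(true);
}

Deferring the restart to the common exit path means the self-heal daemon is
brought back even when the add-brick itself fails partway, which is why the
patch invokes the shd manager under the out: label rather than next to the
stop call.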