From f51ffaeda4c87b682b7865c26befd75fe1c8cb25 Mon Sep 17 00:00:00 2001
From: Atin Mukherjee
Date: Tue, 21 Jul 2015 09:57:43 +0530
Subject: glusterd: Don't allow remove brick start/commit if glusterd is down
 on the host of the brick

Backport of http://review.gluster.org/#/c/11726/

remove brick staging blindly starts the remove brick operation even if the
glusterd instance of the node hosting the brick is down. Operationally this is
incorrect and can result in an inconsistent rebalance status across the nodes:
the originator of the command will always show the rebalance status as
'DEFRAG_NOT_STARTED', whereas the other nodes, once their glusterd instances
come up, will trigger rebalance and mark the status as completed when the
rebalance finishes.

This patch fixes two things:
1. Add a validation in remove brick to check whether all the peers hosting the
bricks to be removed are up.
2. Don't copy volinfo->rebal.dict from the stale volinfo during restore, as
this might end up in an inconsistent node_state.info file, resulting in
volume status command failure.

Change-Id: Ia4a76865c05037d49eec5e3bbfaf68c1567f1f81
BUG: 1256265
Signed-off-by: Atin Mukherjee
Reviewed-on: http://review.gluster.org/11726
Tested-by: NetBSD Build System
Reviewed-by: N Balachandran
Reviewed-by: Krishnan Parthasarathi
Reviewed-on: http://review.gluster.org/11996
Tested-by: Gluster Build System
Reviewed-by: Kaushal M
---
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 119 ++++++++++++++++++-------
 1 file changed, 89 insertions(+), 30 deletions(-)

(limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')

diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index e1c9fa66ce2..5e9a360280f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1661,6 +1661,83 @@ out:
         return ret;
 }
 
+static int
+glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
+                                       dict_t *dict,
+                                       glusterd_volinfo_t *volinfo,
+                                       char **errstr)
+{
+        char                   *brick       = NULL;
+        char                    msg[2048]   = {0,};
+        char                    key[256]    = {0,};
+        glusterd_brickinfo_t   *brickinfo   = NULL;
+        glusterd_peerinfo_t    *peerinfo    = NULL;
+        int                     i           = 0;
+        int                     ret         = -1;
+
+        /* Check whether all the nodes of the bricks to be removed are
+         * up, if not fail the operation */
+        for (i = 1; i <= brick_count; i++) {
+                snprintf (key, sizeof (key), "brick%d", i);
+                ret = dict_get_str (dict, key, &brick);
+                if (ret) {
+                        snprintf (msg, sizeof (msg),
+                                  "Unable to get %s", key);
+                        *errstr = gf_strdup (msg);
+                        goto out;
+                }
+
+                ret =
+                glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
+                                                       &brickinfo);
+                if (ret) {
+                        snprintf (msg, sizeof (msg), "Incorrect brick "
+                                  "%s for volume %s", brick, volinfo->volname);
+                        *errstr = gf_strdup (msg);
+                        goto out;
+                }
+                /* Do not allow commit if the bricks are not decommissioned
+                 * if its a remove brick commit
+                 */
+                if (cmd == GF_OP_CMD_COMMIT && !brickinfo->decommissioned) {
+                        snprintf (msg, sizeof (msg), "Brick %s "
+                                  "is not decommissioned. "
+                                  "Use start or force option",
+                                  brick);
+                        *errstr = gf_strdup (msg);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (glusterd_is_local_brick (THIS, volinfo, brickinfo))
+                        continue;
+
+                rcu_read_lock ();
+                peerinfo = glusterd_peerinfo_find_by_uuid
+                                                (brickinfo->uuid);
+                if (!peerinfo) {
+                        snprintf (msg, sizeof(msg), "Host node of the "
+                                  "brick %s is not in cluster", brick);
+                        *errstr = gf_strdup (msg);
+                        ret = -1;
+                        rcu_read_unlock ();
+                        goto out;
+                }
+                if (!peerinfo->connected) {
+                        snprintf (msg, sizeof(msg), "Host node of the "
+                                  "brick %s is down", brick);
+                        *errstr = gf_strdup (msg);
+                        ret = -1;
+                        rcu_read_unlock ();
+                        goto out;
+                }
+                rcu_read_unlock ();
+        }
+
+out:
+        return ret;
+}
+
 int
 glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
 {
@@ -1679,6 +1756,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
         char                   *brick       = NULL;
         glusterd_brickinfo_t   *brickinfo   = NULL;
         gsync_status_param_t    param       = {0,};
+        glusterd_peerinfo_t    *peerinfo    = NULL;
 
         this = THIS;
         GF_ASSERT (this);
@@ -1817,6 +1895,12 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
                         goto out;
                 }
 
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+                                                             dict, volinfo,
+                                                             &errstr);
+                if (ret)
+                        goto out;
+
                 if (is_origin_glusterd (dict)) {
                         ret = glusterd_generate_and_set_task_id
                                 (dict, GF_REMOVE_BRICK_TID_KEY);
@@ -1861,36 +1945,11 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
                         goto out;
                 }
 
-                /* Do not allow commit if the bricks are not decommissioned */
-                for ( i = 1; i <= brick_count; i++ ) {
-                        snprintf (key, sizeof (key), "brick%d", i);
-                        ret = dict_get_str (dict, key, &brick);
-                        if (ret) {
-                                snprintf (msg, sizeof (msg),
-                                          "Unable to get %s", key);
-                                errstr = gf_strdup (msg);
-                                goto out;
-                        }
-
-                        ret =
-                        glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
-                                                               &brickinfo);
-                        if (ret) {
-                                snprintf (msg, sizeof (msg), "Incorrect brick "
-                                          "%s for volume %s", brick, volname);
-                                errstr = gf_strdup (msg);
-                                goto out;
-                        }
-                        if ( !brickinfo->decommissioned ) {
-                                snprintf (msg, sizeof (msg), "Brick %s "
-                                          "is not decommissioned. "
-                                          "Use start or force option",
-                                          brick);
-                                errstr = gf_strdup (msg);
-                                ret = -1;
-                                goto out;
-                        }
-                }
+                ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+                                                             dict, volinfo,
+                                                             &errstr);
+                if (ret)
+                        goto out;
 
                 /* If geo-rep is configured, for this volume, it should be
                  * stopped.
--
cgit