cli,glusterd: Improve detach check validation

This patch improves the validation for the 'peer detach' command. A check for whether volumes exist with only some of their bricks on the peer being detached is added to the peer detach code flow (even 'force' is subject to this validation). This patch also guarantees that peer detach doesn't fail for a volume with all its bricks on the peer which is getting detached, when there are no other bricks on this peer. The following steps need to be followed for removing a downed and unrecoverable peer. * If a replacement system is available, - add it to the cluster - use replace-brick to migrate bricks of the downed peer to the new peer (since data cannot be recovered anyway, use the 'replace-brick commit force' command) or, * If no replacement system is available, - remove bricks of the downed peer using 'remove-brick' Change-Id: Ie85ac5b66e87bec365fdedd8352b645bb25e1c33 BUG: 983590 Signed-off-by: Kaushal M <kaushal@redhat.com> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-on: http://review.gluster.org/5325 Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
author: Kaushal M <kaushal@redhat.com> 2013-07-11 19:42:16 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2014-04-11 17:12:10 -0700
commit: 0e7f8af0db8201ee892979713ac86d5548f5ec73 (patch)
tree: 2c06ec38bafe7053f7ad63db080a578dccd032f0
parent: 29bccc2ed18eedc40e83d2f0d35327037a322384 (diff)
4 files changed, 55 insertions, 25 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 58c030ca0..53c402136 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -1167,6 +1167,8 @@ __glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
         char                      *hostname = NULL;
         int                            port = 0;
         int                           flags = 0;
+        glusterd_volinfo_t         *volinfo = NULL;
+        glusterd_volinfo_t             *tmp = NULL;
 
         this = THIS;
         GF_ASSERT (this);
@@ -1207,7 +1209,6 @@ __glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
                 gf_log (this->name, GF_LOG_ERROR, "Failed to get port");
                 goto out;
         }
-
         ret = dict_get_int32 (dict, "flags", &flags);
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR, "Failed to get flags");
@@ -1227,22 +1228,30 @@ __glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
         }
 
         if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
-                if (!uuid_is_null (uuid)) {
-                        /* Check if peers are connected, except peer being detached*/
-                        if (!glusterd_chk_peers_connected_befriended (uuid)) {
-                                ret = -1;
-                                op_errno = GF_DEPROBE_FRIEND_DOWN;
-                                goto out;
-                        }
-                        ret = glusterd_all_volume_cond_check (
-                                                 glusterd_friend_brick_belongs,
-                                                 -1, &uuid);
-                        if (ret) {
-                                op_errno = GF_DEPROBE_BRICK_EXIST;
-                                goto out;
-                        }
+                /* Check if peers are connected, except peer being
+                * detached*/
+                if (!glusterd_chk_peers_connected_befriended (uuid)) {
+                        ret = -1;
+                        op_errno = GF_DEPROBE_FRIEND_DOWN;
+                        goto out;
                 }
+        }
 
+        /* Check for if volumes exist with some bricks on the peer being
+        * detached. It's not a problem if a volume contains none or all
+        * of its bricks on the peer being detached
+        */
+        list_for_each_entry_safe (volinfo, tmp, &priv->volumes,
+                                  vol_list) {
+                ret = glusterd_friend_contains_vol_bricks (volinfo,
+                                                           uuid);
+                if (ret == 1) {
+                        op_errno = GF_DEPROBE_BRICK_EXIST;
+                        goto out;
+                }
+        }
+
+        if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
                 if (glusterd_is_any_volume_in_server_quorum (this) &&
                     !does_gd_meet_server_quorum (this)) {
                         gf_log (this->name, GF_LOG_ERROR, "Quorum does not "
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index c671edf68..fd56e5abf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -527,6 +527,9 @@ out:
         return ret;
 }
 
+/* Clean up stale volumes on the peer being detached. The volumes which have
+ * bricks on other peers are stale with respect to the detached peer.
+ */
 static int
 glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
 {
@@ -538,6 +541,12 @@ glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
 
         list_for_each_entry_safe (volinfo,tmp_volinfo,
                                   &priv->volumes, vol_list) {
+                /* The peer detach checks make sure that, at this point in the
+                 * detach process, there are only volumes contained completely
+                 * within or completely outside the detached peer.
+                 * The only stale volumes at this point are the ones
+                 * completely outside the peer and can be safely deleted.
+                 */
                 if (!glusterd_friend_contains_vol_bricks (volinfo,
                                                           MY_UUID)) {
                         gf_log (THIS->name, GF_LOG_INFO,
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 721ffe27f..eb6fb6757 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -7387,29 +7387,41 @@ out:
         return ret;
 }
 
-/* Checks if the given peer contains all the bricks belonging to the
- * given volume. Returns true if it does else returns false
+/* Checks if the given peer contains bricks belonging to the given volume.
+ * Returns,
+ *   2 - if peer contains all the bricks
+ *   1 - if peer contains some, but not all, of the bricks
+ *   0 - if peer contains no bricks
  */
-gf_boolean_t
+int
 glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo,
                                      uuid_t friend_uuid)
 {
-        gf_boolean_t            ret = _gf_true;
+        int                     ret = 0;
         glusterd_brickinfo_t    *brickinfo = NULL;
+        int                     count = 0;
 
         GF_ASSERT (volinfo);
 
         list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
-                if (uuid_compare (friend_uuid, brickinfo->uuid)) {
-                        ret = _gf_false;
-                        break;
+                if (!uuid_compare (brickinfo->uuid, friend_uuid)) {
+                        count++;
                 }
         }
+
+        if (count) {
+                if (count == volinfo->brick_count)
+                        ret = 2;
+                else
+                        ret = 1;
+        }
         gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
         return ret;
 }
 
-/* Remove all volumes which completely belong to given friend
+/* Cleanup the stale volumes left behind in the cluster. The volumes which are
+ * contained completely within the detached peer are stale with respect to the
+ * cluster.
  */
 int
 glusterd_friend_remove_cleanup_vols (uuid_t uuid)
@@ -7424,7 +7436,7 @@ glusterd_friend_remove_cleanup_vols (uuid_t uuid)
 
         list_for_each_entry_safe (volinfo, tmp_volinfo,
                                   &priv->volumes, vol_list) {
-                if (glusterd_friend_contains_vol_bricks (volinfo, uuid)) {
+                if (glusterd_friend_contains_vol_bricks (volinfo, uuid) == 2) {
                         gf_log (THIS->name, GF_LOG_INFO,
                                 "Deleting stale volume %s", volinfo->volname);
                         ret = glusterd_delete_volume (volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 1964c88c5..23f8ad7f6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -453,7 +453,7 @@ glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo);
 gf_boolean_t
 glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname,
                                   char *path);
-gf_boolean_t
+int
 glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo,
                                      uuid_t friend_uuid);
 int
author	Kaushal M <kaushal@redhat.com>	2013-07-11 19:42:16 +0530
committer	Vijay Bellur <vbellur@redhat.com>	2014-04-11 17:12:10 -0700
commit	0e7f8af0db8201ee892979713ac86d5548f5ec73 (patch)
tree	2c06ec38bafe7053f7ad63db080a578dccd032f0
parent	29bccc2ed18eedc40e83d2f0d35327037a322384 (diff)