From e125e2ae61c31da798ea9a7342ea9292f47c1d6b Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Tue, 19 Feb 2013 12:11:57 +0530 Subject: glusterd: Mark vol as deleted by renaming voldir before cleaning up the store PROBLEM: During 'volume delete', when glusterd fails to erase all information about a volume from the backend store (for instance because rmdir() failed on non-empty directories), not only does volume delete fail on that node, but also subsequent attempts to restart glusterd fail because the volume store is left in an inconsistent state. FIX: Rename the volume directory path to a new location /trash/.deleted, and then go on to clean up its contents. The volume is considered deleted once rename() succeeds, irrespective of whether the cleanup succeeds or not. Change-Id: Iaf18e1684f0b101808bd5e1cd53a5d55790541a8 BUG: 889630 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/4639 Reviewed-by: Amar Tumballi Reviewed-by: Kaushal M Reviewed-by: Jeff Darcy Tested-by: Gluster Build System --- tests/bugs/bug-889630.t | 56 +++++++++++++++ xlators/mgmt/glusterd/src/glusterd-store.c | 111 ++++++++++++++++++----------- xlators/mgmt/glusterd/src/glusterd-store.h | 4 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 6 +- xlators/mgmt/glusterd/src/glusterd.h | 1 + 5 files changed, 134 insertions(+), 44 deletions(-) create mode 100755 tests/bugs/bug-889630.t diff --git a/tests/bugs/bug-889630.t b/tests/bugs/bug-889630.t new file mode 100755 index 00000000000..b04eb34076e --- /dev/null +++ b/tests/bugs/bug-889630.t @@ -0,0 +1,56 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../cluster.rc + +function check_peers { + $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l +} + +function volume_count { + local cli=$1; + if [ $cli -eq '1' ] ; then + $CLI_1 volume info | grep 'Volume Name' | wc -l; + else + $CLI_2 volume info | grep 'Volume Name' | wc -l; + fi +} + +cleanup; + +TEST launch_cluster 2; +TEST $CLI_1 peer probe $H2; + +EXPECT_WITHIN 20 1 check_peers + +TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 +TEST $CLI_1 volume start $V0 + +b="B1"; + +#Create an extra file in the originator's volume store +touch ${!b}/glusterd/vols/$V0/run/file + +TEST $CLI_1 volume stop $V0 +#Test for self-commit failure +TEST $CLI_1 volume delete $V0 + +#Check whether delete succeeded on both the nodes +EXPECT "0" volume_count '1' +EXPECT "0" volume_count '2' + +#Check whether the volume name can be reused after deletion +TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 +TEST $CLI_1 volume start $V0 + +#Create an extra file in the peer's volume store +touch ${!b}/glusterd/vols/$V0/run/file + +TEST $CLI_1 volume stop $V0 +#Test for commit failure on the other node +TEST $CLI_2 volume delete $V0 + +EXPECT "0" volume_count '1'; +EXPECT "0" volume_count '2'; + +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 085e3e85dc4..7e26eb4a74e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -446,12 +446,10 @@ out: } int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path) { int32_t ret = -1; glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; char brickpath[PATH_MAX] = {0,}; char *ptr = NULL; char *tmppath = NULL; @@ -459,15 +457,11 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, this = THIS; GF_ASSERT (this); - GF_ASSERT (volinfo); GF_ASSERT (brickinfo); priv = this->private; - GF_ASSERT (priv); - GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); - tmppath = gf_strdup (brickinfo->path); ptr = strchr (tmppath, '/'); @@ -477,15 +471,16 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, ptr = strchr (tmppath, '/'); } - snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", - path, brickinfo->hostname, tmppath); + snprintf (brickpath, sizeof (brickpath), + "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path, + brickinfo->hostname, tmppath); GF_FREE (tmppath); ret = unlink (brickpath); if ((ret < 0) && (errno != ENOENT)) { - gf_log (this->name, GF_LOG_ERROR, "Unlink failed on %s, " + gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, " "reason: %s", brickpath, strerror(errno)); ret = -1; goto out; @@ -503,7 +498,7 @@ out: } int32_t -glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) +glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path) { int32_t ret = 0; glusterd_brickinfo_t *tmp = NULL; @@ -520,7 +515,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - ret = glusterd_store_delete_brick (volinfo, tmp); + ret = glusterd_store_delete_brick (tmp, delete_path); if (ret) goto out; } @@ -528,7 +523,8 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) priv = this->private; GF_ASSERT (priv); - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); + snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path, + GLUSTERD_BRICK_INFO_DIR); dir = opendir (brickdir); @@ -539,7 +535,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) brickdir, entry->d_name); ret = unlink (path); if (ret && errno != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, "Unable to unlink %s, " + gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, " "reason: %s", path, strerror(errno)); } glusterd_for_each_entry (entry, dir); @@ -1245,14 +1241,17 @@ out: int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) { - char pathname[PATH_MAX] = {0,}; - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; - char path[PATH_MAX] = {0,}; - struct stat st = {0, }; - xlator_t *this = NULL; + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; this = THIS; GF_ASSERT (this); @@ -1261,29 +1260,53 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) priv = this->private; GF_ASSERT (priv); - snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir, - volinfo->volname); - dir = opendir (pathname); + GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); + + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, + uuid_utoa (volinfo->volume_id)); + + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); + + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = rename (pathname, delete_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume " + "directory for volume %s", volinfo->volname); + rename_fail = _gf_true; + goto out; + } + + dir = opendir (delete_path); if (!dir) { - gf_log (this->name, GF_LOG_ERROR, "Failed to open directory %s." - " Reason : %s", pathname, strerror (errno)); + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; goto out; } - ret = glusterd_store_remove_bricks (volinfo); + ret = glusterd_store_remove_bricks (volinfo, delete_path); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Remove bricks failed for %s", + gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s", volinfo->volname); } glusterd_for_each_entry (entry, dir); while (entry) { - snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name); + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); ret = stat (path, &st); if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "Failed to stat " + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " "entry %s : %s", path, strerror (errno)); goto stat_failed; } @@ -1293,11 +1316,12 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) else ret = unlink (path); - if (ret) - gf_log (this->name, GF_LOG_ERROR, " Failed to remove " + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " "%s. Reason : %s", path, strerror (errno)); + } - gf_log (this->name, ret ? GF_LOG_ERROR : GF_LOG_DEBUG, "%s %s", + gf_log (this->name, GF_LOG_DEBUG, "%s %s", ret ? "Failed to remove":"Removed", entry->d_name); stat_failed: @@ -1307,24 +1331,29 @@ stat_failed: ret = closedir (dir); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to close dir %s. " - "Reason : %s",pathname, strerror (errno)); + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. " + "Reason : %s",delete_path, strerror (errno)); } - ret = rmdir (pathname); + ret = rmdir (delete_path); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to rmdir: %s, err: %s", - pathname, strerror (errno)); + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); + } + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); } - out: if (volinfo->shandle) { glusterd_store_handle_destroy (volinfo->shandle); volinfo->shandle = NULL; } - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + ret = (rename_fail == _gf_true) ? -1: 0; + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 68977dd9ce5..762604e23d3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -117,8 +117,8 @@ int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo); int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, + char *delete_path); int32_t glusterd_store_handle_destroy (glusterd_store_handle_t *handle); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index d07b8b1a51e..09e3ff66940 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5495,11 +5495,15 @@ glusterd_delete_brick (glusterd_volinfo_t* volinfo, glusterd_brickinfo_t *brickinfo) { int ret = 0; + char voldir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); + GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv); + glusterd_delete_volfile (volinfo, brickinfo); - glusterd_store_delete_brick (volinfo, brickinfo); + glusterd_store_delete_brick (brickinfo, voldir); glusterd_brickinfo_delete (brickinfo); volinfo->brick_count--; return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index c9e8d42d341..34593202b07 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -338,6 +338,7 @@ enum glusterd_vol_comp_status_ { #define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate" #define GLUSTERD_BRICK_INFO_DIR "bricks" #define GLUSTERD_CKSUM_FILE "cksum" +#define GLUSTERD_TRASH "trash" #define GLUSTERD_NODE_STATE_FILE "node_state.info" /* definitions related to replace brick */ -- cgit