summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Krutika Dhananjay <kdhananj@redhat.com> 2013-02-19 12:11:57 +0530
committer: Anand Avati <avati@redhat.com> 2013-03-11 14:07:19 -0700
commit: e125e2ae61c31da798ea9a7342ea9292f47c1d6b (patch)
tree: 7095ef234d76ce5c7152ca9d847afc3e7a18b610
parent: bc4350423a33d21464b507b4e229eb5244e0cb6e (diff)
glusterd: Mark vol as deleted by renaming voldir before cleaning up the store
PROBLEM:
During 'volume delete', when glusterd fails to erase all information about a volume from the backend store (for instance because rmdir() failed on non-empty directories), not only does volume delete fail on that node, but also subsequent attempts to restart glusterd fail because the volume store is left in an inconsistent state.

FIX:
Rename the volume directory path to a new location <working-dir>/trash/<volume-id>.deleted, and then go on to clean up its contents. The volume is considered deleted once rename() succeeds, irrespective of whether the cleanup succeeds or not.

Change-Id: Iaf18e1684f0b101808bd5e1cd53a5d55790541a8
BUG: 889630
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/4639
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Reviewed-by: Kaushal M <kaushal@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rwxr-xr-x tests/bugs/bug-889630.t 56
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-store.c 111
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-store.h 4
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 6
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd.h 1
5 files changed, 134 insertions, 44 deletions
diff --git a/tests/bugs/bug-889630.t b/tests/bugs/bug-889630.t
new file mode 100755
index 000000000..b04eb3407
--- /dev/null
+++ b/tests/bugs/bug-889630.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function volume_count {
+ local cli=$1;
+ if [ $cli -eq '1' ] ; then
+ $CLI_1 volume info | grep 'Volume Name' | wc -l;
+ else
+ $CLI_2 volume info | grep 'Volume Name' | wc -l;
+ fi
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+b="B1";
+
+#Create an extra file in the originator's volume store
+touch ${!b}/glusterd/vols/$V0/run/file
+
+TEST $CLI_1 volume stop $V0
+#Test for self-commit failure
+TEST $CLI_1 volume delete $V0
+
+#Check whether delete succeeded on both the nodes
+EXPECT "0" volume_count '1'
+EXPECT "0" volume_count '2'
+
+#Check whether the volume name can be reused after deletion
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
+TEST $CLI_1 volume start $V0
+
+#Create an extra file in the peer's volume store
+touch ${!b}/glusterd/vols/$V0/run/file
+
+TEST $CLI_1 volume stop $V0
+#Test for commit failure on the other node
+TEST $CLI_2 volume delete $V0
+
+EXPECT "0" volume_count '1';
+EXPECT "0" volume_count '2';
+
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 085e3e85d..7e26eb4a7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -446,12 +446,10 @@ out:
}
int32_t
-glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo)
+glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path)
{
int32_t ret = -1;
glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
char brickpath[PATH_MAX] = {0,};
char *ptr = NULL;
char *tmppath = NULL;
@@ -459,15 +457,11 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
priv = this->private;
-
GF_ASSERT (priv);
- GLUSTERD_GET_BRICK_DIR (path, volinfo, priv);
-
tmppath = gf_strdup (brickinfo->path);
ptr = strchr (tmppath, '/');
@@ -477,15 +471,16 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
ptr = strchr (tmppath, '/');
}
- snprintf (brickpath, sizeof (brickpath), "%s/%s:%s",
- path, brickinfo->hostname, tmppath);
+ snprintf (brickpath, sizeof (brickpath),
+ "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path,
+ brickinfo->hostname, tmppath);
GF_FREE (tmppath);
ret = unlink (brickpath);
if ((ret < 0) && (errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR, "Unlink failed on %s, "
+ gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, "
"reason: %s", brickpath, strerror(errno));
ret = -1;
goto out;
@@ -503,7 +498,7 @@ out:
}
int32_t
-glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
+glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path)
{
int32_t ret = 0;
glusterd_brickinfo_t *tmp = NULL;
@@ -520,7 +515,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo);
list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
- ret = glusterd_store_delete_brick (volinfo, tmp);
+ ret = glusterd_store_delete_brick (tmp, delete_path);
if (ret)
goto out;
}
@@ -528,7 +523,8 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
priv = this->private;
GF_ASSERT (priv);
- GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv);
+ snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path,
+ GLUSTERD_BRICK_INFO_DIR);
dir = opendir (brickdir);
@@ -539,7 +535,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
brickdir, entry->d_name);
ret = unlink (path);
if (ret && errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to unlink %s, "
+ gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, "
"reason: %s", path, strerror(errno));
}
glusterd_for_each_entry (entry, dir);
@@ -1245,14 +1241,17 @@ out:
int32_t
glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
{
- char pathname[PATH_MAX] = {0,};
- int32_t ret = 0;
- glusterd_conf_t *priv = NULL;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- char path[PATH_MAX] = {0,};
- struct stat st = {0, };
- xlator_t *this = NULL;
+ char pathname[PATH_MAX] = {0,};
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char path[PATH_MAX] = {0,};
+ char delete_path[PATH_MAX] = {0,};
+ char trashdir[PATH_MAX] = {0,};
+ struct stat st = {0, };
+ xlator_t *this = NULL;
+ gf_boolean_t rename_fail = _gf_false;
this = THIS;
GF_ASSERT (this);
@@ -1261,29 +1260,53 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
priv = this->private;
GF_ASSERT (priv);
- snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir,
- volinfo->volname);
- dir = opendir (pathname);
+ GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv);
+
+ snprintf (delete_path, sizeof (delete_path),
+ "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir,
+ uuid_utoa (volinfo->volume_id));
+
+ snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
+ priv->workdir);
+
+ ret = mkdir (trashdir, 0777);
+ if (ret && errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create trash "
+ "directory, reason : %s", strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = rename (pathname, delete_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume "
+ "directory for volume %s", volinfo->volname);
+ rename_fail = _gf_true;
+ goto out;
+ }
+
+ dir = opendir (delete_path);
if (!dir) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to open directory %s."
- " Reason : %s", pathname, strerror (errno));
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s."
+ " Reason : %s", delete_path, strerror (errno));
+ ret = 0;
goto out;
}
- ret = glusterd_store_remove_bricks (volinfo);
+ ret = glusterd_store_remove_bricks (volinfo, delete_path);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Remove bricks failed for %s",
+ gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s",
volinfo->volname);
}
glusterd_for_each_entry (entry, dir);
while (entry) {
- snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name);
+ snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
ret = stat (path, &st);
if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to stat "
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to stat "
"entry %s : %s", path, strerror (errno));
goto stat_failed;
}
@@ -1293,11 +1316,12 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
else
ret = unlink (path);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, " Failed to remove "
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, " Failed to remove "
"%s. Reason : %s", path, strerror (errno));
+ }
- gf_log (this->name, ret ? GF_LOG_ERROR : GF_LOG_DEBUG, "%s %s",
+ gf_log (this->name, GF_LOG_DEBUG, "%s %s",
ret ? "Failed to remove":"Removed",
entry->d_name);
stat_failed:
@@ -1307,24 +1331,29 @@ stat_failed:
ret = closedir (dir);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to close dir %s. "
- "Reason : %s",pathname, strerror (errno));
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. "
+ "Reason : %s",delete_path, strerror (errno));
}
- ret = rmdir (pathname);
+ ret = rmdir (delete_path);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to rmdir: %s, err: %s",
- pathname, strerror (errno));
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s",
+ delete_path, strerror (errno));
+ }
+ ret = rmdir (trashdir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:"
+ " %s", trashdir, strerror (errno));
}
-
out:
if (volinfo->shandle) {
glusterd_store_handle_destroy (volinfo->shandle);
volinfo->shandle = NULL;
}
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ ret = (rename_fail == _gf_true) ? -1: 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 68977dd9c..762604e23 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -117,8 +117,8 @@ int32_t
glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo);
int32_t
-glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo);
+glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo,
+ char *delete_path);
int32_t
glusterd_store_handle_destroy (glusterd_store_handle_t *handle);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index d07b8b1a5..09e3ff669 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5495,11 +5495,15 @@ glusterd_delete_brick (glusterd_volinfo_t* volinfo,
glusterd_brickinfo_t *brickinfo)
{
int ret = 0;
+ char voldir[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = THIS->private;
GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
+ GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv);
+
glusterd_delete_volfile (volinfo, brickinfo);
- glusterd_store_delete_brick (volinfo, brickinfo);
+ glusterd_store_delete_brick (brickinfo, voldir);
glusterd_brickinfo_delete (brickinfo);
volinfo->brick_count--;
return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index c9e8d42d3..34593202b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -338,6 +338,7 @@ enum glusterd_vol_comp_status_ {
#define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate"
#define GLUSTERD_BRICK_INFO_DIR "bricks"
#define GLUSTERD_CKSUM_FILE "cksum"
+#define GLUSTERD_TRASH "trash"
#define GLUSTERD_NODE_STATE_FILE "node_state.info"
/* definitions related to replace brick */