From 7c0d35b2e7e234084d0414b3cf1e09969a43a677 Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Thu, 27 Dec 2012 15:57:13 +0530 Subject: glusterd: harden 'volume start' staging to check for brick dirs' presence PROBLEM: When the brick directory of a volume is absent on any of the servers, AND an attempt is made to start the volume, commit fails ONLY on the node where the brick dir is absent, leading to a split-brain like situation. FIX: Harden 'volume start' to check for the presence of brick directories at the time of staging, thereby preventing commit failure. Change-Id: I67faeb9afbd3aa76f08645924462db126bf7a977 BUG: 889996 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/4365 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 62 +++++++++++++++++++------ 1 file changed, 49 insertions(+), 13 deletions(-) (limited to 'xlators/mgmt') diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index cf424abb2d5..fb1832f0d47 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -814,6 +814,9 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) char msg[2048]; glusterd_conf_t *priv = NULL; xlator_t *this = NULL; + uuid_t volume_id = {0,}; + char volid[50] = {0,}; + char xattr_volid[50] = {0,}; this = THIS; GF_ASSERT (this); @@ -828,8 +831,6 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) if (!exists) { snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname); - gf_log (this->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); ret = -1; goto out; } @@ -845,6 +846,15 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) if (ret) goto out; + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + if (glusterd_is_volume_started (volinfo)) { + snprintf (msg, sizeof (msg), "Volume %s already " + "started", volname); + ret = -1; + goto out; + } + } + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_resolve_brick (brickinfo); if (ret) { @@ -853,22 +863,48 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) goto out; } - if (!(flags & GF_CLI_FLAG_OP_FORCE)) { - if (glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof (msg), "Volume %s already" - " started", volname); - gf_log (this->name, GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (volinfo->backend == GD_VOL_BK_BD) + continue; + + ret = gf_lstat_dir (brickinfo->path, NULL); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to find " + "brick directory %s for volume %s. " + "Reason : %s", brickinfo->path, + volname, strerror (errno)); + goto out; + } + ret = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY, + volume_id, 16); + if (ret < 0) { + snprintf (msg, sizeof (msg), "Failed to get " + "extended attribute %s for brick dir %s. " + "Reason : %s", GF_XATTR_VOL_ID_KEY, + brickinfo->path, strerror (errno)); + ret = -1; + goto out; + } + if (uuid_compare (volinfo->volume_id, volume_id)) { + snprintf (msg, sizeof (msg), "Volume id mismatch for " + "brick %s:%s. Expected volume id %s, " + "volume id %s found", brickinfo->hostname, + brickinfo->path, + uuid_utoa_r (volinfo->volume_id, volid), + uuid_utoa_r (volume_id, xattr_volid)); + ret = -1; + goto out; } } ret = 0; out: - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - + if (ret && (msg[0] != '\0')) { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } return ret; } -- cgit