summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@redhat.com> 2017-02-01 21:54:30 -0500
committer Shyamsundar Ranganathan <srangana@redhat.com> 2017-02-02 19:45:03 -0500
commit da30f79c9e35ab8cca71601a33665af72d0880ff (patch)
tree 5185ac92c1f1f69d86c796b3c1d6685e494761fe
parent 1ed73ffa16cb7fe4415acbdb095da6a4628f711a (diff)
glusterd: double-check whether brick is alive for stats
With multiplexing, our tests detach bricks from their host processes without glusterd being involved. Thus, when we ask glusterd to fetch profile info, it will try to fetch from a brick that's actually not present any more. While it can handle the process being dead and its RPC connection being closed, it barfs if it gets a negative response from a live brick process. This is not a problem in normal use, because the brick can't disappear without glusterd seeing it.

The fix is to double check that the brick is actually running, by looking for its pidfile, which the tests *do* clean up as part of killing a brick.

Backport of:
> Change-Id: I098465b175ecf23538bd7207357c752a2bba8f4e
> BUG: 1385758
> Reviewed-on: https://review.gluster.org/16509

BUG: 1418091
Change-Id: Ia61e273134520c8ccfa3371ee2370cb9a1920877
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://review.gluster.org/16532
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c 17
1 files changed, 14 insertions, 3 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index d9b18e00195..6bc01f702cc 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -6315,15 +6315,14 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr,
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_pending_node_t *pending_node = NULL;
char *brick = NULL;
-
-
+ int32_t pid = -1;
+ char pidfile[PATH_MAX] = {0};
this = THIS;
GF_ASSERT (this);
priv = this->private;
GF_ASSERT (priv);
-
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
gf_msg ("glusterd", GF_LOG_ERROR, 0,
@@ -6383,6 +6382,18 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr,
cds_list_for_each_entry (brickinfo, &volinfo->bricks,
brick_list) {
if (glusterd_is_brick_started (brickinfo)) {
+ /*
+ * In normal use, glusterd_is_brick_started
+ * will give us the answer we need. However,
+ * in our tests the brick gets detached behind
+ * our back, so we need to double-check this
+ * way.
+ */
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ brickinfo, priv);
+ if (!gf_is_service_running (pidfile, &pid)) {
+ continue;
+ }
pending_node = GF_CALLOC (1, sizeof (*pending_node),
gf_gld_mt_pending_node_t);
if (!pending_node) {