summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Atin Mukherjee <amukherj@redhat.com> 2019-07-15 11:26:55 +0530
committer: Atin Mukherjee <amukherj@redhat.com> 2019-08-25 08:18:54 +0000
commit: f0be15cd534b91f88c0239ae850f22c5f2b90c0f (patch)
tree: e70bbad0eeb3f5343c126bdb3045b5914bb046de
parent: 093c66a0af496301e5d28d55f4fe57362cb399d5 (diff)
glusterd: stop stale bricks during handshaking in brick mux mode
This patch addresses two problems:

1. During friend handshaking, if a volume is imported because its version changed, the old bricks were not stopped, which could leave bricks running with old volfiles.

2. While attaching the shd service in glusterd_attach_svc, the volume for which we are attempting the attach might become stale and be in the process of deletion. Hence, on every retry (made when the rpc connection isn't ready yet), check for the existence of the volume first and only then attempt the attach request.

Fixes: bz#1733425
Change-Id: I6bac6b871f7e31cb5bf277db979289dec196a03e
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-shd-svc.c     11
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-svc-helper.c  16
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c       34
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.h        3
4 files changed, 55 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
index 5077ec1091f..eaed873d430 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -263,9 +263,6 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
volinfo = data;
GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
- if (volinfo)
- glusterd_volinfo_ref(volinfo);
-
if (volinfo->is_snap_volume) {
/* healing of a snap volume is not supported yet*/
ret = 0;
@@ -280,9 +277,8 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
conf->restart_shd = _gf_true;
shd_restart = _gf_true;
- ret = glusterd_shdsvc_create_volfile(volinfo);
- if (ret)
- goto out;
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
if (!glusterd_is_shd_compatible_volume(volinfo)) {
ret = 0;
@@ -294,6 +290,9 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
}
goto out;
}
+ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret)
+ goto out;
ret = glusterd_shd_svc_mux_init(volinfo, svc);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
index cf40a758082..3b965ea1ed8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -924,6 +924,22 @@ glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
rpc = rpc_clnt_ref(svc->conn.rpc);
for (tries = 15; tries > 0; --tries) {
+ /* There might be a case that the volume for which we're attempting to
+ * attach a shd svc might become stale and in the process of deletion.
+ * Given that the volinfo object is being already passed here before
+ * that sequence of operation has happened we might be operating on a
+ * stale volume. At every sync task switch we should check for existance
+ * of the volume now
+ */
+ if (!glusterd_volume_exists(volinfo->volname)) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "Volume %s "
+ " is marked as stale, not attempting further shd svc attach "
+ "attempts",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+ }
if (rpc) {
pthread_mutex_lock(&conf->attach_lock);
{
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index d91b672e47e..db17fe6d609 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1654,6 +1654,33 @@ glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo)
return ret;
}
+gf_boolean_t
+glusterd_volume_exists(const char *volname)
+{
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ gf_boolean_t volume_found = _gf_false;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT(volname);
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ cds_list_for_each_entry(tmp_volinfo, &priv->volumes, vol_list)
+ {
+ if (!strcmp(tmp_volinfo->volname, volname)) {
+ gf_msg_debug(this->name, 0, "Volume %s found", volname);
+ volume_found = _gf_true;
+ break;
+ }
+ }
+
+ return volume_found;
+}
+
int32_t
glusterd_service_stop(const char *service, char *pidfile, int sig,
gf_boolean_t force_kill)
@@ -4714,10 +4741,11 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo,
old_brickinfo->uuid, old_brickinfo->hostname, old_brickinfo->path,
new_volinfo, &new_brickinfo);
/* If the brick is stale, i.e it's not a part of the new volume
- * or if it's part of the new volume and is pending a snap,
- * then stop the brick process
+ * or if it's part of the new volume and is pending a snap or if it's
+ * brick multiplexing enabled, then stop the brick process
*/
- if (ret || (new_brickinfo->snap_status == -1)) {
+ if (ret || (new_brickinfo->snap_status == -1) ||
+ is_brick_mx_enabled()) {
/*TODO: may need to switch to 'atomic' flavour of
* brick_stop, once we make peer rpc program also
* synctask enabled*/
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index ed117e02267..c6a7545f4eb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -166,6 +166,9 @@ glusterd_brickinfo_new_from_brick(char *brick, glusterd_brickinfo_t **brickinfo,
int32_t
glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo);
+gf_boolean_t
+glusterd_volume_exists(const char *volname);
+
int
glusterd_volinfo_find_by_volume_id(uuid_t volume_id,
glusterd_volinfo_t **volinfo);