author     Atin Mukherjee <amukherj@redhat.com>   2017-05-19 21:04:53 +0530
committer  Jeff Darcy <jeff@pl.atyp.us>           2017-05-26 12:11:28 +0000
commit     3ca5ae2f3bff2371042b607b8e8a218bf316b48c (patch)
tree       99671a396708a555c86d16c7bc70d41dd491e603
parent     23930326e0378edace9c8c41e8ae95931a2f68ba (diff)
glusterfsd: process attach and detach request inside lock
With brick multiplexing, there is a high possibility that attach and
detach requests might be processed in parallel, and to avoid a
concurrent update to the same graph list, a mutex lock is required.

Credits: Rafi (rkavunga@redhat.com) for the RCA of this issue

Change-Id: Ic8e6d1708655c8a143c5a3690968dfa572a32a9c
BUG: 1454865
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://review.gluster.org/17374
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
-rw-r--r--  glusterfsd/src/glusterfsd-mgmt.c                 | 173
-rw-r--r--  xlators/protocol/server/src/server-handshake.c   |   9
2 files changed, 105 insertions(+), 77 deletions(-)
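
The fix applies one pattern across every affected path: take ctx->volfile_lock
before reading or mutating the graph list, and release it on every exit path.
A minimal compilable sketch of that shape, using pthread stand-ins for
gluster's gf_lock_t and LOCK/UNLOCK macros (the handler body is illustrative,
not code from this patch):

    #include <pthread.h>

    /* Stand-ins for the real glusterfs_ctx_t and LOCK/UNLOCK macros,
     * which live in libglusterfs; only the locking shape matters here. */
    typedef struct {
            pthread_mutex_t volfile_lock;
    } ctx_t;

    #define LOCK(l)   pthread_mutex_lock (l)
    #define UNLOCK(l) pthread_mutex_unlock (l)

    static int
    handle_graph_request (ctx_t *ctx)
    {
            int ret = 0;

            LOCK (&ctx->volfile_lock);
            {
                    /* walk or mutate the graph list here; concurrent
                     * attach and detach requests serialize on this lock */
            }
            UNLOCK (&ctx->volfile_lock);

            return ret;
    }
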
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 698cd708799..8ede110121b 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -198,8 +198,9 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
{
gd1_mgmt_brick_op_req xlator_req = {0,};
ssize_t ret;
- xlator_t *top;
- xlator_t *victim;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ glusterfs_ctx_t *ctx = NULL;
xlator_list_t **trav_p;
ret = xdr_to_generic (req->msg[0], &xlator_req,
@@ -208,52 +209,62 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
req->rpc_err = GARBAGE_ARGS;
return -1;
}
+ ctx = glusterfsd_ctx;
- /* Find the xlator_list_t that points to our victim. */
- top = glusterfsd_ctx->active->first;
- for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
- victim = (*trav_p)->xlator;
- if (strcmp (victim->name, xlator_req.name) == 0) {
- break;
+ LOCK (&ctx->volfile_lock);
+ {
+ /* Find the xlator_list_t that points to our victim. */
+ top = glusterfsd_ctx->active->first;
+ for (trav_p = &top->children; *trav_p;
+ trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (strcmp (victim->name, xlator_req.name) == 0) {
+ break;
+ }
}
- }
- if (!*trav_p) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "can't terminate %s - not found", xlator_req.name);
- /*
- * Used to be -ENOENT. However, the caller asked us to make
- * sure it's down and if it's already down that's good enough.
- */
- glusterfs_terminate_response_send (req, 0);
- goto err;
- }
+ if (!*trav_p) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "can't terminate %s - not found",
+ xlator_req.name);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
+ glusterfs_terminate_response_send (req, 0);
+ goto err;
+ }
- glusterfs_terminate_response_send (req, 0);
- if ((trav_p == &top->children) && !(*trav_p)->next) {
- gf_log (THIS->name, GF_LOG_INFO,
- "terminating after loss of last child %s",
- xlator_req.name);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
- cleanup_and_exit (SIGTERM);
- } else {
- /*
- * This is terribly unsafe without quiescing or shutting things
- * down properly (or even locking) but it gets us to the point
- * where we can test other stuff.
- *
- * TBD: finish implementing this "detach" code properly
- */
- gf_log (THIS->name, GF_LOG_INFO, "detaching not-only child %s",
- xlator_req.name);
- top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+ glusterfs_terminate_response_send (req, 0);
+ if ((trav_p == &top->children) && !(*trav_p)->next) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "terminating after loss of last child %s",
+ xlator_req.name);
+ glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+ xlator_req.name);
+ kill (getpid(), SIGTERM);
+ } else {
+ /*
+ * This is terribly unsafe without quiescing or shutting
+ * things down properly but it gets us to the point
+ * where we can test other stuff.
+ *
+ * TBD: finish implementing this "detach" code properly
+ */
+ gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+ " child %s", xlator_req.name);
+ top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
+ glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+ xlator_req.name);
+
+ *trav_p = (*trav_p)->next;
+ glusterfs_autoscale_threads (THIS->ctx, -1);
+ }
- *trav_p = (*trav_p)->next;
- glusterfs_autoscale_threads (THIS->ctx, -1);
}
-
err:
+ UNLOCK (&ctx->volfile_lock);
free (xlator_req.name);
xlator_req.name = NULL;
return 0;
@@ -828,6 +839,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
gd1_mgmt_brick_op_req xlator_req = {0,};
xlator_t *this = NULL;
glusterfs_graph_t *newgraph = NULL;
+ glusterfs_ctx_t *ctx = NULL;
GF_ASSERT (req);
this = THIS;
@@ -842,32 +854,38 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
return -1;
}
ret = 0;
+ ctx = this->ctx;
- if (this->ctx->active) {
- gf_log (this->name, GF_LOG_INFO,
- "got attach for %s", xlator_req.name);
- ret = glusterfs_graph_attach (this->ctx->active,
- xlator_req.name, &newgraph);
- if (ret == 0) {
- ret = glusterfs_graph_parent_up (newgraph);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- LG_MSG_EVENT_NOTIFY_FAILED,
- "Parent up notification "
- "failed");
- goto out;
+ LOCK (&ctx->volfile_lock);
+ {
+ if (this->ctx->active) {
+ gf_log (this->name, GF_LOG_INFO,
+ "got attach for %s", xlator_req.name);
+ ret = glusterfs_graph_attach (this->ctx->active,
+ xlator_req.name,
+ &newgraph);
+ if (ret == 0) {
+ ret = glusterfs_graph_parent_up (newgraph);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ LG_MSG_EVENT_NOTIFY_FAILED,
+ "Parent up notification "
+ "failed");
+ goto out;
+ }
+ glusterfs_autoscale_threads (this->ctx, 1);
}
- glusterfs_autoscale_threads (this->ctx, 1);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "got attach for %s but no active graph",
+ xlator_req.name);
}
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "got attach for %s but no active graph",
- xlator_req.name);
- }
- glusterfs_translator_info_response_send (req, ret, NULL, NULL);
+ glusterfs_translator_info_response_send (req, ret, NULL, NULL);
out:
+ UNLOCK (&ctx->volfile_lock);
+ }
free (xlator_req.input.input_val);
free (xlator_req.name);
@@ -1981,23 +1999,28 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
xlator_list_t *trav;
int ret;
- if (ctx->active) {
- server_xl = ctx->active->first;
- if (strcmp (server_xl->type, "protocol/server") != 0) {
- server_xl = NULL;
+ LOCK (&ctx->volfile_lock);
+ {
+ if (ctx->active) {
+ server_xl = ctx->active->first;
+ if (strcmp (server_xl->type, "protocol/server") != 0) {
+ server_xl = NULL;
+ }
+ }
+ if (!server_xl) {
+ /* Startup (ctx->active not set) or non-server. */
+ UNLOCK (&ctx->volfile_lock);
+ return glusterfs_volfile_fetch_one
+ (ctx, ctx->cmd_args.volfile_id);
}
- }
- if (!server_xl) {
- /* Startup (ctx->active not set) or non-server. */
- return glusterfs_volfile_fetch_one (ctx,
- ctx->cmd_args.volfile_id);
- }
- ret = 0;
- for (trav = server_xl->children; trav; trav = trav->next) {
- ret |= glusterfs_volfile_fetch_one (ctx,
- trav->xlator->volfile_id);
+ ret = 0;
+ for (trav = server_xl->children; trav; trav = trav->next) {
+ ret |= glusterfs_volfile_fetch_one
+ (ctx, trav->xlator->volfile_id);
+ }
}
+ UNLOCK (&ctx->volfile_lock);
return ret;
}
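
Two details in the hunks above are easy to miss. First, glusterfs_volfile_fetch
gains an early return inside the critical section, so that path must drop the
lock explicitly before returning. A sketch of the idiom, reusing the
ctx_t/LOCK/UNLOCK stand-ins from the sketch above:

    /* Two exit paths, both of which must release volfile_lock. */
    static int
    fetch_or_bail (ctx_t *ctx, int have_server_xl)
    {
            int ret = 0;

            LOCK (&ctx->volfile_lock);
            {
                    if (!have_server_xl) {
                            /* early return: drop the lock first, as
                             * glusterfs_volfile_fetch does above */
                            UNLOCK (&ctx->volfile_lock);
                            return ret;
                    }
                    /* ... fetch one volfile per child here ... */
            }
            UNLOCK (&ctx->volfile_lock);

            return ret;
    }

Second, the terminate path swaps cleanup_and_exit (SIGTERM) for
kill (getpid(), SIGTERM), presumably so that process teardown runs from the
signal path rather than while volfile_lock is still held.
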
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 64267f2aef9..f00804a3d3a 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -412,7 +412,7 @@ server_setvolume (rpcsvc_request_t *req)
rpc_transport_t *xprt = NULL;
int32_t fop_version = 0;
int32_t mgmt_version = 0;
-
+ glusterfs_ctx_t *ctx = NULL;
params = dict_new ();
reply = dict_new ();
@@ -423,6 +423,7 @@ server_setvolume (rpcsvc_request_t *req)
req->rpc_err = GARBAGE_ARGS;
goto fail;
}
+ ctx = THIS->ctx;
this = req->svc->xl;
/* this is to ensure config_params is populated with the first brick
@@ -468,7 +469,11 @@ server_setvolume (rpcsvc_request_t *req)
goto fail;
}
- xl = get_xlator_by_name (this, name);
+ LOCK (&ctx->volfile_lock);
+ {
+ xl = get_xlator_by_name (this, name);
+ }
+ UNLOCK (&ctx->volfile_lock);
if (xl == NULL) {
ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
name);
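
On the handshake side the critical section is deliberately narrow: only the
by-name walk of the child list in server_setvolume runs under the lock, since
a concurrent detach can unlink an xlator mid-walk. A sketch of such a guarded
lookup, again reusing the stand-ins from above (node_t is an illustrative list
node, not gluster's xlator_t):

    #include <string.h>

    /* Illustrative list node standing in for xlator_t children. */
    typedef struct node {
            const char  *name;
            struct node *next;
    } node_t;

    static node_t *
    lookup_by_name_locked (ctx_t *ctx, node_t *head, const char *name)
    {
            node_t *found = NULL;

            LOCK (&ctx->volfile_lock);
            {
                    /* a concurrent detach may rewrite ->next links,
                     * so the walk itself must be inside the lock */
                    for (node_t *n = head; n; n = n->next) {
                            if (strcmp (n->name, name) == 0) {
                                    found = n;
                                    break;
                            }
                    }
            }
            UNLOCK (&ctx->volfile_lock);

            return found;
    }

As in the patch, the result is used after the lock is dropped; the lock guards
the walk itself, not the later use of the pointer.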