From dba55ae364a2772904bb68a6bd0ea87289ee1470 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Thu, 25 May 2017 21:43:42 +0530 Subject: glusterfs: Not able to mount running volume after enable brick mux and stopped any volume Problem: With brick multiplexing enabled, if any volume goes down and a mount is then attempted on a volume that is still running, the mount command hangs. Solution: With brick multiplexing enabled, the server shares a single server_conf data structure across all associated subvolumes. When any subvolume goes down ungracefully (e.g. its brick directory is removed), the posix xlator sends a GF_EVENT_CHILD_DOWN event to its parent xlators, and the server's notify sets child_up to false in server_conf. When a client then tries to communicate with the server through a mount, the server checks conf->child_up, finds it FALSE, and fails with the message "translator are not yet ready". This patch changes server_conf to keep the child_up status per xlator. Another important correction in this patch is cleaning up threads from the server-side xlators after a volume is stopped. 
BUG: 1453977 Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d Signed-off-by: Mohit Agrawal Reviewed-on: https://review.gluster.org/17356 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System Reviewed-by: Raghavendra Talur CentOS-regression: Gluster Build System Reviewed-by: Jeff Darcy --- xlators/protocol/server/src/Makefile.am | 3 +- xlators/protocol/server/src/server-handshake.c | 24 +++++-- xlators/protocol/server/src/server-mem-types.h | 1 + xlators/protocol/server/src/server-messages.h | 10 ++- xlators/protocol/server/src/server.c | 95 +++++++++++++++++++++++--- xlators/protocol/server/src/server.h | 10 ++- 6 files changed, 121 insertions(+), 22 deletions(-) (limited to 'xlators/protocol/server/src') diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am index d65e5b89957..72f811e97fb 100644 --- a/xlators/protocol/server/src/Makefile.am +++ b/xlators/protocol/server/src/Makefile.am @@ -20,7 +20,8 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \ -I$(top_srcdir)/xlators/protocol/lib/src \ -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/glusterfsd/src AM_CFLAGS = -Wall $(GF_CFLAGS) \ -DDATADIR=\"$(localstatedir)\" diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index f00804a3d3a..f8f8f99f123 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -413,6 +413,7 @@ server_setvolume (rpcsvc_request_t *req) int32_t fop_version = 0; int32_t mgmt_version = 0; glusterfs_ctx_t *ctx = NULL; + struct _child_status *tmp = NULL; params = dict_new (); reply = dict_new (); @@ -512,13 +513,24 @@ server_setvolume (rpcsvc_request_t *req) "initialised yet. 
Try again later"); goto fail; } - - ret = dict_set_int32 (reply, "child_up", conf->child_up); - if (ret < 0) + list_for_each_entry (tmp, &conf->child_status->status_list, + status_list) { + if (strcmp (tmp->name, name) == 0) + break; + } + if (!tmp->name) { gf_msg (this->name, GF_LOG_ERROR, 0, - PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' " - "in the reply dict"); - + PS_MSG_CHILD_STATUS_FAILED, + "No xlator %s is found in " + "child status list", name); + } else { + ret = dict_set_int32 (reply, "child_up", tmp->child_up); + if (ret < 0) + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_DICT_GET_FAILED, + "Failed to set 'child_up' for xlator %s " + "in the reply dict", tmp->name); + } ret = dict_get_str (params, "process-uuid", &client_uid); if (ret < 0) { ret = dict_set_str (reply, "ERROR", diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h index 9165249d49a..76a78ac3ede 100644 --- a/xlators/protocol/server/src/server-mem-types.h +++ b/xlators/protocol/server/src/server-mem-types.h @@ -28,6 +28,7 @@ enum gf_server_mem_types_ { gf_server_mt_setvolume_rsp_t, gf_server_mt_lock_mig_t, gf_server_mt_compound_rsp_t, + gf_server_mt_child_status, gf_server_mt_end, }; #endif /* __SERVER_MEM_TYPES_H__ */ diff --git a/xlators/protocol/server/src/server-messages.h b/xlators/protocol/server/src/server-messages.h index b8245af18df..14729ade852 100644 --- a/xlators/protocol/server/src/server-messages.h +++ b/xlators/protocol/server/src/server-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_PS_BASE GLFS_MSGID_COMP_PS -#define GLFS_NUM_MESSAGES 91 +#define GLFS_NUM_MESSAGES 92 #define GLFS_MSGID_END (GLFS_PS_BASE + GLFS_NUM_MESSAGES + 1) /* Messages with message IDs */ #define glfs_msg_start_x GLFS_PS_BASE, "Invalid: Start of messages" @@ -857,6 +857,14 @@ */ #define PS_MSG_CLIENT_OPVERSION_GET_FAILED (GLFS_PS_BASE + 91) + +/*! 
+ * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define PS_MSG_CHILD_STATUS_FAILED (GLFS_PS_BASE + 92) /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 202fe71ba55..130a55372a6 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -23,6 +23,7 @@ #include "event.h" #include "events.h" #include "server-messages.h" +#include "glusterfsd.h" rpcsvc_cbk_program_t server_cbk_prog = { .progname = "Gluster Callback", @@ -1065,7 +1066,9 @@ init (xlator_t *this) if (ret) conf->conf_dir = CONFDIR; - conf->child_up = _gf_false; + conf->child_status = GF_CALLOC (1, sizeof (struct _child_status), + gf_server_mt_child_status); + INIT_LIST_HEAD (&conf->child_status->status_list); /*ret = dict_get_str (this->options, "statedump-path", &statedump_path); if (!ret) { @@ -1389,10 +1392,15 @@ server_process_child_event (xlator_t *this, int32_t event, void *data, pthread_mutex_lock (&conf->mutex); { list_for_each_entry (xprt, &conf->xprt_list, list) { - rpcsvc_callback_submit (conf->rpc, xprt, - &server_cbk_prog, - cbk_procnum, - NULL, 0, NULL); + if (!xprt->xl_private) { + continue; + } + if (xprt->xl_private->bound_xl == data) { + rpcsvc_callback_submit (conf->rpc, xprt, + &server_cbk_prog, + cbk_procnum, + NULL, 0, NULL); + } } } pthread_mutex_unlock (&conf->mutex); @@ -1409,10 +1417,19 @@ notify (xlator_t *this, int32_t event, void *data, ...) 
server_conf_t *conf = NULL; rpc_transport_t *xprt = NULL; rpc_transport_t *xp_next = NULL; + xlator_t *victim = NULL; + xlator_t *top = NULL; + xlator_t *travxl = NULL; + xlator_list_t **trav_p = NULL; + struct _child_status *tmp = NULL; + gf_boolean_t victim_found = _gf_false; + glusterfs_ctx_t *ctx = NULL; GF_VALIDATE_OR_GOTO (THIS->name, this, out); conf = this->private; GF_VALIDATE_OR_GOTO (this->name, conf, out); + victim = data; + ctx = THIS->ctx; switch (event) { case GF_EVENT_UPCALL: @@ -1441,7 +1458,24 @@ notify (xlator_t *this, int32_t event, void *data, ...) case GF_EVENT_CHILD_UP: { - conf->child_up = _gf_true; + list_for_each_entry (tmp, &conf->child_status->status_list, + status_list) { + if (tmp->name == NULL) + break; + if (strcmp (tmp->name, victim->name) == 0) + break; + } + if (tmp->name) { + tmp->child_up = _gf_true; + } else { + tmp = GF_CALLOC (1, sizeof (struct _child_status), + gf_server_mt_child_status); + INIT_LIST_HEAD (&tmp->status_list); + tmp->name = gf_strdup (victim->name); + tmp->child_up = _gf_true; + list_add_tail (&tmp->status_list, + &conf->child_status->status_list); + } ret = server_process_child_event (this, event, data, GF_CBK_CHILD_UP); if (ret) { @@ -1450,14 +1484,25 @@ notify (xlator_t *this, int32_t event, void *data, ...) "server_process_child_event failed"); goto out; } - default_notify (this, event, data); break; } case GF_EVENT_CHILD_DOWN: { - conf->child_up = _gf_false; + list_for_each_entry (tmp, &conf->child_status->status_list, + status_list) { + if (strcmp (tmp->name, victim->name) == 0) { + tmp->child_up = _gf_false; + break; + } + } + if (!tmp->name) + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_CHILD_STATUS_FAILED, + "No xlator %s is found in " + "child status list", victim->name); + ret = server_process_child_event (this, event, data, GF_CBK_CHILD_DOWN); if (ret) { @@ -1466,13 +1511,12 @@ notify (xlator_t *this, int32_t event, void *data, ...) 
"server_process_child_event failed"); goto out; } - default_notify (this, event, data); break; } - case GF_EVENT_TRANSPORT_CLEANUP: + case GF_EVENT_CLEANUP: conf = this->private; pthread_mutex_lock (&conf->mutex); /* @@ -1493,8 +1537,37 @@ notify (xlator_t *this, int32_t event, void *data, ...) rpc_transport_disconnect (xprt, _gf_false); } } + list_for_each_entry (tmp, &conf->child_status->status_list, + status_list) { + if (strcmp (tmp->name, victim->name) == 0) + break; + } + if (tmp->name && (strcmp (tmp->name, victim->name) == 0)) { + GF_FREE (tmp->name); + list_del (&tmp->status_list); + } pthread_mutex_unlock (&conf->mutex); - /* NB: do *not* propagate anywhere else */ + if (this->ctx->active) { + top = this->ctx->active->first; + LOCK (&ctx->volfile_lock); + for (trav_p = &top->children; *trav_p; + trav_p = &(*trav_p)->next) { + travxl = (*trav_p)->xlator; + if (travxl && + strcmp (travxl->name, victim->name) == 0) { + victim_found = _gf_true; + break; + } + } + UNLOCK (&ctx->volfile_lock); + if (victim_found) + (*trav_p) = (*trav_p)->next; + glusterfs_mgmt_pmap_signout (glusterfsd_ctx, + victim->name); + glusterfs_autoscale_threads (THIS->ctx, -1); + default_notify (victim, GF_EVENT_CLEANUP, data); + + } break; default: diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 0b37eb1414a..09a2d74ff2b 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -73,6 +73,12 @@ struct _volfile_ctx { uint32_t checksum; }; +struct _child_status { + struct list_head status_list; + char *name; + gf_boolean_t child_up; + +}; struct server_conf { rpcsvc_t *rpc; struct rpcsvc_config rpc_conf; @@ -101,9 +107,7 @@ struct server_conf { * in case if volume set options * (say *.allow | *.reject) are * tweeked */ - gf_boolean_t child_up; /* Set to true, when child is up, and - * false, when child is down */ - + struct _child_status *child_status; gf_lock_t itable_lock; }; typedef struct server_conf 
server_conf_t; -- cgit