From 5bc4594dabc08fd4de1940c044946e33037f2ac7 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Tue, 2 Oct 2018 08:54:28 +0530 Subject: core: glusterfsd keeping fd open in index xlator Problem: Current resource cleanup sequence is not perfect while brick mux is enabled Solution: 1) Destroying xprt after cleanup all fd associated with a client 2) Before call fini for brick xlators ensure no stub should be running on a brick Change-Id: I86195785e428f57d3ef0da3e4061021fafacd435 fixes: bz#1631357 Signed-off-by: Mohit Agrawal --- xlators/protocol/server/src/server-handshake.c | 25 +++- xlators/protocol/server/src/server-helpers.c | 76 ++++++++++- xlators/protocol/server/src/server-helpers.h | 4 +- xlators/protocol/server/src/server.c | 178 +++++++++++++++++-------- xlators/protocol/server/src/server.h | 9 +- 5 files changed, 223 insertions(+), 69 deletions(-) (limited to 'xlators/protocol') diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index 30e7e4b0478..9ba6f0b0a95 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -592,6 +592,7 @@ server_setvolume(rpcsvc_request_t *req) goto fail; } + pthread_mutex_lock(&conf->mutex); list_for_each_entry(tmp, &conf->child_status->status_list, status_list) { if (strcmp(tmp->name, name) == 0) @@ -599,10 +600,8 @@ server_setvolume(rpcsvc_request_t *req) } if (!tmp->name) { - gf_msg(this->name, GF_LOG_ERROR, 0, PS_MSG_CHILD_STATUS_FAILED, - "No xlator %s is found in " - "child status list", - name); + gf_msg(this->name, GF_LOG_INFO, 0, PS_MSG_CHILD_STATUS_FAILED, + "No xlator %s is found in child status list", name); } else { ret = dict_set_int32(reply, "child_up", tmp->child_up); if (ret < 0) @@ -610,7 +609,21 @@ server_setvolume(rpcsvc_request_t *req) "Failed to set 'child_up' for xlator %s " "in the reply dict", tmp->name); + if (!tmp->child_up) { + ret = dict_set_str(reply, "ERROR", + "Not received child_up for this xlator"); + if (ret < 0) + gf_msg_debug(this->name, 0, "failed to set error msg"); + + gf_msg(this->name, GF_LOG_ERROR, 0, PS_MSG_CHILD_STATUS_FAILED, + "Not received child_up for this xlator %s", name); + op_ret = -1; + op_errno = EAGAIN; + pthread_mutex_unlock(&conf->mutex); + goto fail; + } } + pthread_mutex_unlock(&conf->mutex); ret = dict_get_str(params, "process-uuid", &client_uid); if (ret < 0) { @@ -798,8 +811,8 @@ server_setvolume(rpcsvc_request_t *req) req->trans->clnt_options = dict_ref(params); gf_msg(this->name, GF_LOG_INFO, 0, PS_MSG_CLIENT_ACCEPTED, - "accepted client from %s (version: %s)", client->client_uid, - (clnt_version) ? clnt_version : "old"); + "accepted client from %s (version: %s) with subvol %s", + client->client_uid, (clnt_version) ? clnt_version : "old", name); gf_event(EVENT_CLIENT_CONNECT, "client_uid=%s;" diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index cb2a73c7168..3e27998897e 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -233,16 +233,51 @@ server_connection_cleanup_flush_cbk(call_frame_t *frame, void *cookie, int32_t ret = -1; fd_t *fd = NULL; client_t *client = NULL; + uint64_t fd_cnt = 0; + xlator_t *victim = NULL; + server_conf_t *conf = NULL; + xlator_t *serv_xl = NULL; + rpc_transport_t *xprt = NULL; + rpc_transport_t *xp_next = NULL; + int32_t detach = (long)cookie; + gf_boolean_t xprt_found = _gf_false; GF_VALIDATE_OR_GOTO("server", this, out); GF_VALIDATE_OR_GOTO("server", frame, out); fd = frame->local; client = frame->root->client; + serv_xl = frame->this; + conf = serv_xl->private; fd_unref(fd); frame->local = NULL; + if (client) + victim = client->bound_xl; + + if (victim) { + fd_cnt = GF_ATOMIC_DEC(victim->fd_cnt); + if (!fd_cnt && conf && detach) { + pthread_mutex_lock(&conf->mutex); + { + list_for_each_entry_safe(xprt, xp_next, &conf->xprt_list, list) + { + if (!xprt->xl_private) + continue; + if (xprt->xl_private == client) { + xprt_found = _gf_true; + break; + } + } + } + pthread_mutex_unlock(&conf->mutex); + if (xprt_found) { + rpc_transport_unref(xprt); + } + } + } + gf_client_unref(client); STACK_DESTROY(frame->root); @@ -253,7 +288,7 @@ out: static int do_fd_cleanup(xlator_t *this, client_t *client, fdentry_t *fdentries, - int fd_count) + int fd_count, int32_t detach) { fd_t *fd = NULL; int i = 0, ret = -1; @@ -265,6 +300,7 @@ do_fd_cleanup(xlator_t *this, client_t *client, fdentry_t *fdentries, GF_VALIDATE_OR_GOTO("server", fdentries, out); bound_xl = client->bound_xl; + for (i = 0; i < fd_count; i++) { fd = fdentries[i].fd; @@ -294,8 +330,9 @@ do_fd_cleanup(xlator_t *this, client_t *client, fdentry_t *fdentries, tmp_frame->root->client = client; memset(&tmp_frame->root->lk_owner, 0, sizeof(gf_lkowner_t)); - STACK_WIND(tmp_frame, server_connection_cleanup_flush_cbk, bound_xl, - bound_xl->fops->flush, fd, NULL); + STACK_WIND_COOKIE(tmp_frame, server_connection_cleanup_flush_cbk, + (void *)(long)detach, bound_xl, + bound_xl->fops->flush, fd, NULL); } } @@ -307,13 +344,19 @@ out: } int -server_connection_cleanup(xlator_t *this, client_t *client, int32_t flags) +server_connection_cleanup(xlator_t *this, client_t *client, int32_t flags, + gf_boolean_t *fd_exist) { server_ctx_t *serv_ctx = NULL; fdentry_t *fdentries = NULL; uint32_t fd_count = 0; int cd_ret = 0; int ret = 0; + xlator_t *bound_xl = NULL; + int i = 0; + fd_t *fd = NULL; + uint64_t fd_cnt = 0; + int32_t detach = 0; GF_VALIDATE_OR_GOTO("server", this, out); GF_VALIDATE_OR_GOTO(this->name, client, out); @@ -343,11 +386,34 @@ server_connection_cleanup(xlator_t *this, client_t *client, int32_t flags) } if (fdentries != NULL) { + /* Loop to configure fd_count on victim brick */ + bound_xl = client->bound_xl; + if (bound_xl) { + for (i = 0; i < fd_count; i++) { + fd = fdentries[i].fd; + if (!fd) + continue; + fd_cnt++; + } + if (fd_cnt) { + if (fd_exist) + (*fd_exist) = _gf_true; + GF_ATOMIC_ADD(bound_xl->fd_cnt, fd_cnt); + } + } + + /* If fd_exist is not NULL it means function is invoke + by server_rpc_notify at the time of getting DISCONNECT + notification + */ + if (fd_exist) + detach = 1; + gf_msg_debug(this->name, 0, "Performing cleanup on %d " "fdentries", fd_count); - ret = do_fd_cleanup(this, client, fdentries, fd_count); + ret = do_fd_cleanup(this, client, fdentries, fd_count, detach); } else gf_msg(this->name, GF_LOG_INFO, 0, PS_MSG_FDENTRY_NULL, "no fdentries to clean"); diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h index 20b8d901bd2..9f2e1546831 100644 --- a/xlators/protocol/server/src/server-helpers.h +++ b/xlators/protocol/server/src/server-helpers.h @@ -43,8 +43,8 @@ call_frame_t * get_frame_from_request(rpcsvc_request_t *req); int -server_connection_cleanup(xlator_t *this, struct _client *client, - int32_t flags); +server_connection_cleanup(xlator_t *this, struct _client *client, int32_t flags, + gf_boolean_t *fd_exist); int server_build_config(xlator_t *this, server_conf_t *conf); diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index b9879cdf148..e1ec5512510 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -133,7 +133,7 @@ server_submit_reply(call_frame_t *frame, rpcsvc_request_t *req, void *arg, "Reply submission failed"); if (frame && client) { server_connection_cleanup(frame->this, client, - INTERNAL_LOCKS | POSIX_LOCKS); + INTERNAL_LOCKS | POSIX_LOCKS, NULL); } else { gf_msg_callingfn("", GF_LOG_ERROR, 0, PS_MSG_REPLY_SUBMIT_FAILED, "Reply submission failed"); @@ -392,6 +392,36 @@ out: return error; } +void +server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) +{ + pthread_t th_id = { + 0, + }; + int th_ret = -1; + server_cleanup_xprt_arg_t *arg = NULL; + + if (!victim_name) + return; + + gf_log(this->name, GF_LOG_INFO, "Create graph janitor thread for brick %s", + victim_name); + + arg = calloc(1, sizeof(*arg)); + arg->this = this; + arg->victim_name = gf_strdup(victim_name); + th_ret = gf_thread_create_detached(&th_id, server_graph_janitor_threads, + arg, "graphjanitor"); + if (th_ret) { + gf_log(this->name, GF_LOG_ERROR, + "graph janitor Thread" + " creation is failed for brick %s", + victim_name); + GF_FREE(arg->victim_name); + free(arg); + } +} + int server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) { @@ -402,14 +432,9 @@ server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) client_t *client = NULL; char *auth_path = NULL; int ret = -1; - gf_boolean_t victim_found = _gf_false; char *xlator_name = NULL; - glusterfs_ctx_t *ctx = NULL; - xlator_t *top = NULL; - xlator_list_t **trav_p = NULL; - xlator_t *travxl = NULL; uint64_t xprtrefcount = 0; - struct _child_status *tmp = NULL; + gf_boolean_t fd_exist = _gf_false; if (!xl || !data) { gf_msg_callingfn("server", GF_LOG_WARNING, 0, PS_MSG_RPC_NOTIFY_ERROR, @@ -420,7 +445,6 @@ server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) this = xl; trans = data; conf = this->private; - ctx = this->ctx; switch (event) { case RPCSVC_EVENT_ACCEPT: { @@ -457,7 +481,8 @@ server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) */ pthread_mutex_lock(&conf->mutex); client = trans->xl_private; - list_del_init(&trans->list); + if (!client) + list_del_init(&trans->list); pthread_mutex_unlock(&conf->mutex); if (!client) @@ -478,8 +503,8 @@ server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) gf_client_ref(client); gf_client_put(client, &detached); if (detached) { - server_connection_cleanup(this, client, - INTERNAL_LOCKS | POSIX_LOCKS); + server_connection_cleanup( + this, client, INTERNAL_LOCKS | POSIX_LOCKS, &fd_exist); gf_event(EVENT_CLIENT_DISCONNECT, "client_uid=%s;" "client_identifier=%s;server_identifier=%s;" @@ -496,53 +521,36 @@ server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) unref_transport: /* rpc_transport_unref() causes a RPCSVC_EVENT_TRANSPORT_DESTROY * to be called in blocking manner - * So no code should ideally be after this unref + * So no code should ideally be after this unref, Call + * rpc_transport_unref only while cleanup_starting flag is not set + * otherwise transport_unref will be call by either + * server_connection_cleanup_flush_cbk or server_submit_reply at the + * time of freeing state */ - rpc_transport_unref(trans); + if (!client || !detached || !fd_exist) + rpc_transport_unref(trans); break; case RPCSVC_EVENT_TRANSPORT_DESTROY: + pthread_mutex_lock(&conf->mutex); client = trans->xl_private; + list_del_init(&trans->list); + pthread_mutex_unlock(&conf->mutex); if (!client) break; - pthread_mutex_lock(&conf->mutex); - list_for_each_entry(tmp, &conf->child_status->status_list, - status_list) - { - if (tmp->name && client->bound_xl && - client->bound_xl->cleanup_starting && - !strcmp(tmp->name, client->bound_xl->name)) { - xprtrefcount = GF_ATOMIC_GET(tmp->xprtrefcnt); - if (xprtrefcount > 0) { - xprtrefcount = GF_ATOMIC_DEC(tmp->xprtrefcnt); - if (xprtrefcount == 0) - xlator_name = gf_strdup(client->bound_xl->name); - } - break; + + if (client->bound_xl && client->bound_xl->cleanup_starting) { + xprtrefcount = GF_ATOMIC_GET(client->bound_xl->xprtrefcnt); + if (xprtrefcount > 0) { + xprtrefcount = GF_ATOMIC_DEC(client->bound_xl->xprtrefcnt); + if (xprtrefcount == 0) + xlator_name = gf_strdup(client->bound_xl->name); } } - pthread_mutex_unlock(&conf->mutex); gf_client_unref(client); if (xlator_name) { - if (this->ctx->active) { - top = this->ctx->active->first; - LOCK(&ctx->volfile_lock); - for (trav_p = &top->children; *trav_p; - trav_p = &(*trav_p)->next) { - travxl = (*trav_p)->xlator; - if (!travxl->call_cleanup && - strcmp(travxl->name, xlator_name) == 0) { - victim_found = _gf_true; - break; - } - } - UNLOCK(&ctx->volfile_lock); - if (victim_found) { - xlator_mem_cleanup(travxl); - rpcsvc_autoscale_threads(ctx, conf->rpc, -1); - } - } + server_call_xlator_mem_cleanup(this, xlator_name); GF_FREE(xlator_name); } @@ -556,6 +564,67 @@ out: return 0; } +void * +server_graph_janitor_threads(void *data) +{ + xlator_t *victim = NULL; + xlator_t *this = NULL; + server_conf_t *conf = NULL; + glusterfs_ctx_t *ctx = NULL; + char *victim_name = NULL; + server_cleanup_xprt_arg_t *arg = NULL; + gf_boolean_t victim_found = _gf_false; + xlator_list_t **trav_p = NULL; + xlator_t *top = NULL; + + GF_ASSERT(data); + + arg = data; + this = arg->this; + victim_name = arg->victim_name; + THIS = arg->this; + conf = this->private; + + ctx = THIS->ctx; + GF_VALIDATE_OR_GOTO(this->name, ctx, out); + + top = this->ctx->active->first; + LOCK(&ctx->volfile_lock); + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + victim = (*trav_p)->xlator; + if (victim->cleanup_starting && + strcmp(victim->name, victim_name) == 0) { + victim_found = _gf_true; + break; + } + } + if (victim_found) + glusterfs_delete_volfile_checksum(ctx, victim->volfile_id); + UNLOCK(&ctx->volfile_lock); + if (!victim_found) { + gf_log(this->name, GF_LOG_ERROR, + "victim brick %s is not" + " found in graph", + victim_name); + goto out; + } + + default_notify(victim, GF_EVENT_PARENT_DOWN, victim); + if (victim->notify_down) { + gf_log(THIS->name, GF_LOG_INFO, + "Start call fini for brick" + " %s stack", + victim->name); + xlator_mem_cleanup(victim); + rpcsvc_autoscale_threads(ctx, conf->rpc, -1); + } + +out: + GF_FREE(arg->victim_name); + free(arg); + return NULL; +} + int32_t server_mem_acct_init(xlator_t *this) { @@ -972,13 +1041,7 @@ server_init(xlator_t *this) conf->child_status = GF_CALLOC(1, sizeof(struct _child_status), gf_server_mt_child_status); INIT_LIST_HEAD(&conf->child_status->status_list); - GF_ATOMIC_INIT(conf->child_status->xprtrefcnt, 0); - /*ret = dict_get_str (this->options, "statedump-path", &statedump_path); - if (!ret) { - gf_path_strip_trailing_slashes (statedump_path); - this->ctx->statedump_path = statedump_path; - }*/ GF_OPTION_INIT("statedump-path", statedump_path, path, out); if (statedump_path) { gf_path_strip_trailing_slashes(statedump_path); @@ -1484,6 +1547,12 @@ server_notify(xlator_t *this, int32_t event, void *data, ...) } case GF_EVENT_CHILD_DOWN: { + if (victim->cleanup_starting) { + victim->notify_down = 1; + gf_log(this->name, GF_LOG_INFO, + "Getting CHILD_DOWN event for brick %s", victim->name); + } + ret = server_process_child_event(this, event, data, GF_CBK_CHILD_DOWN); if (ret) { @@ -1517,7 +1586,7 @@ server_notify(xlator_t *this, int32_t event, void *data, ...) { if (strcmp(tmp->name, victim->name) == 0) { tmp->child_up = _gf_false; - GF_ATOMIC_INIT(tmp->xprtrefcnt, totxprt); + GF_ATOMIC_INIT(victim->xprtrefcnt, totxprt); break; } } @@ -1561,8 +1630,7 @@ server_notify(xlator_t *this, int32_t event, void *data, ...) rpc_clnt_mgmt_pmap_signout(ctx, victim->name); if (!xprt_found && victim_found) { - xlator_mem_cleanup(victim); - rpcsvc_autoscale_threads(ctx, conf->rpc, -1); + server_call_xlator_mem_cleanup(this, victim->name); } } break; diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index e6064af076e..a0e0a960c7c 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -100,7 +100,6 @@ struct _child_status { struct list_head status_list; char *name; gf_boolean_t child_up; - gf_atomic_t xprtrefcnt; }; struct server_conf { rpcsvc_t *rpc; @@ -241,6 +240,11 @@ typedef struct _server_ctx { fdtable_t *fdtable; } server_ctx_t; +typedef struct server_cleanup_xprt_arg { + xlator_t *this; + char *victim_name; +} server_cleanup_xprt_arg_t; + int server_submit_reply(call_frame_t *frame, rpcsvc_request_t *req, void *arg, struct iovec *payload, int payloadcount, @@ -254,6 +258,9 @@ gf_server_check_getxattr_cmd(call_frame_t *frame, const char *name); void forget_inode_if_no_dentry(inode_t *inode); +void * +server_graph_janitor_threads(void *); + server_ctx_t * server_ctx_get(client_t *client, xlator_t *xlator); #endif /* !_SERVER_H */ -- cgit