From 0043c63f70776444f69667a4ef9596217ecb42b7 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal
Date: Mon, 12 Mar 2018 19:43:15 +0530
Subject: gluster: brick process sometimes crashes while stopping a brick

Problem: The brick process sometimes crashes while a brick is being
         stopped with brick multiplexing enabled.

Solution: The crash happens because rpc connections are not cleaned up
          properly while brick mux is enabled. With this patch, after
          sending the GF_EVENT_CLEANUP notification to the (server)
          xlator, we wait until all rpc client connections for that
          specific xlator are destroyed. Once server_rpc_notify has
          destroyed the rpc connections of every client associated with
          the brick, xlator_mem_cleanup is called for the brick xlator
          as well as all of its child xlators. To avoid races during
          cleanup, two new flags are introduced on each xlator:
          cleanup_starting and call_cleanup.

BUG: 1544090
Signed-off-by: Mohit Agrawal

Note: All test cases were run in a separate build
      (https://review.gluster.org/#/c/19700/) with the same patch after
      forcefully enabling brick mux; all test cases passed. Illustrative
      sketches of the guard flags and the transport drain are appended
      after the patch.

Change-Id: Ic4ab9c128df282d146cf1135640281fcb31997bf
updates: bz#1544090
---
 glusterfsd/src/glusterfsd-mgmt.c                   |  96 +++++++++---
 libglusterfs/src/client_t.c                        |   4 +-
 libglusterfs/src/fd.c                              |   4 +-
 libglusterfs/src/graph.c                           |   6 +-
 libglusterfs/src/inode.c                           |   2 +-
 libglusterfs/src/libglusterfs.sym                  |   1 +
 libglusterfs/src/statedump.c                       |  61 ++++++--
 libglusterfs/src/xlator.c                          |   8 +-
 libglusterfs/src/xlator.h                          |  11 +-
 .../glusterd/rebalance-operations-in-single-node.t |   2 +-
 tests/volume.rc                                    |  31 +++-
 xlators/features/changelog/src/changelog-rpc.c     |   9 ++
 xlators/features/changelog/src/changelog.c         |   5 +
 xlators/features/trash/src/trash.c                 |  18 ++-
 xlators/performance/io-threads/src/io-threads.c    |   2 +-
 xlators/protocol/server/src/server-handshake.c     |  25 ++-
 xlators/protocol/server/src/server.c               | 170 +++++++++++++++------
 xlators/protocol/server/src/server.h               |   2 +-
 xlators/storage/posix/src/posix-common.c           |   6 +-
 xlators/storage/posix/src/posix-helpers.c          |  14 +-
 20 files changed, 365 insertions(+), 112 deletions(-)

diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index bce8d5cc276..e6c8a27e9e8 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -194,11 +194,6 @@ xlator_mem_free (xlator_t *xl)
         if (!xl)
                 return 0;
 
-        GF_FREE (xl->name);
-        GF_FREE (xl->type);
-        xl->name = NULL;
-        xl->type = NULL;
-
         if (xl->options) {
                 dict_ref (xl->options);
                 dict_unref (xl->options);
@@ -210,25 +205,38 @@ xlator_mem_free (xlator_t *xl)
                 GF_FREE (vol_opt);
         }
 
+        xlator_memrec_free (xl);
+
         return 0;
 }
 
 void
 xlator_call_fini (xlator_t *this) {
-        if (!this)
+        if (!this || this->cleanup_starting)
                 return;
+        this->cleanup_starting = 1;
+        this->call_cleanup = 1;
         xlator_call_fini (this->next);
         this->fini (this);
 }
 
 void
 xlator_mem_cleanup (xlator_t *this) {
-        xlator_list_t   *list = this->children;
-        xlator_t        *trav = list->xlator;
-        inode_table_t   *inode_table = NULL;
-        xlator_t        *prev = trav;
+        xlator_list_t     *list = this->children;
+        xlator_t          *trav = list->xlator;
+        inode_table_t     *inode_table = NULL;
+        xlator_t          *prev = trav;
+        glusterfs_ctx_t   *ctx = NULL;
+        xlator_list_t     **trav_p = NULL;
+        xlator_t          *top = NULL;
+        xlator_t          *victim = NULL;
 
-        inode_table = this->itable;
+
+        if (this->call_cleanup)
+                return;
+
+        this->call_cleanup = 1;
+        ctx = glusterfsd_ctx;
 
         xlator_call_fini (trav);
 
@@ -238,6 +246,7 @@ xlator_mem_cleanup (xlator_t *this) {
                 prev = trav;
         }
 
+        inode_table = this->itable;
         if (inode_table) {
                 inode_table_destroy (inode_table);
                 this->itable = NULL;
@@ -249,6 +258,33 @@ xlator_mem_cleanup (xlator_t *this) {
 
         xlator_mem_free (this);
 
+        if (glusterfsd_ctx->active) {
+                top = glusterfsd_ctx->active->first;
+                LOCK (&ctx->volfile_lock);
+                /* TODO: the xlator node in the graph is still leaked here */
+                for (trav_p = &top->children; *trav_p;
+                     trav_p = &(*trav_p)->next) {
+                        victim = (*trav_p)->xlator;
+                        if (victim->call_cleanup &&
+                            !strcmp (victim->name, this->name)) {
+                                (*trav_p) = (*trav_p)->next;
+                                break;
+                        }
+                }
+                /* TODO: sometimes the brick xlator is not unlinked from the
+                   graph above, so walk the children again and unlink only
+                   those brick xlators whose inode table and mem_acct are
+                   already cleaned up
+                */
+                trav_p = &top->children;
+                while (*trav_p) {
+                        victim = (*trav_p)->xlator;
+                        if (victim->call_cleanup && !victim->itable &&
+                            !victim->mem_acct) {
+                                (*trav_p) = (*trav_p)->next;
+                        } else {
+                                trav_p = &(*trav_p)->next;
+                        }
+                }
+                UNLOCK (&ctx->volfile_lock);
+        }
+
 }
 
@@ -260,8 +296,10 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
         glusterfs_ctx_t         *ctx = NULL;
         xlator_t                *top = NULL;
         xlator_t                *victim = NULL;
+        xlator_t                *tvictim = NULL;
         xlator_list_t           **trav_p = NULL;
         gf_boolean_t            lockflag = _gf_false;
+        gf_boolean_t            last_brick = _gf_false;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -279,11 +317,15 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
                 for (trav_p = &top->children; *trav_p;
                      trav_p = &(*trav_p)->next) {
                         victim = (*trav_p)->xlator;
-                        if (strcmp (victim->name, xlator_req.name) == 0) {
+                        if (!victim->cleanup_starting && strcmp (victim->name, xlator_req.name) == 0) {
                                 break;
                         }
                 }
         }
+
+        if (!top)
+                goto err;
+
         if (!*trav_p) {
                 gf_log (THIS->name, GF_LOG_ERROR,
@@ -299,26 +341,38 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
         }
         glusterfs_terminate_response_send (req, 0);
-        if ((trav_p == &top->children) && !(*trav_p)->next) {
+        for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+                tvictim = (*trav_p)->xlator;
+                if (!tvictim->cleanup_starting &&
+                    !strcmp (tvictim->name, xlator_req.name)) {
+                        continue;
+                }
+                if (!tvictim->cleanup_starting) {
+                        last_brick = _gf_true;
+                        break;
+                }
+        }
+        if (!last_brick) {
                 gf_log (THIS->name, GF_LOG_INFO,
                         "terminating after loss of last child %s",
                         xlator_req.name);
                 rpc_clnt_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
                 kill (getpid(), SIGTERM);
         } else {
-                /*
-                 * This is terribly unsafe without quiescing or shutting
-                 * things down properly but it gets us to the point
-                 * where we can test other stuff.
-                 *
-                 * TBD: finish implementing this "detach" code properly
-                 */
+                /* TODO: the cleanup sequence still needs to be implemented
+                   properly for Quota and Changelog
+                */
+                if (victim->cleanup_starting)
+                        goto err;
+
+                victim->cleanup_starting = 1;
+                UNLOCK (&ctx->volfile_lock);
                 lockflag = _gf_true;
+
                 gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
                         " child %s", xlator_req.name);
                 top->notify (top, GF_EVENT_CLEANUP, victim);
-                xlator_mem_cleanup (victim);
+
         }
 
 err:
         if (!lockflag)
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
index 63f4bbb4b06..4596db3470f 100644
--- a/libglusterfs/src/client_t.c
+++ b/libglusterfs/src/client_t.c
@@ -338,7 +338,7 @@ gf_client_destroy_recursive (xlator_t *xl, client_t *client)
 {
         xlator_list_t   *trav;
 
-        if (xl->cbks->client_destroy) {
+        if (!xl->call_cleanup && xl->cbks->client_destroy) {
                 xl->cbks->client_destroy (xl, client);
         }
 
@@ -398,7 +398,7 @@ gf_client_disconnect_recursive (xlator_t *xl, client_t *client)
         int              ret = 0;
         xlator_list_t   *trav;
 
-        if (xl->cbks->client_disconnect) {
+        if (!xl->call_cleanup && xl->cbks->client_disconnect) {
                 ret = xl->cbks->client_disconnect (xl, client);
         }
 
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index d31b106aa8b..30a7494540d 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -484,7 +484,7 @@ fd_destroy (fd_t *fd, gf_boolean_t bound)
                         xl = fd->_ctx[i].xl_key;
                         old_THIS = THIS;
                         THIS = xl;
-                        if (xl->cbks->releasedir)
+                        if (!xl->call_cleanup && xl->cbks->releasedir)
                                 xl->cbks->releasedir (xl, fd);
                         THIS = old_THIS;
                 }
@@ -495,7 +495,7 @@ fd_destroy (fd_t *fd, gf_boolean_t bound)
                         xl = fd->_ctx[i].xl_key;
                         old_THIS = THIS;
                         THIS = xl;
-                        if (xl->cbks->release)
+                        if (!xl->call_cleanup && xl->cbks->release)
                                 xl->cbks->release (xl, fd);
                         THIS = old_THIS;
                 }
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index d36cf7b3da5..2d560b7f265 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -841,7 +841,7 @@ is_graph_topology_equal (glusterfs_graph_t *graph1, glusterfs_graph_t *graph2)
                 trav2 = trav2->children->xlator;
                 for (ltrav = trav1->children; ltrav; ltrav = ltrav->next) {
                         trav1 = ltrav->xlator;
-                        if (strcmp (trav1->name, trav2->name) == 0) {
+                        if (!trav1->cleanup_starting && !strcmp (trav1->name, trav2->name)) {
                                 break;
                         }
                 }
@@ -1088,7 +1088,7 @@ glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
         new_xl = FIRST_CHILD (new_xl);
 
         for (trav = old_xl->children; trav; trav = trav->next) {
-                if (strcmp (trav->xlator->name, new_xl->name) == 0) {
+                if (!trav->xlator->cleanup_starting && !strcmp (trav->xlator->name, new_xl->name)) {
                         return xlator_tree_reconfigure (trav->xlator, new_xl);
                 }
         }
@@ -1237,7 +1237,7 @@ glusterfs_graph_attach (glusterfs_graph_t *orig_graph, char *path,
                 xl->volfile_id[strlen(xl->volfile_id)-4] = '\0';
         }
 
-        /* TBD: memory leaks everywhere */
+        /* TODO: memory leaks everywhere; the graph needs to be freed on error */
         if (glusterfs_graph_prepare (graph, this->ctx, xl->name)) {
                 gf_log (this->name, GF_LOG_WARNING,
                         "failed to prepare graph for xlator %s", xl->name);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 2100ea3cad2..093683d41da 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -327,7 +327,7 @@ __inode_ctx_free (inode_t *inode)
                         xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
                         old_THIS = THIS;
                         THIS = xl;
-                        if (xl->cbks->forget)
+                        if (!xl->call_cleanup && xl->cbks->forget)
                                 xl->cbks->forget (xl, inode);
                         THIS = old_THIS;
                 }
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 1d21cfa8465..4b7dcb6f3e6 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -1100,6 +1100,7 @@ xlator_tree_free_members
 xlator_volopt_dynload
 xlator_volume_option_get
 xlator_volume_option_get_list
+xlator_memrec_free
 default_fops
 gf_fop_list
 gf_upcall_list
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index 34b8061425c..412a47b9383 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -501,21 +501,14 @@ gf_proc_dump_dict_info (glusterfs_ctx_t *ctx)
                     (total_pairs / total_dicts));
 }
 
-void
-gf_proc_dump_xlator_info (xlator_t *top)
+static void
+gf_proc_dump_per_xlator_info (xlator_t *top)
 {
-        xlator_t        *trav = NULL;
-        glusterfs_ctx_t *ctx = NULL;
+        xlator_t        *trav = top;
+        glusterfs_ctx_t *ctx = top->ctx;
         char             itable_key[1024] = {0,};
 
-        if (!top)
-                return;
-
-        ctx = top->ctx;
-
-        trav = top;
-        while (trav) {
-
+        while (trav && !trav->cleanup_starting) {
                 if (ctx->measure_latency)
                         gf_proc_dump_latency_info (trav);
 
@@ -539,7 +532,6 @@ gf_proc_dump_xlator_info (xlator_t *top)
                 if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
                     (trav->dumpops->inode))
                         trav->dumpops->inode (trav);
-
                 if (trav->dumpops->fd &&
                     GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
                         trav->dumpops->fd (trav);
@@ -550,6 +542,30 @@ gf_proc_dump_xlator_info (xlator_t *top)
 
                 trav = trav->next;
         }
+}
+
+
+
+void
+gf_proc_dump_xlator_info (xlator_t *top, gf_boolean_t brick_mux)
+{
+        xlator_t        *trav = NULL;
+        xlator_list_t   **trav_p = NULL;
+
+        if (!top)
+                return;
+
+        trav = top;
+        gf_proc_dump_per_xlator_info (trav);
+
+        if (brick_mux) {
+                trav_p = &top->children;
+                while (*trav_p) {
+                        trav = (*trav_p)->xlator;
+                        gf_proc_dump_per_xlator_info (trav);
+                        trav_p = &(*trav_p)->next;
+                }
+        }
 
         return;
 }
@@ -803,12 +819,27 @@ gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx)
         char               tmp_dump_name[PATH_MAX] = {0,};
         char               path[PATH_MAX] = {0,};
         struct timeval     tv = {0,};
+        gf_boolean_t       is_brick_mux = _gf_false;
+        xlator_t          *top = NULL;
+        xlator_list_t    **trav_p = NULL;
+        int                brick_count = 0;
 
         gf_proc_dump_lock ();
 
         if (!ctx)
                 goto out;
 
+        if (ctx) {
+                top = ctx->active->first;
+                for (trav_p = &top->children; *trav_p;
+                     trav_p = &(*trav_p)->next) {
+                        brick_count++;
+                }
+
+                if (brick_count > 1)
+                        is_brick_mux = _gf_true;
+        }
+
         if (ctx->cmd_args.brick_name) {
                 GF_REMOVE_SLASH_FROM_PATH (ctx->cmd_args.brick_name, brick_name);
         } else
@@ -868,12 +899,12 @@ gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx)
 
         if (ctx->master) {
                 gf_proc_dump_add_section ("fuse");
-                gf_proc_dump_xlator_info (ctx->master);
+                gf_proc_dump_xlator_info (ctx->master, _gf_false);
         }
 
         if (ctx->active) {
                 gf_proc_dump_add_section ("active graph - %d", ctx->graph_id);
-                gf_proc_dump_xlator_info (ctx->active->top);
+                gf_proc_dump_xlator_info (ctx->active->top, is_brick_mux);
         }
 
         i = 0;
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 466be5e3a3a..ced89c71672 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -629,7 +629,7 @@ get_xlator_by_name_or_type (xlator_t *this, char *target, int is_name)
 
         for (trav = this->children; trav; trav = trav->next) {
                 value = is_name ? trav->xlator->name : trav->xlator->type;
-                if (strcmp(value, target) == 0) {
+                if (!strcmp(value, target)) {
                         return trav->xlator;
                 }
                 child_xl = get_xlator_by_name_or_type (trav->xlator, target,
@@ -722,7 +722,9 @@ xlator_init (xlator_t *xl)
         }
 
         xl->init_succeeded = 1;
-
+        /*xl->cleanup_starting = 0;
+          xl->call_cleanup = 0;
+        */
         ret = 0;
 out:
         return ret;
@@ -858,7 +860,7 @@ xlator_list_destroy (xlator_list_t *list)
         return 0;
 }
 
-static int
+int
 xlator_memrec_free (xlator_t *xl)
 {
         uint32_t        i = 0;
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 2f8fed6bb64..3c16758e1a9 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -1043,6 +1043,14 @@ struct _xlator {
         /* Is this pass_through? */
         gf_boolean_t        pass_through;
         struct xlator_fops *pass_through_fops;
+
+        /* cleanup flag to avoid races during xlator cleanup */
+        uint32_t            cleanup_starting;
+
+        /* flag to avoid a recall of xlator_mem_cleanup for the same xlator */
+        uint32_t            call_cleanup;
+
+
 };
 
 typedef struct {
@@ -1236,5 +1244,6 @@ copy_opts_to_child (xlator_t *src, xlator_t *dst, char *glob);
 int
 glusterfs_delete_volfile_checksum (glusterfs_ctx_t *ctx,
                                    const char *volfile_id);
-
+int
+xlator_memrec_free (xlator_t *xl);
 #endif /* _XLATOR_H */
diff --git a/tests/bugs/glusterd/rebalance-operations-in-single-node.t b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
index c0823afebb8..9144b4a5000 100644
--- a/tests/bugs/glusterd/rebalance-operations-in-single-node.t
+++ b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
@@ -119,7 +119,7 @@ TEST touch $M0/dir{21..30}/files{1..10};
 
 TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{7,8}
 TEST $CLI volume rebalance $V0 start force
-EXPECT_WITHIN 60 "completed" rebalance_status_field $V0
+EXPECT_WITHIN 90 "completed" rebalance_status_field $V0
 
 TEST pkill gluster
 TEST glusterd
diff --git a/tests/volume.rc b/tests/volume.rc
index 44428606711..ea8cfb666a1 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -801,26 +801,53 @@ function count_sh_entries()
         ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
 }
 
+function check_brick_multiplex() {
+        cnt="$(ls /var/log/glusterfs/bricks|wc -l)"
+        local ret=$($CLI volume info|grep "cluster.brick-multiplex"|cut -d" " -f2)
+        local bcnt="$(brick_count)"
+
+        if [ $bcnt -ne 1 ]; then
+           if [ "$ret" = "on" ] || [ $cnt -eq 1 ]; then
+              echo "Y"
+           else
+              echo "N"
+           fi
+        else
+           echo "N"
+        fi
+}
+
 function get_fd_count {
         local vol=$1
         local host=$2
         local brick=$3
         local fname=$4
+        local val="$(check_brick_multiplex)"
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
         local statedump=$(generate_brick_statedump $vol $host $brick)
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
+        if [ $val == "N" ]; then
+           count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+        else
+           count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+        fi
         rm -f $statedump
         echo $count
 }
 
+
 function get_active_fd_count {
         local vol=$1
         local host=$2
         local brick=$3
         local fname=$4
+        local val="$(check_brick_multiplex)"
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
         local statedump=$(generate_brick_statedump $vol $host $brick)
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
+        if [ $val == "N" ]; then
+           count=$(grep "gfid=$gfid_str" $statedump -A2 | grep active-fd-count | cut -f2 -d'=' | tail -1)
+        else
+           count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep active-fd-count | cut -f2 -d'=' | tail -1)
+        fi
         rm -f $statedump
         echo $count
 }
diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c
index 4b2b38cad51..852c0694f9a 100644
--- a/xlators/features/changelog/src/changelog-rpc.c
+++ b/xlators/features/changelog/src/changelog-rpc.c
@@ -258,6 +258,15 @@ changelog_handle_probe (rpcsvc_request_t *req)
         changelog_probe_req   rpc_req = {0,};
         changelog_probe_rsp   rpc_rsp = {0,};
 
+
+        this = req->trans->xl;
+        if (this->cleanup_starting) {
+                gf_msg (this->name, GF_LOG_DEBUG, 0,
+                        CHANGELOG_MSG_HANDLE_PROBE_ERROR,
+                        "cleanup_starting flag is already set for xl");
+                return 0;
+        }
+
         ret = xdr_to_generic (req->msg[0], &rpc_req,
                               (xdrproc_t)xdr_changelog_probe_req);
         if (ret < 0) {
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 19d66b605bb..12997181da4 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -2894,6 +2894,7 @@ void
 fini (xlator_t *this)
 {
         changelog_priv_t *priv = NULL;
+        struct list_head queue = {0, };
 
         priv = this->private;
 
@@ -2901,6 +2902,10 @@ fini (xlator_t *this)
                 /* terminate RPC server/threads */
                 changelog_cleanup_rpc (this, priv);
 
+                /* call barrier_disable to cancel the timer */
+                if (priv->barrier_enabled)
+                        __chlog_barrier_disable (this, &queue);
+
                 /* cleanup barrier related objects */
                 changelog_barrier_pthread_destroy (priv);
 
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index e8f8b7bf051..8a92685cf4b 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -2616,16 +2616,24 @@ fini (xlator_t *this)
 
         GF_VALIDATE_OR_GOTO ("trash", this, out);
         priv = this->private;
-        inode_table = priv->trash_itable;
         if (priv) {
-                if (priv->newtrash_dir)
+                inode_table = priv->trash_itable;
+                if (priv->newtrash_dir) {
                         GF_FREE (priv->newtrash_dir);
-                if (priv->oldtrash_dir)
+                        priv->newtrash_dir = NULL;
+                }
+                if (priv->oldtrash_dir) {
                         GF_FREE (priv->oldtrash_dir);
-                if (priv->brick_path)
+                        priv->oldtrash_dir = NULL;
+                }
+                if (priv->brick_path) {
                         GF_FREE (priv->brick_path);
-                if (priv->eliminate)
+                        priv->brick_path = NULL;
+                }
+                if (priv->eliminate) {
                         wipe_eliminate_path (&priv->eliminate);
+                        priv->eliminate = NULL;
+                }
                 if (inode_table) {
                         inode_table_destroy (inode_table);
                         priv->trash_itable = NULL;
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 4531137c936..49a515712f5 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -228,7 +228,7 @@ iot_worker (void *data)
                                         "Dropping poisoned request %p.",
                                         stub);
                                 call_stub_destroy (stub);
                         } else {
-                                call_resume (stub);
+                                call_resume (stub);
                         }
                 }
                 stub = NULL;
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index de90a6b8eda..08f76de9748 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -474,6 +474,7 @@ server_setvolume (rpcsvc_request_t *req)
         struct _child_status *tmp = NULL;
         char                 *subdir_mount = NULL;
         char                 *client_name = NULL;
+        gf_boolean_t         cleanup_starting = _gf_false;
 
         params = dict_new ();
         reply  = dict_new ();
@@ -575,11 +576,13 @@ server_setvolume (rpcsvc_request_t *req)
Try again later"); goto fail; } + list_for_each_entry (tmp, &conf->child_status->status_list, status_list) { if (strcmp (tmp->name, name) == 0) break; } + if (!tmp->name) { gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_CHILD_STATUS_FAILED, @@ -593,6 +596,7 @@ server_setvolume (rpcsvc_request_t *req) "Failed to set 'child_up' for xlator %s " "in the reply dict", tmp->name); } + ret = dict_get_str (params, "process-uuid", &client_uid); if (ret < 0) { ret = dict_set_str (reply, "ERROR", @@ -634,8 +638,27 @@ server_setvolume (rpcsvc_request_t *req) goto fail; } - if (req->trans->xl_private != client) + pthread_mutex_lock (&conf->mutex); + if (xl->cleanup_starting) { + cleanup_starting = _gf_true; + } else if (req->trans->xl_private != client) { req->trans->xl_private = client; + } + pthread_mutex_unlock (&conf->mutex); + + if (cleanup_starting) { + op_ret = -1; + op_errno = EAGAIN; + + ret = dict_set_str (reply, "ERROR", + "cleanup flag is set for xlator. " + " Try again later"); + if (ret < 0) + gf_msg_debug (this->name, 0, "failed to set error: " + "cleanup flag is set for xlator. " + "Try again later"); + goto fail; + } auth_set_username_passwd (params, config_params, client); if (req->trans->ssl_name) { diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index fe1fb71a7ef..03138689b14 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -423,7 +423,16 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, server_conf_t *conf = NULL; client_t *client = NULL; char *auth_path = NULL; - int ret = -1; + int ret = -1; + gf_boolean_t victim_found = _gf_false; + char *xlator_name = NULL; + glusterfs_ctx_t *ctx = NULL; + xlator_t *top = NULL; + xlator_list_t **trav_p = NULL; + xlator_t *travxl = NULL; + uint64_t xprtrefcount = 0; + struct _child_status *tmp = NULL; + if (!xl || !data) { gf_msg_callingfn ("server", GF_LOG_WARNING, 0, @@ -435,6 +444,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, this = xl; trans = data; conf = this->private; + ctx = glusterfsd_ctx; switch (event) { case RPCSVC_EVENT_ACCEPT: @@ -520,9 +530,47 @@ unref_transport: client = trans->xl_private; if (!client) break; + pthread_mutex_lock (&conf->mutex); + list_for_each_entry (tmp, &conf->child_status->status_list, + status_list) { + if (tmp->name && client->bound_xl && + client->bound_xl->cleanup_starting && + !strcmp (tmp->name, client->bound_xl->name)) { + xprtrefcount = GF_ATOMIC_GET (tmp->xprtrefcnt); + if (xprtrefcount > 0) { + xprtrefcount = GF_ATOMIC_DEC (tmp->xprtrefcnt); + if (xprtrefcount == 0) + xlator_name = gf_strdup(client->bound_xl->name); + } + break; + } + } + pthread_mutex_unlock (&conf->mutex); gf_client_unref (client); + if (xlator_name) { + if (this->ctx->active) { + top = this->ctx->active->first; + LOCK (&ctx->volfile_lock); + for (trav_p = &top->children; *trav_p; + trav_p = &(*trav_p)->next) { + travxl = (*trav_p)->xlator; + if (!travxl->call_cleanup && + strcmp (travxl->name, xlator_name) == 0) { + victim_found = _gf_true; + break; + } + } + UNLOCK (&ctx->volfile_lock); + if (victim_found) { + xlator_mem_cleanup (travxl); + rpcsvc_autoscale_threads (ctx, conf->rpc, -1); + } + } + GF_FREE (xlator_name); + } + trans->xl_private = NULL; break; default: @@ -966,6 +1014,7 @@ server_init (xlator_t *this) conf->child_status = GF_CALLOC (1, sizeof (struct _child_status), gf_server_mt_child_status); INIT_LIST_HEAD (&conf->child_status->status_list); + GF_ATOMIC_INIT 
+        GF_ATOMIC_INIT (conf->child_status->xprtrefcnt, 0);
 
         /*ret = dict_get_str (this->options, "statedump-path", &statedump_path);
         if (!ret) {
@@ -1331,14 +1380,53 @@ server_process_child_event (xlator_t *this, int32_t event, void *data,
         int              ret = -1;
         server_conf_t   *conf = NULL;
         rpc_transport_t *xprt = NULL;
+        xlator_t        *victim = NULL;
+        struct _child_status *tmp = NULL;
 
         GF_VALIDATE_OR_GOTO(this->name, data, out);
 
         conf = this->private;
         GF_VALIDATE_OR_GOTO(this->name, conf, out);
+        victim = data;
 
         pthread_mutex_lock (&conf->mutex);
         {
+                if (cbk_procnum == GF_CBK_CHILD_UP) {
+                        list_for_each_entry (tmp, &conf->child_status->status_list,
+                                             status_list) {
+                                if (tmp->name == NULL)
+                                        break;
+                                if (strcmp (tmp->name, victim->name) == 0)
+                                        break;
+                        }
+                        if (tmp->name) {
+                                tmp->child_up = _gf_true;
+                        } else {
+                                tmp = GF_CALLOC (1, sizeof (struct _child_status),
+                                                 gf_server_mt_child_status);
+                                INIT_LIST_HEAD (&tmp->status_list);
+                                tmp->name = gf_strdup (victim->name);
+                                tmp->child_up = _gf_true;
+                                list_add_tail (&tmp->status_list,
+                                               &conf->child_status->status_list);
+                        }
+                }
+
+                if (cbk_procnum == GF_CBK_CHILD_DOWN) {
+                        list_for_each_entry (tmp, &conf->child_status->status_list,
+                                             status_list) {
+                                if (strcmp (tmp->name, victim->name) == 0) {
+                                        tmp->child_up = _gf_false;
+                                        break;
+                                }
+                        }
+
+                        if (!tmp->name)
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        PS_MSG_CHILD_STATUS_FAILED,
+                                        "No xlator %s is found in "
+                                        "child status list", victim->name);
+                }
                 list_for_each_entry (xprt, &conf->xprt_list, list) {
                         if (!xprt->xl_private) {
                                 continue;
                         }
@@ -1372,6 +1460,8 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
         struct _child_status *tmp = NULL;
         gf_boolean_t     victim_found = _gf_false;
         glusterfs_ctx_t  *ctx = NULL;
+        gf_boolean_t     xprt_found = _gf_false;
+        uint64_t         totxprt = 0;
 
         GF_VALIDATE_OR_GOTO (THIS->name, this, out);
         conf = this->private;
@@ -1406,24 +1496,6 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
 
         case GF_EVENT_CHILD_UP:
         {
-                list_for_each_entry (tmp, &conf->child_status->status_list,
-                                     status_list) {
-                        if (tmp->name == NULL)
-                                break;
-                        if (strcmp (tmp->name, victim->name) == 0)
-                                break;
-                }
-                if (tmp->name) {
-                        tmp->child_up = _gf_true;
-                } else {
-                        tmp = GF_CALLOC (1, sizeof (struct _child_status),
-                                         gf_server_mt_child_status);
-                        INIT_LIST_HEAD (&tmp->status_list);
-                        tmp->name = gf_strdup (victim->name);
-                        tmp->child_up = _gf_true;
-                        list_add_tail (&tmp->status_list,
-                                       &conf->child_status->status_list);
-                }
                 ret = server_process_child_event (this, event, data,
                                                   GF_CBK_CHILD_UP);
                 if (ret) {
@@ -1438,19 +1510,6 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
 
         case GF_EVENT_CHILD_DOWN:
         {
-                list_for_each_entry (tmp, &conf->child_status->status_list,
-                                     status_list) {
-                        if (strcmp (tmp->name, victim->name) == 0) {
-                                tmp->child_up = _gf_false;
-                                break;
-                        }
-                }
-                if (!tmp->name)
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                PS_MSG_CHILD_STATUS_FAILED,
-                                "No xlator %s is found in "
-                                "child status list", victim->name);
-
                 ret = server_process_child_event (this, event, data,
                                                   GF_CBK_CHILD_DOWN);
                 if (ret) {
@@ -1467,6 +1526,28 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
         case GF_EVENT_CLEANUP:
                 conf = this->private;
                 pthread_mutex_lock (&conf->mutex);
+                /* Calculate the total number of xprts available in the
+                   list for this brick xlator
+                */
+                list_for_each_entry_safe (xprt, xp_next,
+                                          &conf->xprt_list, list) {
+                        if (!xprt->xl_private) {
+                                continue;
+                        }
+                        if (xprt->xl_private->bound_xl == data) {
+                                totxprt++;
+                        }
+                }
+
+                list_for_each_entry (tmp, &conf->child_status->status_list,
+                                     status_list) {
+                        if (strcmp (tmp->name, victim->name) == 0) {
+                                tmp->child_up = _gf_false;
+                                GF_ATOMIC_INIT (tmp->xprtrefcnt, totxprt);
+                                break;
+                        }
+                }
+
                 /*
                  * Disconnecting will (usually) drop the last ref, which will
                  * cause the transport to be unlinked and freed while we're
@@ -1482,18 +1563,11 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
                                 gf_log (this->name, GF_LOG_INFO,
                                         "disconnecting %s",
                                         xprt->peerinfo.identifier);
+                                xprt_found = _gf_true;
                                 rpc_transport_disconnect (xprt, _gf_false);
                         }
                 }
-                list_for_each_entry (tmp, &conf->child_status->status_list,
-                                     status_list) {
-                        if (strcmp (tmp->name, victim->name) == 0)
-                                break;
-                }
-                if (tmp->name && (strcmp (tmp->name, victim->name) == 0)) {
-                        GF_FREE (tmp->name);
-                        list_del (&tmp->status_list);
-                }
+
                 pthread_mutex_unlock (&conf->mutex);
                 if (this->ctx->active) {
                         top = this->ctx->active->first;
@@ -1501,8 +1575,8 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
                         for (trav_p = &top->children; *trav_p;
                              trav_p = &(*trav_p)->next) {
                                 travxl = (*trav_p)->xlator;
-                                if (travxl &&
-                                    strcmp (travxl->name, victim->name) == 0) {
+                                if (!travxl->call_cleanup &&
+                                    strcmp (travxl->name, victim->name) == 0) {
                                         victim_found = _gf_true;
                                         break;
                                 }
@@ -1511,11 +1585,13 @@ server_notify (xlator_t *this, int32_t event, void *data, ...)
                         glusterfs_delete_volfile_checksum (ctx,
                                                            victim->volfile_id);
                         UNLOCK (&ctx->volfile_lock);
-                        if (victim_found)
-                                (*trav_p) = (*trav_p)->next;
+
                         rpc_clnt_mgmt_pmap_signout (ctx, victim->name);
-                        /* we need the protocol/server xlator here as 'this' */
-                        rpcsvc_autoscale_threads (ctx, conf->rpc, -1);
+
+                        if (!xprt_found && victim_found) {
+                                xlator_mem_cleanup (victim);
+                                rpcsvc_autoscale_threads (ctx, conf->rpc, -1);
+                        }
                 }
                 break;
 
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index 393219bf290..ea1fbf92919 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -99,7 +99,7 @@ struct _child_status {
         struct list_head status_list;
         char *name;
         gf_boolean_t child_up;
-
+        gf_atomic_t xprtrefcnt;
 };
 struct server_conf {
         rpcsvc_t           *rpc;
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index 507bfc20991..bcaad2703e9 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -1105,12 +1105,13 @@ posix_fini (xlator_t *this)
         struct posix_private *priv = this->private;
         if (!priv)
                 return;
-        this->private = NULL;
-        if (priv->health_check) {
+        LOCK (&priv->lock);
+        if (priv->health_check_active) {
                 priv->health_check_active = _gf_false;
                 pthread_cancel (priv->health_check);
                 priv->health_check = 0;
         }
+        UNLOCK (&priv->lock);
         if (priv->disk_space_check) {
                 priv->disk_space_check_active = _gf_false;
                 pthread_cancel (priv->disk_space_check);
@@ -1135,6 +1136,7 @@ posix_fini (xlator_t *this)
         GF_FREE (priv->hostname);
         GF_FREE (priv->trash_path);
         GF_FREE (priv);
+        this->private = NULL;
 
         return;
 }
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 0ff94df944e..e9d379fda07 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -2001,6 +2001,12 @@ out:
         return NULL;
 
 abort:
+        LOCK (&priv->lock);
+        {
+                priv->health_check_active = _gf_false;
+        }
+        UNLOCK (&priv->lock);
+
         /* health-check failed */
         gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
                 "health-check failed, going down");
@@ -2041,18 +2047,18 @@ abort:
                         for (trav_p = &top->children; *trav_p;
                              trav_p = &(*trav_p)->next) {
                                 victim = (*trav_p)->xlator;
-                                if (victim &&
-                                    strcmp (victim->name, priv->base_path) == 0) {
+                                if (!victim->call_cleanup &&
+                                    strcmp (victim->name, priv->base_path) == 0) {
                                         victim_found = _gf_true;
                                         break;
                                 }
                         }
                         UNLOCK (&ctx->volfile_lock);
-                        if (victim_found) {
+                        if (victim_found && !victim->cleanup_starting) {
                                 gf_log (THIS->name, GF_LOG_INFO,
                                         "detaching not-only "
                                         " child %s", priv->base_path);
+                                victim->cleanup_starting = 1;
                                 top->notify (top, GF_EVENT_CLEANUP, victim);
-                                xlator_mem_cleanup (victim);
                         }
                 }
 
--
cgit
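
The sketches below are appendix material, not part of the patch.

First, the cleanup_starting/call_cleanup guard pair from the commit
message, reduced to a minimal, self-contained C program. The my_xlator_t
type is hypothetical, and C11 atomics stand in for the mutex and
volfile-lock protection the real GlusterFS code uses; treat this as an
illustration of the idea, not as the project's implementation.

/* Two-flag cleanup guard, as added to struct _xlator by this patch:
 * cleanup_starting - set once when a detach begins, so duplicate
 *                    CLEANUP events and late clients skip the xlator.
 * call_cleanup     - set once when memory teardown actually runs, so
 *                    xlator_mem_cleanup cannot be re-entered.
 */
#include <stdatomic.h>
#include <stdio.h>

typedef struct my_xlator {
        const char  *name;
        atomic_uint  cleanup_starting;
        atomic_uint  call_cleanup;
} my_xlator_t;

/* Begin a detach: 0 if we won the race, -1 if one is already running. */
static int
begin_cleanup (my_xlator_t *xl)
{
        unsigned expected = 0;
        if (!atomic_compare_exchange_strong (&xl->cleanup_starting,
                                             &expected, 1))
                return -1;
        return 0;
}

/* Memory teardown: callable from several paths, runs at most once. */
static void
mem_cleanup (my_xlator_t *xl)
{
        unsigned expected = 0;
        if (!atomic_compare_exchange_strong (&xl->call_cleanup,
                                             &expected, 1))
                return;
        printf ("freeing resources of %s\n", xl->name);
}

int
main (void)
{
        my_xlator_t brick = { .name = "patchy-brick" };

        if (begin_cleanup (&brick) == 0)
                printf ("detach started for %s\n", brick.name);
        if (begin_cleanup (&brick) != 0)
                printf ("second detach request skipped\n");

        mem_cleanup (&brick);   /* runs */
        mem_cleanup (&brick);   /* no-op */
        return 0;
}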
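
Second, the ordering fix itself: on GF_EVENT_CLEANUP the server counts
the transports still bound to the brick and parks that count in the new
atomic xprtrefcnt; every RPCSVC_EVENT_DISCONNECT decrements it, and only
the disconnect that drains it to zero performs the memory cleanup. A
compressed sketch of that sequence, again with hypothetical types (the
real code spreads this across server_notify and server_rpc_notify under
conf->mutex):

#include <stdatomic.h>
#include <stdio.h>

typedef struct brick_status {
        const char   *name;
        atomic_ulong  xprtrefcnt;   /* transports still attached */
} brick_status_t;

/* GF_EVENT_CLEANUP path: record how many transports must drain.
 * (The real code cleans up immediately when no transport exists.) */
static void
start_detach (brick_status_t *st, unsigned long live_xprts)
{
        atomic_store (&st->xprtrefcnt, live_xprts);
        printf ("detach of %s waits for %lu disconnects\n",
                st->name, live_xprts);
}

/* RPCSVC_EVENT_DISCONNECT path: the last transport out cleans up. */
static void
on_disconnect (brick_status_t *st)
{
        if (atomic_fetch_sub (&st->xprtrefcnt, 1) == 1)
                printf ("last transport gone: xlator_mem_cleanup(%s)\n",
                        st->name);
        else
                printf ("transport closed, %s still draining\n", st->name);
}

int
main (void)
{
        brick_status_t st = { .name = "patchy-brick" };

        start_detach (&st, 3);      /* three clients were connected */
        on_disconnect (&st);
        on_disconnect (&st);
        on_disconnect (&st);        /* this one frees the xlator */
        return 0;
}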
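
Third, the statedump change infers multiplexing by counting the children
of the top xlator: more than one child in a single brick process means
brick mux, and gf_proc_dump_xlator_info then walks every child. A toy
version of that test (the list type is hypothetical):

#include <stdio.h>

typedef struct xl_node {
        const char     *name;
        struct xl_node *next;
} xl_node_t;

/* Mirrors the brick_count loop added to gf_proc_dump_info. */
static int
is_brick_mux (xl_node_t *children)
{
        int        brick_count = 0;
        xl_node_t *n = NULL;

        for (n = children; n; n = n->next)
                brick_count++;
        return brick_count > 1;
}

int
main (void)
{
        xl_node_t b2 = { "patchy2", NULL };
        xl_node_t b1 = { "patchy1", &b2 };

        printf ("two bricks -> mux: %s\n", is_brick_mux (&b1) ? "Y" : "N");
        printf ("one brick  -> mux: %s\n", is_brick_mux (&b2) ? "Y" : "N");
        return 0;
}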