From 4a14159e82d7b736dec686a170b06e961d7aff53 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 8 Jan 2014 17:01:44 +0530 Subject: protocol/client: conn-id should be unique when lk-heal is off Problem: It was observed that in some cases client disconnects and re-connects before server xlator could detect that a disconnect happened. So it still uses previous fdtable and ltable. But it can so happen that in between disconnect and re-connect an 'unlock' fop may fail because the fds are marked 'bad' in client xlator upon disconnect. Due to this stale locks remain on the brick which lead to hangs/self-heals not happening etc. For the exact bug RCA please look at https://bugzilla.redhat.com/show_bug.cgi?id=1049932#c0 Fix: When lk-heal is not enabled make sure connection-id is different for every setvolume. This will make sure that a previous connection's resources are not re-used in server xlator. Change-Id: Id844aaa76dfcf2740db72533bca53c23b2fe5549 BUG: 1049932 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6669 Tested-by: Gluster Build System Reviewed-by: Krishnan Parthasarathi Reviewed-by: Vijay Bellur --- xlators/protocol/client/src/client-handshake.c | 34 ++++++++++++++++++++------ xlators/protocol/client/src/client.h | 5 ++++ 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'xlators') diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index b2aa66422..7c8be42ed 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -465,17 +465,23 @@ client_set_lk_version (xlator_t *this) clnt_conf_t *conf = NULL; call_frame_t *frame = NULL; gf_set_lk_ver_req req = {0, }; + char *process_uuid = NULL; GF_VALIDATE_OR_GOTO ("client", this, err); conf = (clnt_conf_t *) this->private; req.lk_ver = client_get_lk_ver (conf); - ret = gf_asprintf (&req.uid, "%s-%s-%d", - this->ctx->process_uuid, this->name, - this->graph->id); - if (ret == -1) + ret = dict_get_str (this->options, "process-uuid", &process_uuid); + if (!process_uuid) { + ret = -1; goto err; + } + req.uid = gf_strdup (process_uuid); + if (!req.uid) { + ret = -1; + goto err; + } frame = create_frame (this, this->ctx->pool); if (!frame) { @@ -1524,6 +1530,7 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) char *process_uuid_xl = NULL; clnt_conf_t *conf = NULL; dict_t *options = NULL; + char counter_str[32] = {0}; options = this->options; conf = this->private; @@ -1549,13 +1556,24 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) } } - /* With multiple graphs possible in the same process, we need a + /* When lock-heal is enabled: + * With multiple graphs possible in the same process, we need a field to bring the uniqueness. Graph-ID should be enough to get the - job done + job done. + * When lock-heal is disabled, connection-id should always be unique so + * that server never gets to reuse the previous connection resources + * so it cleans up the resources on every disconnect. Otherwise + * it may lead to stale resources, i.e. leaked file desciptors, + * inode/entry locks */ - ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d", + if (!conf->lk_heal) { + snprintf (counter_str, sizeof (counter_str), + "-%"PRIu64, conf->setvol_count); + conf->setvol_count++; + } + ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d%s", this->ctx->process_uuid, this->name, - this->graph->id); + this->graph->id, counter_str); if (-1 == ret) { gf_log (this->name, GF_LOG_ERROR, "asprintf failed while setting process_uuid"); diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index afab2d74f..bc0f5d0e9 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -120,6 +120,11 @@ typedef struct clnt_conf { */ gf_boolean_t filter_o_direct; /* if set, filter O_DIRECT from the flags list of open() */ + /* set volume is the op which results in creating/re-using + * the conn-id and is called once per connection, this remembers + * how manytimes set_volume is called + */ + uint64_t setvol_count; } clnt_conf_t; typedef struct _client_fd_ctx { -- cgit