diff options
Diffstat (limited to 'xlators/protocol/client/src/client-handshake.c')
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 672 |
1 files changed, 395 insertions, 277 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index e1bcd4e1c..5668fea53 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -34,7 +25,8 @@ #include "portmap-xdr.h" #include "rpc-common-xdr.h" -extern rpc_clnt_prog_t clnt3_1_fop_prog; +#define CLIENT_REOPEN_MAX_ATTEMPTS 1024 +extern rpc_clnt_prog_t clnt3_3_fop_prog; extern rpc_clnt_prog_t clnt_pmap_prog; int client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count, @@ -61,7 +53,7 @@ rpc_client_ping_timer_expired (void *data) rpc_clnt_connection_t *conn = NULL; int disconnect = 0; int transport_activity = 0; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; struct timeval current = {0, }; struct rpc_clnt *clnt = NULL; xlator_t *this = NULL; @@ -109,7 +101,7 @@ rpc_client_ping_timer_expired (void *data) "ping timer expired but transport activity " "detected - not bailing transport"); timeout.tv_sec = conf->opt.ping_timeout; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; conn->ping_timer = gf_timer_call_after (this->ctx, timeout, @@ -148,7 +140,7 @@ client_start_ping (void *data) clnt_conf_t *conf = NULL; rpc_clnt_connection_t *conn = NULL; int32_t ret = -1; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; call_frame_t *frame = NULL; int frame_count = 0; @@ -204,7 +196,7 @@ client_start_ping (void *data) } timeout.tv_sec = conf->opt.ping_timeout; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; conn->ping_timer = gf_timer_call_after (this->ctx, timeout, @@ -227,14 +219,14 @@ client_start_ping (void *data) ret = client_submit_request (this, NULL, frame, conf->handshake, GF_HNDSK_PING, client_ping_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)NULL); - if (ret) - goto fail; + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "failed to start ping timer"); + } return; -fail: - gf_log (THIS->name, GF_LOG_ERROR, - "failed to start ping timer"); +fail: if (frame) { STACK_DESTROY (frame->root); } @@ -249,42 +241,47 @@ client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count, { xlator_t *this = NULL; rpc_clnt_connection_t *conn = NULL; - struct timeval timeout = {0, }; + struct timespec timeout = {0, }; call_frame_t *frame = NULL; clnt_conf_t *conf = NULL; if (!myframe) { - gf_log (THIS->name, GF_LOG_WARNING, "frame with the request is NULL"); + gf_log (THIS->name, GF_LOG_WARNING, + "frame with the request is NULL"); goto out; } frame = myframe; this = frame->this; if (!this || !this->private) { - gf_log (THIS->name, GF_LOG_WARNING, "xlator private is not set"); + gf_log (THIS->name, GF_LOG_WARNING, + "xlator private is not set"); goto out; } conf = this->private; conn = &conf->rpc->conn; - if (req->rpc_status == -1) { - if (conn->ping_timer != NULL) { - gf_log (this->name, GF_LOG_WARNING, "socket or ib" - " related error"); - gf_timer_call_cancel (this->ctx, conn->ping_timer); - conn->ping_timer = NULL; - } else { - /* timer expired and transport bailed out */ - gf_log (this->name, GF_LOG_WARNING, "timer must have " - "expired"); - } - goto out; - } - pthread_mutex_lock (&conn->lock); { + if (req->rpc_status == -1) { + if (conn->ping_timer != NULL) { + gf_log (this->name, GF_LOG_WARNING, + "socket or ib related error"); + gf_timer_call_cancel (this->ctx, + conn->ping_timer); + conn->ping_timer = NULL; + } else { + /* timer expired and transport bailed out */ + gf_log (this->name, GF_LOG_WARNING, + "timer must have expired"); + } + + goto unlock; + } + + timeout.tv_sec = conf->opt.ping_timeout; - timeout.tv_usec = 0; + timeout.tv_nsec = 0; gf_timer_call_cancel (this->ctx, conn->ping_timer); @@ -297,6 +294,7 @@ client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count, gf_log (this->name, GF_LOG_WARNING, "failed to set the ping timer"); } +unlock: pthread_mutex_unlock (&conn->lock); out: if (frame) @@ -346,11 +344,11 @@ client3_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count, } out: - STACK_UNWIND_STRICT (getspec, frame, rsp.op_ret, rsp.op_errno, rsp.spec); + CLIENT_STACK_UNWIND (getspec, frame, rsp.op_ret, rsp.op_errno, + rsp.spec); /* Don't use 'GF_FREE', this is allocated by libc */ - if (rsp.spec) - free (rsp.spec); + free (rsp.spec); return 0; } @@ -376,13 +374,14 @@ int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data) NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gf_getspec_req); - if (ret) - goto unwind; + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to send the request"); + } return 0; unwind: - gf_log (this->name, GF_LOG_WARNING, "failed to send the request"); - STACK_UNWIND_STRICT (getspec, frame, -1, op_errno, NULL); + CLIENT_STACK_UNWIND (getspec, frame, -1, op_errno, NULL); return 0; } @@ -404,6 +403,28 @@ client_notify_parents_child_up (xlator_t *this) } int +clnt_fd_lk_reacquire_failed (xlator_t *this, clnt_fd_ctx_t *fdctx, + clnt_conf_t *conf) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + GF_VALIDATE_OR_GOTO (this->name, fdctx, out); + + pthread_mutex_lock (&conf->lock); + { + fdctx->remote_fd = -1; + fdctx->lk_heal_state = GF_LK_HEAL_DONE; + } + pthread_mutex_unlock (&conf->lock); + + ret = 0; +out: + return ret; +} + +int client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -425,7 +446,7 @@ client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov, gf_log (fr->this->name, GF_LOG_WARNING, "xdr decoding failed"); else - gf_log (fr->this->name, GF_LOG_DEBUG, + gf_log (fr->this->name, GF_LOG_INFO, "Server lk version = %d", rsp.lk_ver); ret = 0; @@ -436,6 +457,7 @@ out: return ret; } +//TODO: Check for all released fdctx and destroy them int client_set_lk_version (xlator_t *this) { @@ -444,16 +466,24 @@ client_set_lk_version (xlator_t *this) call_frame_t *frame = NULL; gf_set_lk_ver_req req = {0, }; + GF_VALIDATE_OR_GOTO ("client", this, err); + conf = (clnt_conf_t *) this->private; req.lk_ver = client_get_lk_ver (conf); - req.uid = this->ctx->process_uuid; - - gf_log (this->name, GF_LOG_DEBUG, "Sending SET_LK_VERSION"); + ret = gf_asprintf (&req.uid, "%s-%s-%d", + this->ctx->process_uuid, this->name, + this->graph->id); + if (ret == -1) + goto err; frame = create_frame (this, this->ctx->pool); - if (!frame) + if (!frame) { + ret = -1; goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Sending SET_LK_VERSION"); ret = client_submit_request (this, &req, frame, conf->handshake, @@ -462,26 +492,12 @@ client_set_lk_version (xlator_t *this) NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gf_set_lk_ver_req); out: - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "Failed to send SET_LK_VERSION to server"); - + GF_FREE (req.uid); return ret; -} - -int -client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx) -{ - int ret = 1; - - GF_VALIDATE_OR_GOTO ("client", lk_ctx, out); +err: + gf_log (this->name, GF_LOG_WARNING, + "Failed to send SET_LK_VERSION to server"); - LOCK (&lk_ctx->lock); - { - ret = list_empty (&lk_ctx->lk_list); - } - UNLOCK (&lk_ctx->lock); -out: return ret; } @@ -534,9 +550,8 @@ clnt_fd_lk_local_unref (xlator_t *this, clnt_fd_lk_local_t *local) if (ref == 0) { LOCK_DESTROY (&local->lock); - mem_put (local); + GF_FREE (local); } - ref = 0; out: return ref; } @@ -570,7 +585,6 @@ clnt_mark_fd_bad (clnt_conf_t *conf, clnt_fd_ctx_t *fdctx) pthread_mutex_unlock (&conf->lock); } -// call decrement_reopen_fd_count int clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -585,9 +599,9 @@ clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov, fdctx = (clnt_fd_ctx_t *) frame->local; conf = (clnt_conf_t *) this->private; - clnt_mark_fd_bad (conf, fdctx); + clnt_fd_lk_reacquire_failed (this, fdctx, conf); - decrement_reopen_fd_count (this, conf); + fdctx->reopen_done (fdctx, this); frame->local = NULL; STACK_DESTROY (frame->root); @@ -605,7 +619,7 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx) conf = (clnt_conf_t *) this->private; - frame = create_frame (THIS, THIS->ctx->pool); + frame = create_frame (this, this->ctx->pool); if (!frame) goto out; @@ -617,25 +631,64 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx) clnt_release_reopen_fd_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_releasedir_req); -out: + return 0; + out: if (ret) { - decrement_reopen_fd_count (this, conf); - clnt_mark_fd_bad (conf, fdctx); + clnt_fd_lk_reacquire_failed (this, fdctx, conf); + fdctx->reopen_done (fdctx, this); if (frame) { frame->local = NULL; STACK_DESTROY (frame->root); } } - return 0; } int +clnt_reacquire_lock_error (xlator_t *this, clnt_fd_ctx_t *fdctx, + clnt_conf_t *conf) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, fdctx, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + clnt_release_reopen_fd (this, fdctx); + + ret = 0; +out: + return ret; +} + +gf_boolean_t +clnt_fd_lk_local_error_status (xlator_t *this, + clnt_fd_lk_local_t *local) +{ + gf_boolean_t error = _gf_false; + + LOCK (&local->lock); + { + error = local->error; + } + UNLOCK (&local->lock); + + return error; +} + +int clnt_fd_lk_local_mark_error (xlator_t *this, clnt_fd_lk_local_t *local) { + int32_t ret = -1; + clnt_conf_t *conf = NULL; gf_boolean_t error = _gf_false; + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + conf = (clnt_conf_t *) this->private; + LOCK (&local->lock); { error = local->error; @@ -643,30 +696,30 @@ clnt_fd_lk_local_mark_error (xlator_t *this, } UNLOCK (&local->lock); - if (error) - clnt_release_reopen_fd (this, local->fdctx); - - return 0; + if (!error) + clnt_reacquire_lock_error (this, local->fdctx, conf); + ret = 0; +out: + return ret; } -// Also, I think in reopen_cbk, the fdctx is added to -// saved_fd list.. avoid that, may cause a problem -// Reason: While the locks on the fd are reacquired, a release -// fop may be received by the client-protocol translator -// which will free the fdctx datastructure. int client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int32_t ret = -1; xlator_t *this = NULL; - gf_common_rsp rsp = {0,}; + gfs3_lk_rsp rsp = {0,}; call_frame_t *frame = NULL; + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; clnt_fd_lk_local_t *local = NULL; + struct gf_flock lock = {0,}; frame = (call_frame_t *) myframe; this = frame->this; local = (clnt_fd_lk_local_t *) frame->local; + conf = (clnt_conf_t *) this->private; if (req->rpc_status == -1) { gf_log ("client", GF_LOG_WARNING, @@ -674,7 +727,7 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp); + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lk_rsp); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed"); goto out; @@ -686,15 +739,34 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - // TODO: Add more info to log. - gf_log (this->name, GF_LOG_DEBUG, "Reacquired lock"); + fdctx = local->fdctx; + + gf_proto_flock_to_flock (&rsp.flock, &lock); + + gf_log (this->name, GF_LOG_DEBUG, "%s type lock reacquired on file " + "with gfid %s from %"PRIu64 " to %"PRIu64, + get_lk_type (lock.l_type), uuid_utoa (fdctx->gfid), + lock.l_start, lock.l_start + lock.l_len); + + if (!clnt_fd_lk_local_error_status (this, local) && + clnt_fd_lk_local_unref (this, local) == 0) { + pthread_mutex_lock (&conf->lock); + { + fdctx->lk_heal_state = GF_LK_HEAL_DONE; + } + pthread_mutex_unlock (&conf->lock); + + fdctx->reopen_done (fdctx, this); + } ret = 0; out: - if (ret < 0) + if (ret < 0) { clnt_fd_lk_local_mark_error (this, local); - (void) clnt_fd_lk_local_unref (this, local); + clnt_fd_lk_local_unref (this, local); + } + frame->local = NULL; STACK_DESTROY (frame->root); @@ -720,7 +792,10 @@ _client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) local = clnt_fd_lk_local_create (fdctx); if (!local) { - clnt_release_reopen_fd (this, fdctx); + gf_log (this->name, GF_LOG_WARNING, "clnt_fd_lk_local_create " + "failed, aborting reacquring of locks on %s.", + uuid_utoa (fdctx->gfid)); + clnt_reacquire_lock_error (this, fdctx, conf); goto out; } @@ -728,7 +803,9 @@ _client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) memcpy (&flock, &fd_lk->user_flock, sizeof (struct gf_flock)); - ret = client_cmd_to_gf_cmd (fd_lk->cmd, &gf_cmd); + /* Always send F_SETLK even if the cmd was F_SETLKW */ + /* to avoid frame being blocked if lock cannot be granted. */ + ret = client_cmd_to_gf_cmd (F_SETLK, &gf_cmd); if (ret) { gf_log (this->name, GF_LOG_WARNING, "client_cmd_to_gf_cmd failed, " @@ -743,9 +820,9 @@ _client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) (void) gf_proto_flock_from_flock (&req.flock, &flock); - memcpy (req.gfid, fdctx->inode->gfid, 16); + memcpy (req.gfid, fdctx->gfid, 16); - frame = create_frame (THIS, THIS->ctx->pool); + frame = create_frame (this, this->ctx->pool); if (!frame) { ret = -1; break; @@ -759,24 +836,17 @@ _client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) client_reacquire_lock_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_lk_req); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "reacquiring locks failed on file with gfid %s", + uuid_utoa (fdctx->gfid)); break; + } ret = 0; frame = NULL; } - if (ret) { - clnt_fd_lk_local_mark_error (this, local); - - if (frame) { - if (frame->local) { - clnt_fd_lk_local_unref (this, frame->local); - frame->local = NULL; - } - STACK_DESTROY (frame->root); - } - } if (local) (void) clnt_fd_lk_local_unref (this, local); out: @@ -789,33 +859,87 @@ client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) int32_t ret = -1; fd_lk_ctx_t *lk_ctx = NULL; - if (client_fd_lk_list_empty (fdctx->lk_ctx)) { - gf_log (this->name, GF_LOG_WARNING, + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, fdctx, out); + + if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) { + gf_log (this->name, GF_LOG_DEBUG, "fd lock list is empty"); - decrement_reopen_fd_count (this, (clnt_conf_t *)this->private); - ret = 0; - goto out; + fdctx->reopen_done (fdctx, this); + } else { + lk_ctx = fdctx->lk_ctx; + + LOCK (&lk_ctx->lock); + { + (void) _client_reacquire_lock (this, fdctx); + } + UNLOCK (&lk_ctx->lock); } + ret = 0; +out: + return ret; +} - lk_ctx = fdctx->lk_ctx; +void +client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + gf_log_callingfn (this->name, GF_LOG_WARNING, + "This function should never be called"); +} - LOCK (&lk_ctx->lock); +void +client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + gf_boolean_t destroy = _gf_false; + + conf = this->private; + + pthread_mutex_lock (&conf->lock); { - ret = _client_reacquire_lock (this, fdctx); + fdctx->reopen_attempts = 0; + if (!fdctx->released) + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + else + destroy = _gf_true; + fdctx->reopen_done = client_default_reopen_done; + } + pthread_mutex_unlock (&conf->lock); + + if (destroy) + client_fdctx_destroy (this, fdctx); +} + +void +client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + uint64_t fd_count = 0; + + conf = this->private; + + LOCK (&conf->rec_lock); + { + fd_count = --(conf->reopen_fd_count); + } + UNLOCK (&conf->rec_lock); + + client_reopen_done (fdctx, this); + if (fd_count == 0) { + gf_log (this->name, GF_LOG_INFO, + "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); + client_set_lk_version (this); + client_notify_parents_child_up (this); } - UNLOCK (&lk_ctx->lock); -out: - return ret; } int -client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, +client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int32_t ret = -1; gfs3_open_rsp rsp = {0,}; - int attempt_lock_recovery = _gf_false; - uint64_t fd_count = 0; + gf_boolean_t attempt_lock_recovery = _gf_false; clnt_local_t *local = NULL; clnt_conf_t *conf = NULL; clnt_fd_ctx_t *fdctx = NULL; @@ -823,12 +947,10 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, xlator_t *this = NULL; frame = myframe; - if (!frame || !frame->this) - goto out; - this = frame->this; + conf = this->private; local = frame->local; - conf = frame->this->private; + fdctx = local->fdctx; if (-1 == req->rpc_status) { gf_log (frame->this->name, GF_LOG_WARNING, @@ -861,49 +983,41 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } - fdctx = local->fdctx; - - if (!fdctx) { - gf_log (frame->this->name, GF_LOG_WARNING, "fdctx not found"); - ret = -1; - goto out; - } - pthread_mutex_lock (&conf->lock); { fdctx->remote_fd = rsp.fd; if (!fdctx->released) { - list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); - if (!client_fd_lk_list_empty (fdctx->lk_ctx)) + if (conf->lk_heal && + !client_fd_lk_list_empty (fdctx->lk_ctx, + _gf_false)) { attempt_lock_recovery = _gf_true; - fdctx = NULL; + fdctx->lk_heal_state = GF_LK_HEAL_IN_PROGRESS; + } } } pthread_mutex_unlock (&conf->lock); ret = 0; - if (conf->lk_heal && attempt_lock_recovery) { - /* Delay decrement the reopen fd count untill all the + if (attempt_lock_recovery) { + /* Delay decrementing the reopen fd count untill all the locks corresponding to this fd are acquired.*/ - gf_log (frame->this->name, GF_LOG_WARNING, "acquiring locks on " - "%s", local->loc.path); + gf_log (this->name, GF_LOG_DEBUG, "acquiring locks " + "on %s", local->loc.path); ret = client_reacquire_lock (frame->this, local->fdctx); - } else { - fd_count = decrement_reopen_fd_count (frame->this, conf); + if (ret) { + clnt_reacquire_lock_error (this, local->fdctx, conf); + gf_log (this->name, GF_LOG_WARNING, "acquiring locks " + "failed on %s", local->loc.path); + } } -out: - if (fdctx) - client_fdctx_destroy (this, fdctx); - - if ((ret < 0) && frame && frame->this && conf) - decrement_reopen_fd_count (frame->this, conf); +out: + if (!attempt_lock_recovery) + fdctx->reopen_done (fdctx, this); - if (frame) { - frame->local = NULL; - STACK_DESTROY (frame->root); - } + frame->local = NULL; + STACK_DESTROY (frame->root); client_local_wipe (local); @@ -911,7 +1025,7 @@ out: } int -client3_1_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, +client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int32_t ret = -1; @@ -922,11 +1036,10 @@ client3_1_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, call_frame_t *frame = NULL; frame = myframe; - if (!frame || !frame->this) - goto out; + local = frame->local; + fdctx = local->fdctx; + conf = frame->this->private; - local = frame->local; - conf = frame->this->private; if (-1 == req->rpc_status) { gf_log (frame->this->name, GF_LOG_WARNING, @@ -959,78 +1072,44 @@ client3_1_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } - fdctx = local->fdctx; - if (!fdctx) { - gf_log (frame->this->name, GF_LOG_WARNING, "fdctx not found"); - ret = -1; - goto out; - } - pthread_mutex_lock (&conf->lock); { fdctx->remote_fd = rsp.fd; - - if (!fdctx->released) { - list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); - fdctx = NULL; - } } pthread_mutex_unlock (&conf->lock); - decrement_reopen_fd_count (frame->this, conf); - ret = 0; - out: - if (fdctx) - client_fdctx_destroy (frame->this, fdctx); - - if ((ret < 0) && frame && frame->this && conf) - decrement_reopen_fd_count (frame->this, conf); - - if (frame) { - frame->local = NULL; - STACK_DESTROY (frame->root); - } + fdctx->reopen_done (fdctx, frame->this); + frame->local = NULL; + STACK_DESTROY (frame->root); client_local_wipe (local); return 0; } -int -protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) +static int +protocol_client_reopendir (clnt_fd_ctx_t *fdctx, xlator_t *this) { int ret = -1; gfs3_opendir_req req = {{0,},}; clnt_local_t *local = NULL; - inode_t *inode = NULL; - char *path = NULL; call_frame_t *frame = NULL; clnt_conf_t *conf = NULL; - if (!this || !fdctx) - goto out; - - inode = fdctx->inode; conf = this->private; - ret = inode_path (inode, NULL, &path); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "couldn't build path from inode %s", - uuid_utoa (inode->gfid)); - goto out; - } - local = mem_get0 (this->local_pool); if (!local) { ret = -1; goto out; } - local->fdctx = fdctx; - local->loc.path = path; - path = NULL; + + uuid_copy (local->loc.gfid, fdctx->gfid); + ret = loc_path (&local->loc, NULL); + if (ret < 0) + goto out; frame = create_frame (this, this->ctx->pool); if (!frame) { @@ -1038,27 +1117,26 @@ protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) goto out; } - memcpy (req.gfid, inode->gfid, 16); + memcpy (req.gfid, fdctx->gfid, 16); gf_log (frame->this->name, GF_LOG_DEBUG, "attempting reopen on %s", local->loc.path); - frame->local = local; local = NULL; + frame->local = local; ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_OPENDIR, - client3_1_reopendir_cbk, NULL, + client3_3_reopendir_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_opendir_req); - if (ret) - goto out; + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to send the re-opendir request"); + } - return ret; + return 0; out: - gf_log (THIS->name, GF_LOG_ERROR, - "failed to send the re-opendir request"); - if (frame) { frame->local = NULL; STACK_DESTROY (frame->root); @@ -1067,41 +1145,23 @@ out: if (local) client_local_wipe (local); - if (path) - GF_FREE (path); - if ((ret < 0) && this && conf) { - decrement_reopen_fd_count (this, conf); - } + fdctx->reopen_done (fdctx, this); return 0; } -int -protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx) +static int +protocol_client_reopenfile (clnt_fd_ctx_t *fdctx, xlator_t *this) { int ret = -1; gfs3_open_req req = {{0,},}; clnt_local_t *local = NULL; - inode_t *inode = NULL; - char *path = NULL; call_frame_t *frame = NULL; clnt_conf_t *conf = NULL; - if (!this || !fdctx) - goto out; - - inode = fdctx->inode; conf = this->private; - ret = inode_path (inode, NULL, &path); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "couldn't build path from inode %s", - uuid_utoa (inode->gfid)); - goto out; - } - frame = create_frame (this, this->ctx->pool); if (!frame) { ret = -1; @@ -1115,31 +1175,32 @@ protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx) } local->fdctx = fdctx; - local->loc.path = path; - path = NULL; + uuid_copy (local->loc.gfid, fdctx->gfid); + ret = loc_path (&local->loc, NULL); + if (ret < 0) + goto out; + frame->local = local; - memcpy (req.gfid, inode->gfid, 16); + memcpy (req.gfid, fdctx->gfid, 16); req.flags = gf_flags_from_flags (fdctx->flags); - req.wbflags = fdctx->wbflags; + req.flags = req.flags & (~(O_TRUNC|O_CREAT|O_EXCL)); gf_log (frame->this->name, GF_LOG_DEBUG, "attempting reopen on %s", local->loc.path); - local = NULL; ret = client_submit_request (this, &req, frame, conf->fops, - GFS3_OP_OPEN, client3_1_reopen_cbk, NULL, + GFS3_OP_OPEN, client3_3_reopen_cbk, NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfs3_open_req); - if (ret) - goto out; + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to send the re-open request"); + } - return ret; + return 0; out: - gf_log (THIS->name, GF_LOG_ERROR, - "failed to send the re-open request"); - if (frame) { frame->local = NULL; STACK_DESTROY (frame->root); @@ -1148,17 +1209,65 @@ out: if (local) client_local_wipe (local); - if (path) - GF_FREE (path); - - if ((ret < 0) && this && conf) { - decrement_reopen_fd_count (this, conf); - } + fdctx->reopen_done (fdctx, this); return 0; } +static void +protocol_client_reopen (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + if (fdctx->is_dir) + protocol_client_reopendir (fdctx, this); + else + protocol_client_reopenfile (fdctx, this); +} + +gf_boolean_t +__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx) +{ + if (fdctx->reopen_done == client_default_reopen_done) + return _gf_false; + return _gf_true; +} + +void +client_attempt_reopen (fd_t *fd, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; + gf_boolean_t reopen = _gf_false; + + if (!fd || !this) + goto out; + + conf = this->private; + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + if (!fdctx) + goto unlock; + if (__is_fd_reopen_in_progress (fdctx)) + goto unlock; + if (fdctx->remote_fd != -1) + goto unlock; + + if (fdctx->reopen_attempts == CLIENT_REOPEN_MAX_ATTEMPTS) { + reopen = _gf_true; + fdctx->reopen_done = client_reopen_done; + list_del_init (&fdctx->sfd_pos); + } else { + fdctx->reopen_attempts++; + } + } +unlock: + pthread_mutex_unlock (&conf->lock); + if (reopen) + protocol_client_reopen (fdctx, this); +out: + return; +} int client_post_handshake (call_frame_t *frame, xlator_t *this) @@ -1183,6 +1292,7 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) if (fdctx->remote_fd != -1) continue; + fdctx->reopen_done = client_child_up_reopen_done; list_del_init (&fdctx->sfd_pos); list_add_tail (&fdctx->sfd_pos, &reopen_head); count++; @@ -1201,14 +1311,11 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { list_del_init (&fdctx->sfd_pos); - if (fdctx->is_dir) - protocol_client_reopendir (this, fdctx); - else - protocol_client_reopen (this, fdctx); + protocol_client_reopen (fdctx, this); } } else { gf_log (this->name, GF_LOG_DEBUG, - "no fds to open - notifying all parents child up"); + "No fds to open - notifying all parents child up"); client_set_lk_version (this); client_notify_parents_child_up (this); } @@ -1290,8 +1397,7 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m remote_error ? remote_error : strerror (op_errno)); errno = op_errno; if (remote_error && - (strncmp ("Authentication failed",remote_error, - sizeof (remote_error)) == 0)) { + (strcmp ("Authentication failed", remote_error) == 0)) { auth_fail = _gf_true; op_ret = 0; } @@ -1320,7 +1426,7 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m goto out; } - gf_log (this->name, GF_LOG_INFO, "clnt-lk-version = %d, " + gf_log (this->name, GF_LOG_DEBUG, "clnt-lk-version = %d, " "server-lk-version = %d", client_get_lk_ver (conf), lk_ver); /* TODO: currently setpeer path is broken */ /* @@ -1358,13 +1464,17 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m conf->need_different_port = 0; if (lk_ver != client_get_lk_ver (conf)) { + gf_log (this->name, GF_LOG_INFO, "Server and Client " + "lk-version numbers are not same, reopening the fds"); client_mark_fd_bad (this); client_post_handshake (frame, frame->this); } else { /*TODO: Traverse the saved fd list, and send release to the server on fd's that were closed during grace period */ - ; + gf_log (this->name, GF_LOG_INFO, "Server and Client " + "lk-version numbers are same, no need to " + "reopen the fds"); } out: @@ -1394,8 +1504,7 @@ out: ret = 0; } - if (rsp.dict.dict_val) - free (rsp.dict.dict_val); + free (rsp.dict.dict_val); STACK_DESTROY (frame->root); @@ -1439,8 +1548,13 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) } } - ret = gf_asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid, - this->name); + /* With multiple graphs possible in the same process, we need a + field to bring the uniqueness. Graph-ID should be enough to get the + job done + */ + ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d", + this->ctx->process_uuid, this->name, + this->graph->id); if (-1 == ret) { gf_log (this->name, GF_LOG_ERROR, "asprintf failed while setting process_uuid"); @@ -1485,13 +1599,14 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) client_get_lk_ver (conf)); } - req.dict.dict_len = dict_serialized_length (options); - if (req.dict.dict_len < 0) { + ret = dict_serialized_length (options); + if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to get serialized length of dict"); ret = -1; goto fail; } + req.dict.dict_len = ret; req.dict.dict_val = GF_CALLOC (1, req.dict.dict_len, gf_client_mt_clnt_req_buf_t); ret = dict_serialize (options, req.dict.dict_val); @@ -1511,8 +1626,7 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc) (xdrproc_t)xdr_gf_setvolume_req); fail: - if (req.dict.dict_val) - GF_FREE (req.dict.dict_val); + GF_FREE (req.dict.dict_val); return ret; } @@ -1535,9 +1649,9 @@ select_server_supported_programs (xlator_t *this, gf_prog_detail *prog) while (trav) { /* Select 'programs' */ - if ((clnt3_1_fop_prog.prognum == trav->prognum) && - (clnt3_1_fop_prog.progver == trav->progver)) { - conf->fops = &clnt3_1_fop_prog; + if ((clnt3_3_fop_prog.prognum == trav->prognum) && + (clnt3_3_fop_prog.progver == trav->progver)) { + conf->fops = &clnt3_3_fop_prog; gf_log (this->name, GF_LOG_INFO, "Using Program %s, Num (%"PRId64"), " "Version (%"PRId64")", @@ -1622,16 +1736,21 @@ client_query_portmap_cbk (struct rpc_req *req, struct iovec *iov, int count, voi ret = -1; gf_log (this->name, ((!conf->portmap_err_logged) ? GF_LOG_ERROR : GF_LOG_DEBUG), - "failed to get the port number for remote subvolume"); + "failed to get the port number for remote subvolume. " + "Please run 'gluster volume status' on server to see " + "if brick process is running."); conf->portmap_err_logged = 1; goto out; } conf->portmap_err_logged = 0; + conf->disconnect_err_logged = 0; config.remote_port = rsp.port; rpc_clnt_reconfig (conf->rpc, &config); + conf->skip_notify = 1; + conf->quick_reconnect = 1; out: if (frame) @@ -1641,7 +1760,6 @@ out: /* Need this to connect the same transport on different port */ /* ie, glusterd to glusterfsd */ rpc_transport_disconnect (conf->rpc->conn.trans); - rpc_clnt_reconnect (conf->rpc->conn.trans); } return ret; |
