From 04e673f14e31c60e4c9cde9072bcec610fe3884b Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 31 Dec 2012 10:03:32 +0530 Subject: protocol/client: Periodically attempt reopens If the brick is taken down and the hard disk is replaced and the brick is brought back up, the re-opens of the open-fds will fail because the file is not present on the brick. Re-opens are not attempted even if the files are re-created by self-heal until the brick is brought down after the files are re-created and brought back up. This is a problem with a VM-store in a replica-setup. Until the fd is re-opened the writes will never happen on the brick where the hard-disk is replaced. To handle this situation gracefully, client xlator is enhanced to perform finodelk, fxattrop, writev, readv using anonymous fds if the file is yet to be re-opened. If the fop succeeds then client xlator attempts re-open. Change-Id: I1cc6d1bbf8227cd996868ab2ed0a57fb05e00017 BUG: 821056 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/4358 Tested-by: Gluster Build System Reviewed-by: Jeff Darcy --- xlators/protocol/client/src/client-handshake.c | 99 +++++++++++++++++++++----- 1 file changed, 81 insertions(+), 18 deletions(-) (limited to 'xlators/protocol/client/src/client-handshake.c') diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 7862635fc..75c58afe8 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -25,6 +25,7 @@ #include "portmap-xdr.h" #include "rpc-common-xdr.h" +#define CLIENT_REOPEN_MAX_ATTEMPTS 1024 extern rpc_clnt_prog_t clnt3_3_fop_prog; extern rpc_clnt_prog_t clnt_pmap_prog; @@ -886,22 +887,16 @@ client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) } void -client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) { clnt_conf_t *conf = NULL; - uint64_t fd_count = 0; gf_boolean_t destroy = _gf_false; conf = this->private; - LOCK (&conf->rec_lock); - { - fd_count = --(conf->reopen_fd_count); - } - UNLOCK (&conf->rec_lock); - pthread_mutex_lock (&conf->lock); { + fdctx->reopen_attempts = 0; if (!fdctx->released) list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); else @@ -910,14 +905,31 @@ client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) } pthread_mutex_unlock (&conf->lock); + if (destroy) + client_fdctx_destroy (this, fdctx); +} + +void +client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + uint64_t fd_count = 0; + + conf = this->private; + + LOCK (&conf->rec_lock); + { + fd_count = --(conf->reopen_fd_count); + } + UNLOCK (&conf->rec_lock); + + client_reopen_done (fdctx, this); if (fd_count == 0) { gf_log (this->name, GF_LOG_INFO, "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); client_set_lk_version (this); client_notify_parents_child_up (this); } - if (destroy) - client_fdctx_destroy (this, fdctx); } int @@ -1075,8 +1087,8 @@ out: return 0; } -int -protocol_client_reopendir (xlator_t *this, clnt_fd_ctx_t *fdctx) +static int +protocol_client_reopendir (clnt_fd_ctx_t *fdctx, xlator_t *this) { int ret = -1; gfs3_opendir_req req = {{0,},}; @@ -1139,8 +1151,8 @@ out: } -int -protocol_client_reopen (xlator_t *this, clnt_fd_ctx_t *fdctx) +static int +protocol_client_reopenfile (clnt_fd_ctx_t *fdctx, xlator_t *this) { int ret = -1; gfs3_open_req req = {{0,},}; @@ -1204,6 +1216,60 @@ out: } +static void +protocol_client_reopen (clnt_fd_ctx_t *fdctx, xlator_t *this) +{ + if (fdctx->is_dir) + protocol_client_reopendir (fdctx, this); + else + protocol_client_reopenfile (fdctx, this); +} + +gf_boolean_t +__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx) +{ + if (fdctx->reopen_done == client_default_reopen_done) + return _gf_false; + return _gf_true; +} + +void +client_attempt_reopen (fd_t *fd, xlator_t *this) +{ + clnt_conf_t *conf = NULL; + clnt_fd_ctx_t *fdctx = NULL; + gf_boolean_t reopen = _gf_false; + + if (!fd || !this) + goto out; + + conf = this->private; + pthread_mutex_lock (&conf->lock); + { + fdctx = this_fd_get_ctx (fd, this); + if (!fdctx) + goto unlock; + if (__is_fd_reopen_in_progress (fdctx)) + goto unlock; + if (fdctx->remote_fd != -1) + goto unlock; + + if (fdctx->reopen_attempts == CLIENT_REOPEN_MAX_ATTEMPTS) { + reopen = _gf_true; + fdctx->reopen_done = client_reopen_done; + list_del_init (&fdctx->sfd_pos); + } else { + fdctx->reopen_attempts++; + } + } +unlock: + pthread_mutex_unlock (&conf->lock); + if (reopen) + protocol_client_reopen (fdctx, this); +out: + return; +} + int client_post_handshake (call_frame_t *frame, xlator_t *this) { @@ -1246,10 +1312,7 @@ client_post_handshake (call_frame_t *frame, xlator_t *this) list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { list_del_init (&fdctx->sfd_pos); - if (fdctx->is_dir) - protocol_client_reopendir (this, fdctx); - else - protocol_client_reopen (this, fdctx); + protocol_client_reopen (fdctx, this); } } else { gf_log (this->name, GF_LOG_DEBUG, -- cgit