From 83277598bda524f44b76feed6adc7b19fc49d49a Mon Sep 17 00:00:00 2001 From: Mohammed Junaid Date: Mon, 19 Mar 2012 19:56:21 +0530 Subject: protocol/server: Handle server send reply failure gracefully. Server send reply failure should not call server connection cleanup because if a reconnection happens with in the grace-timeout the connection object is reused. We must cleanup only on grace-timeout. Change-Id: I7d171a863382646ff392031c2b845fe4f0d3d5dc BUG: 803365 Signed-off-by: Mohammed Junaid Reviewed-on: http://review.gluster.com/2947 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 +- xlators/protocol/client/src/client.c | 79 ++++++++++++++++----------- xlators/protocol/client/src/client.h | 12 ++--- xlators/protocol/server/src/server.c | 84 +++++++++++++++++++++-------- xlators/protocol/server/src/server.h | 2 + 5 files changed, 121 insertions(+), 59 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 2004a0834..8e35ad08a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -210,7 +210,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {VKEY_FEATURES_LIMIT_USAGE, "features/quota", "limit-set", NULL, NO_DOC, 0}, {"features.quota-timeout", "features/quota", "timeout", "0", DOC, 0}, {"server.statedump-path", "protocol/server", "statedump-path", NULL, NO_DOC, 0}, - {"client.lk-heal", "protocol/client", "lk-heal", NULL, DOC, 0}, + {"features.lock-heal", "protocol/client", "lk-heal", NULL, DOC, 0}, + {"features.lock-heal", "protocol/server", "lk-heal", NULL, DOC, 0}, {"client.grace-timeout", "protocol/client", "grace-timeout", NULL, DOC, 0}, {"server.grace-timeout", "protocol/server", "grace-timeout", NULL, DOC, 0}, {"feature.read-only", "features/read-only", "!read-only", "off", DOC, 0}, diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 00ec90ff0..078e6e5d0 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -116,6 +116,38 @@ out: return; } +int32_t +client_register_grace_timer (xlator_t *this, clnt_conf_t *conf) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + pthread_mutex_lock (&conf->lock); + { + if (conf->grace_timer || !conf->grace_timer_needed) { + gf_log (this->name, GF_LOG_TRACE, + "Client grace timer is already set " + "or a grace-timer has already time out, " + "not registering a new timer"); + } else { + gf_log (this->name, GF_LOG_INFO, + "Registering a grace timer"); + conf->grace_timer = + gf_timer_call_after (this->ctx, + conf->grace_tv, + client_grace_timeout, + conf->rpc); + } + } + pthread_mutex_unlock (&conf->lock); + + ret = 0; +out: + return ret; +} + int client_submit_request (xlator_t *this, void *req, call_frame_t *frame, rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn, @@ -1999,7 +2031,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, /* Cancel grace timer if set */ pthread_mutex_lock (&conf->lock); { - conf->grace_timer_flag = _gf_true; + conf->grace_timer_needed = _gf_true; if (conf->grace_timer) { gf_log (this->name, GF_LOG_WARNING, @@ -2016,28 +2048,10 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, break; } case RPC_CLNT_DISCONNECT: - /* client_mark_fd_bad (this); */ - - pthread_mutex_lock (&conf->lock); - { - if (conf->grace_timer || !conf->grace_timer_flag) { - gf_log (this->name, GF_LOG_TRACE, - "Client grace timer is already set " - "or a grace-timer has already timeout, " - "not registering a new timer"); - } else { - gf_log (this->name, GF_LOG_WARNING, - "Registering a grace timer"); - conf->grace_timer_flag = _gf_false; - - conf->grace_timer = - gf_timer_call_after (this->ctx, - conf->grace_tv, - client_grace_timeout, - conf->rpc); - } - } - pthread_mutex_unlock (&conf->lock); + if (!conf->lk_heal) + client_mark_fd_bad (this); + else + client_register_grace_timer (this, conf); if (!conf->skip_notify) { if (conf->connected) @@ -2263,6 +2277,9 @@ client_init_grace_timer (xlator_t *this, dict_t *options, if (!ret) gf_string2boolean (lk_heal, &conf->lk_heal); + gf_log (this->name, GF_LOG_DEBUG, "lk-heal = %s", + (conf->lk_heal) ? "on" : "off"); + ret = dict_get_int32 (options, "grace-timeout", &grace_timeout); if (!ret) conf->grace_tv.tv_sec = grace_timeout; @@ -2271,8 +2288,8 @@ client_init_grace_timer (xlator_t *this, dict_t *options, conf->grace_tv.tv_usec = 0; - gf_log (this->name, GF_LOG_INFO, "lk-heal = %s", - (conf->lk_heal) ? "on" : "off"); + gf_log (this->name, GF_LOG_DEBUG, "Client grace timeout " + "value = %"PRIu64, conf->grace_tv.tv_sec); ret = 0; out: @@ -2363,9 +2380,9 @@ init (xlator_t *this) INIT_LIST_HEAD (&conf->saved_fds); /* Initialize parameters for lock self healing*/ - conf->lk_version = 1; - conf->grace_timer = NULL; - conf->grace_timer_flag = _gf_true; + conf->lk_version = 1; + conf->grace_timer = NULL; + conf->grace_timer_needed = _gf_true; ret = client_init_grace_timer (this, this->options, conf); if (ret) @@ -2644,10 +2661,12 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL }, { .key = {"lk-heal"}, - .type = GF_OPTION_TYPE_STR + .type = GF_OPTION_TYPE_BOOL, }, { .key = {"grace-timeout"}, - .type = GF_OPTION_TYPE_INT + .type = GF_OPTION_TYPE_INT, + .min = 10, + .max = 1800 }, {.key = {"tcp-window-size"}, .type = GF_OPTION_TYPE_SIZET, diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index e1647d9fa..352d2b371 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -106,12 +106,12 @@ typedef struct clnt_conf { performing lock healing */ struct timeval grace_tv; gf_timer_t *grace_timer; - gf_boolean_t grace_timer_flag; /* The state of this flag will - be used to decide whether - a new grace-timer must be - registered or not. False - means dont register, true - means register */ + gf_boolean_t grace_timer_needed; /* The state of this flag will + be used to decide whether + a new grace-timer must be + registered or not. False + means dont register, true + means register */ char parent_down; } clnt_conf_t; diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 519d378ac..d22852404 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -128,7 +128,8 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, struct iovec rsp = {0,}; server_state_t *state = NULL; char new_iobref = 0; - server_connection_t *conn = NULL; + server_connection_t *conn = NULL; + gf_boolean_t lk_heal = _gf_false; GF_VALIDATE_OR_GOTO ("server", req, ret); @@ -138,6 +139,9 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, conn = SERVER_CONNECTION(frame); } + if (conn) + lk_heal = ((server_conf_t *) conn->this->private)->lk_heal; + if (!iobref) { iobref = iobref_new (); if (!iobref) { @@ -170,9 +174,14 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, iobuf_unref (iob); if (ret == -1) { gf_log_callingfn ("", GF_LOG_ERROR, "Reply submission failed"); - if (frame && conn) + if (frame && conn && !lk_heal) { server_connection_cleanup (frame->this, conn, INTERNAL_LOCKS | POSIX_LOCKS); + } else { + /* TODO: Failure of open(dir), create, inodelk, entrylk + or lk fops send failure must be handled specially. */ + ; + } goto ret; } @@ -614,6 +623,7 @@ int server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) { + gf_boolean_t detached = _gf_false; xlator_t *this = NULL; rpc_transport_t *xprt = NULL; server_connection_t *conn = NULL; @@ -655,32 +665,44 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, if (!conn) break; - put_server_conn_state (this, xprt); gf_log (this->name, GF_LOG_INFO, "disconnecting connection" "from %s", conn->id); - server_connection_cleanup (this, conn, INTERNAL_LOCKS); - pthread_mutex_lock (&conf->mutex); - { - list_del (&xprt->list); - } - pthread_mutex_unlock (&conf->mutex); - pthread_mutex_lock (&conn->lock); - { - if (conn->timer) - goto unlock; + /* If lock self heal is off, then destroy the + conn object, else register a grace timer event */ + if (!conf->lk_heal) { + server_conn_ref (conn); + server_connection_put (this, conn, &detached); + if (detached) + server_connection_cleanup (this, conn, + INTERNAL_LOCKS | + POSIX_LOCKS); + server_conn_unref (conn); + } else { + put_server_conn_state (this, xprt); + server_connection_cleanup (this, conn, INTERNAL_LOCKS); + pthread_mutex_lock (&conf->mutex); + { + list_del (&xprt->list); + } + pthread_mutex_unlock (&conf->mutex); - gf_log (this->name, GF_LOG_INFO, "starting a grace " - "timer for %s", conn->id); + pthread_mutex_lock (&conn->lock); + { + if (conn->timer) + goto unlock; - conn->timer = gf_timer_call_after (this->ctx, - conf->grace_tv, - grace_time_handler, - conn); - } - unlock: - pthread_mutex_unlock (&conn->lock); + gf_log (this->name, GF_LOG_INFO, "starting a grace " + "timer for %s", xprt->name); + conn->timer = gf_timer_call_after (this->ctx, + conf->grace_tv, + grace_time_handler, + conn); + } + unlock: + pthread_mutex_unlock (&conn->lock); + } break; case RPCSVC_EVENT_TRANSPORT_DESTROY: /*- conn obj has been disassociated from xprt on first @@ -758,17 +780,30 @@ server_init_grace_timer (xlator_t *this, dict_t *options, { int32_t ret = -1; int32_t grace_timeout = -1; + char *lk_heal = NULL; GF_VALIDATE_OR_GOTO ("server", this, out); GF_VALIDATE_OR_GOTO (this->name, options, out); GF_VALIDATE_OR_GOTO (this->name, conf, out); + conf->lk_heal = _gf_true; + + ret = dict_get_str (options, "lk-heal", &lk_heal); + if (!ret) + gf_string2boolean (lk_heal, &conf->lk_heal); + + gf_log (this->name, GF_LOG_DEBUG, "lk-heal = %s", + (conf->lk_heal) ? "on" : "off"); + ret = dict_get_int32 (options, "grace-timeout", &grace_timeout); if (!ret) conf->grace_tv.tv_sec = grace_timeout; else conf->grace_tv.tv_sec = 10; + gf_log (this->name, GF_LOG_DEBUG, "Server grace timeout " + "value = %"PRIu64, conf->grace_tv.tv_sec); + conf->grace_tv.tv_usec = 0; ret = 0; @@ -1146,8 +1181,13 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_PATH, .default_value = "/tmp" }, + { .key = {"lk-heal"}, + .type = GF_OPTION_TYPE_BOOL, + }, {.key = {"grace-timeout"}, .type = GF_OPTION_TYPE_INT, + .min = 10, + .max = 1800, }, {.key = {"tcp-window-size"}, .type = GF_OPTION_TYPE_SIZET, diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 4213faa17..d000c8d36 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -108,6 +108,8 @@ struct server_conf { int inode_lru_limit; gf_boolean_t verify_volfile; gf_boolean_t trace; + gf_boolean_t lk_heal; /* If true means lock self + heal is on else off. */ char *conf_dir; struct _volfile_ctx *volfile; struct timeval grace_tv; -- cgit