From f18a3f30bbeaf3bb067b913082830d7f874555ca Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Thu, 5 Feb 2015 15:41:35 +0530 Subject: protocol/client: sequence CHILD_UP, CHILD_DOWN etc notifications ... from all bricks in the volume This patch is important in the context of MT epoll. With MT epoll, notification events from client xlators could reach cluster xlators like afr, dht, ec, stripe etc. in different orders. For e.g, In a distributed replicate volume of 2 bricks, namely Brick1 and Brick2, the following network events are observed by a mount process. - connection to Brick1 is broken. - connection to Brick1 has been restored. - connection to Brick2 is broken. - connection to Brick2 has been restored. Without establishing a total ordering of events, we can't guarantee that cluster xlators like afr, dht perceive them in the same order. While we would expect afr (say) to perceive it as only one of Brick1 and Brick2 going down at any given time, it is possible for the notification of Brick2 going offline to race with the notification of Brick1 coming back online. Change-Id: I78f5a52bfb05593335d0e9ad53ebfff98995593d BUG: 1104462 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/9591 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/protocol/client/src/client-handshake.c | 15 +++-- xlators/protocol/client/src/client.c | 78 ++++++++++++++++++++------ xlators/protocol/client/src/client.h | 2 + 3 files changed, 71 insertions(+), 24 deletions(-) (limited to 'xlators/protocol') diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 42b7ac0745e..531b38eaf83 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -131,12 +131,11 @@ client_notify_parents_child_up (xlator_t *this) int ret = 0; conf = this->private; - ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + ret = client_notify_dispatch (this, GF_EVENT_CHILD_UP, NULL); if (ret) gf_log (this->name, GF_LOG_INFO, "notify of CHILD_UP failed"); - conf->last_sent_event = GF_EVENT_CHILD_UP; return 0; } @@ -1146,11 +1145,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m op_ret = 0; } if (op_errno == ESTALE) { - ret = default_notify (this, GF_EVENT_VOLFILE_MODIFIED, NULL); + ret = client_notify_dispatch (this, + GF_EVENT_VOLFILE_MODIFIED, + NULL); if (ret) gf_log (this->name, GF_LOG_INFO, "notify of VOLFILE_MODIFIED failed"); - conf->last_sent_event = GF_EVENT_VOLFILE_MODIFIED; } goto out; } @@ -1223,13 +1223,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m out: if (auth_fail) { gf_log (this->name, GF_LOG_INFO, "sending AUTH_FAILED event"); - ret = default_notify (this, GF_EVENT_AUTH_FAILED, NULL); + ret = client_notify_dispatch (this, GF_EVENT_AUTH_FAILED, NULL); if (ret) gf_log (this->name, GF_LOG_INFO, "notify of AUTH_FAILED failed"); conf->connecting = 0; conf->connected = 0; - conf->last_sent_event = GF_EVENT_AUTH_FAILED; ret = -1; } if (-1 == op_ret) { @@ -1238,11 +1237,11 @@ out: * tell the parents that i am all ok.. */ gf_log (this->name, GF_LOG_INFO, "sending CHILD_CONNECTING event"); - ret = default_notify (this, GF_EVENT_CHILD_CONNECTING, NULL); + ret = client_notify_dispatch (this, GF_EVENT_CHILD_CONNECTING, + NULL); if (ret) gf_log (this->name, GF_LOG_INFO, "notify of CHILD_CONNECTING failed"); - conf->last_sent_event = GF_EVENT_CHILD_CONNECTING; conf->connecting= 1; ret = 0; } diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 999a4a5c836..00d88d6e7a1 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -34,6 +34,55 @@ int client_init_rpc (xlator_t *this); int client_destroy_rpc (xlator_t *this); int client_mark_fd_bad (xlator_t *this); +static int +client_notify_dispatch_uniq (xlator_t *this, int32_t event, void *data, ...) +{ + clnt_conf_t *conf = this->private; + + if (conf->last_sent_event == event) + return 0; + + return client_notify_dispatch (this, event, data); +} + +int +client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...) +{ + int ret = -1; + glusterfs_ctx_t *ctx = this->ctx; + clnt_conf_t *conf = this->private; + + pthread_mutex_lock (&ctx->notify_lock); + { + while (ctx->notifying) + pthread_cond_wait (&ctx->notify_cond, + &ctx->notify_lock); + ctx->notifying = 1; + } + pthread_mutex_unlock (&ctx->notify_lock); + + /* We assume that all translators in the graph handle notification + * events in sequence. + * */ + ret = default_notify (this, event, data); + + /* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed + * that there would be atmost one poller thread executing this + * notification function. This allows us to update last_sent_event + * without explicit synchronization. See epoll(7). + */ + conf->last_sent_event = event; + + pthread_mutex_lock (&ctx->notify_lock); + { + ctx->notifying = 0; + pthread_cond_signal (&ctx->notify_cond); + } + pthread_mutex_unlock (&ctx->notify_lock); + + return ret; +} + int32_t client_type_to_gf_type (short l_type) { @@ -2169,14 +2218,12 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, "handshake msg returned %d", ret); } else { //conf->rpc->connected = 1; - if (conf->last_sent_event != GF_EVENT_CHILD_UP) { - ret = default_notify (this, GF_EVENT_CHILD_UP, - NULL); - if (ret) - gf_log (this->name, GF_LOG_INFO, - "CHILD_UP notify failed"); - conf->last_sent_event = GF_EVENT_CHILD_UP; - } + ret = client_notify_dispatch_uniq (this, + GF_EVENT_CHILD_UP, + NULL); + if (ret) + gf_log (this->name, GF_LOG_INFO, + "CHILD_UP notify failed"); } /* Cancel grace timer if set */ @@ -2224,14 +2271,13 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, may get screwed up.. (eg. CHILD_MODIFIED event in replicate), hence make sure events which are passed to parent are genuine */ - if (conf->last_sent_event != GF_EVENT_CHILD_DOWN) { - ret = default_notify (this, GF_EVENT_CHILD_DOWN, - NULL); - if (ret) - gf_log (this->name, GF_LOG_INFO, - "CHILD_DOWN notify failed"); - conf->last_sent_event = GF_EVENT_CHILD_DOWN; - } + ret = client_notify_dispatch_uniq (this, + GF_EVENT_CHILD_DOWN, + NULL); + if (ret) + gf_log (this->name, GF_LOG_INFO, + "CHILD_DOWN notify failed"); + } else { if (conf->connected) gf_log (this->name, GF_LOG_DEBUG, diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index af70926b178..1aea1353727 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -262,4 +262,6 @@ int client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd, int64_t remote_fd); gf_boolean_t __is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx); +int +client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...); #endif /* !_CLIENT_H */ -- cgit