summaryrefslogtreecommitdiffstats
path: root/rpc
diff options
context:
space:
mode:
authorRajesh Joseph <rjoseph@redhat.com>2016-12-03 01:10:51 +0530
committerAtin Mukherjee <amukherj@redhat.com>2016-12-05 06:09:48 -0800
commitaa22f24f5db7659387704998ae01520708869873 (patch)
tree65ec974c4feea508ee09c01da6ff2347491f73c4 /rpc
parent45f914ec9c7b15ba8e962b8fae3593f06912c1f0 (diff)
rpc: fix for race between rpc and protocol/client
It is possible that the notification thread which notifies protocol/client layer about the disconnection is put to sleep and meanwhile, a fuse thread or a timer thread initiates and completes reconnection to the brick. The notification thread is then woken up and protocol/client layer updates its flags to indicate that network is disconnected. No reconnection is initiated because reconnection is rpc-lib layer's responsibility and its flags indicate that connection is connected. Fix: Serialize connect and disconnect notify Credit: Raghavendra Talur <rtalur@redhat.com> Change-Id: I8ff5d1a3283b47f5c26848a42016a40bc34ffc1d BUG: 1386626 Signed-off-by: Rajesh Joseph <rjoseph@redhat.com> Reviewed-on: http://review.gluster.org/15916 Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra Talur <rtalur@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'rpc')
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c98
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h1
2 files changed, 59 insertions, 40 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index e8a8ea2..b868f56 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -871,6 +871,41 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);
#define RPC_THIS_RESTORE (THIS = old_THIS)
+static int
+rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
+{
+ struct timespec ts = {0, };
+ gf_boolean_t unref_clnt = _gf_false;
+
+ rpc_clnt_connection_cleanup (conn);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
+ ts.tv_sec = 10;
+ ts.tv_nsec = 0;
+
+ rpc_clnt_ref (clnt);
+ conn->reconnect = gf_timer_call_after (clnt->ctx, ts,
+ rpc_clnt_reconnect, conn);
+ if (conn->reconnect == NULL) {
+ gf_log (conn->name, GF_LOG_WARNING,
+ "Cannot create rpc_clnt_reconnect timer");
+ unref_clnt = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (clnt->notifyfn)
+ clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
+
+ if (unref_clnt)
+ rpc_clnt_ref (clnt);
+
+ return 0;
+}
+
int
rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
rpc_transport_event_t event, void *data, ...)
@@ -880,9 +915,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
int ret = -1;
rpc_request_info_t *req_info = NULL;
rpc_transport_pollin_t *pollin = NULL;
- struct timespec ts = {0, };
void *clnt_mydata = NULL;
- gf_boolean_t unref_clnt = _gf_false;
DECLARE_OLD_THIS;
conn = mydata;
@@ -898,35 +931,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
switch (event) {
case RPC_TRANSPORT_DISCONNECT:
{
- rpc_clnt_connection_cleanup (conn);
-
- pthread_mutex_lock (&conn->lock);
+ pthread_mutex_lock (&clnt->notifylock);
{
- if (!conn->rpc_clnt->disabled
- && (conn->reconnect == NULL)) {
- ts.tv_sec = 10;
- ts.tv_nsec = 0;
-
- rpc_clnt_ref (clnt);
- conn->reconnect =
- gf_timer_call_after (clnt->ctx, ts,
- rpc_clnt_reconnect,
- conn);
- if (conn->reconnect == NULL) {
- gf_log (conn->name, GF_LOG_WARNING,
- "Cannot create rpc_clnt_reconnect timer");
- unref_clnt = _gf_true;
- }
- }
+ rpc_clnt_handle_disconnect (clnt, conn);
}
- pthread_mutex_unlock (&conn->lock);
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_DISCONNECT, NULL);
- if (unref_clnt)
- rpc_clnt_ref (clnt);
-
+ pthread_mutex_unlock (&clnt->notifylock);
break;
}
@@ -981,17 +990,21 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
case RPC_TRANSPORT_CONNECT:
{
- /* Every time there is a disconnection, processes
- should try to connect to 'glusterd' (ie, default
- port) or whichever port given as 'option remote-port'
- in volume file. */
- /* Below code makes sure the (re-)configured port lasts
- for just one successful attempt */
- conn->config.remote_port = 0;
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_CONNECT, NULL);
+ pthread_mutex_lock (&clnt->notifylock);
+ {
+ /* Every time there is a disconnection, processes
+ * should try to connect to 'glusterd' (ie, default
+ * port) or whichever port given as 'option remote-port'
+ * in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ * for just one successful attempt */
+ conn->config.remote_port = 0;
+
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn (clnt, clnt->mydata,
+ RPC_CLNT_CONNECT, NULL);
+ }
+ pthread_mutex_unlock (&clnt->notifylock);
break;
}
@@ -1115,6 +1128,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
}
pthread_mutex_init (&rpc->lock, NULL);
+ pthread_mutex_init (&rpc->notifylock, NULL);
rpc->ctx = ctx;
rpc->owner = owner;
@@ -1124,6 +1138,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
if (rpc->reqpool == NULL) {
pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->notifylock);
GF_FREE (rpc);
rpc = NULL;
goto out;
@@ -1133,6 +1148,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
reqpool_size);
if (rpc->saved_frames_pool == NULL) {
pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->notifylock);
mem_pool_destroy (rpc->reqpool);
GF_FREE (rpc);
rpc = NULL;
@@ -1142,6 +1158,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
ret = rpc_clnt_connection_init (rpc, ctx, options, name);
if (ret == -1) {
pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->notifylock);
mem_pool_destroy (rpc->reqpool);
mem_pool_destroy (rpc->saved_frames_pool);
GF_FREE (rpc);
@@ -1737,6 +1754,7 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)
saved_frames_destroy (rpc->conn.saved_frames);
pthread_mutex_destroy (&rpc->lock);
pthread_mutex_destroy (&rpc->conn.lock);
+ pthread_mutex_destroy (&rpc->notifylock);
/* mem-pool should be destroyed, otherwise,
it will cause huge memory leaks */
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index f84b4cb..3a5b287 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -172,6 +172,7 @@ struct rpc_req {
typedef struct rpc_clnt {
pthread_mutex_t lock;
+ pthread_mutex_t notifylock;
rpc_clnt_notify_t notifyfn;
rpc_clnt_connection_t conn;
void *mydata;