summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Avra Sengupta <asengupt@redhat.com> 2016-02-29 14:43:58 +0530
committer Jeff Darcy <jdarcy@redhat.com> 2016-03-10 09:24:33 -0800
commit 2bfdc30e0e7fba6f97d8829b2618a1c5907dc404 (patch)
tree 35fefca2eb0502aa42993e1644304aebbbe0c233
parent 537822d7eb7732f2f65300668355b6d7db448f39 (diff)
protocol client/server: Fix client-server handshake
Problem: Currently, on a successful connection between the protocol server and client, the protocol client initiates a CHILD_UP event in the client stack. At this point in time, only the connection between server and client is established, and there is no guarantee that the server-side stack is ready to serve requests. This works fine today, because most server-side translators do not depend on any other factors before being able to serve requests, and hence they are up by the time the client stack translators receive the CHILD_UP (initiated by the client handshake). The gap is exposed when certain server-side translators, NSR-Server for example, have a couple of protocol clients as their children (connecting them to other bricks) and cannot really serve requests until a quorum of their children is up. Hence these translators should defer sending CHILD_UP until they have enough children up, and the same needs to be propagated to the client stack translators. Fix: Maintain a child_up variable in both the protocol client and protocol server translators. The protocol server should update this value based on the CHILD_UP and CHILD_DOWN events it receives from the translators below it. On receiving such an event, it should forward that event to the client. The protocol client, on receiving such an event, should forward it up the client stack, thereby letting the client translators correctly know that the server is up and ready to serve. Clients connecting later (long after a server has initialized and processed its CHILD_UP events) will receive a child_up status as part of the handshake and, based on the status of the server's child_up, can either propagate a CHILD_UP event or defer it.
Change-Id: I0807141e62118d8de9d9cde57a53a607be44a0e0 BUG: 1312845 Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/13549 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r-- rpc/rpc-lib/src/protocol-common.h 2
-rw-r--r-- xlators/protocol/client/src/client-callback.c 52
-rw-r--r-- xlators/protocol/client/src/client-handshake.c 30
-rw-r--r-- xlators/protocol/client/src/client-messages.h 1
-rw-r--r-- xlators/protocol/client/src/client.c 2
-rw-r--r-- xlators/protocol/client/src/client.h 3
-rw-r--r-- xlators/protocol/server/src/server-handshake.c 6
-rw-r--r-- xlators/protocol/server/src/server.c 71
-rw-r--r-- xlators/protocol/server/src/server.h 2
9 files changed, 157 insertions, 12 deletions
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 4dec463..4058295 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -134,6 +134,8 @@ enum gf_cbk_procnum {
GF_CBK_EVENT_NOTIFY,
GF_CBK_GET_SNAPS,
GF_CBK_CACHE_INVALIDATION,
+ GF_CBK_CHILD_UP,
+ GF_CBK_CHILD_DOWN,
GF_CBK_MAXVALUE,
};
diff --git a/xlators/protocol/client/src/client-callback.c b/xlators/protocol/client/src/client-callback.c
index 16f5441..7ee2113 100644
--- a/xlators/protocol/client/src/client-callback.c
+++ b/xlators/protocol/client/src/client-callback.c
@@ -81,13 +81,53 @@ out:
return 0;
}
+int
+client_cbk_child_up (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ clnt_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, rpc, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ gf_msg_debug (this->name, 0, "Received CHILD_UP");
+ conf->child_up = _gf_true;
+
+ this->notify (this, GF_EVENT_CHILD_UP, NULL);
+out:
+ return 0;
+}
+
+int
+client_cbk_child_down (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ clnt_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, rpc, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ gf_msg_debug (this->name, 0, "Received CHILD_DOWN");
+ conf->child_up = _gf_false;
+
+ this->notify (this, GF_EVENT_CHILD_DOWN, NULL);
+out:
+ return 0;
+}
+
rpcclnt_cb_actor_t gluster_cbk_actors[GF_CBK_MAXVALUE] = {
- [GF_CBK_NULL] = {"NULL", GF_CBK_NULL, client_cbk_null },
- [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, client_cbk_fetchspec },
- [GF_CBK_INO_FLUSH] = {"INO_FLUSH", GF_CBK_INO_FLUSH, client_cbk_ino_flush },
- [GF_CBK_CACHE_INVALIDATION] = {"CACHE_INVALIDATION",
- GF_CBK_CACHE_INVALIDATION,
- client_cbk_cache_invalidation },
+ [GF_CBK_NULL] = {"NULL", GF_CBK_NULL, client_cbk_null },
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, client_cbk_fetchspec },
+ [GF_CBK_INO_FLUSH] = {"INO_FLUSH", GF_CBK_INO_FLUSH, client_cbk_ino_flush },
+ [GF_CBK_CACHE_INVALIDATION] = {"CACHE_INVALIDATION", GF_CBK_CACHE_INVALIDATION, client_cbk_cache_invalidation },
+ [GF_CBK_CHILD_UP] = {"CHILD_UP", GF_CBK_CHILD_UP, client_cbk_child_up },
+ [GF_CBK_CHILD_DOWN] = {"CHILD_DOWN", GF_CBK_CHILD_DOWN, client_cbk_child_down },
};
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 3b170b8..5352e54 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -129,13 +129,26 @@ client_notify_parents_child_up (xlator_t *this)
clnt_conf_t *conf = NULL;
int ret = 0;
+ GF_VALIDATE_OR_GOTO("client", this, out);
conf = this->private;
- ret = client_notify_dispatch_uniq (this, GF_EVENT_CHILD_UP, NULL);
- if (ret)
- gf_msg (this->name, GF_LOG_INFO, 0,
- PC_MSG_CHILD_UP_NOTIFY_FAILED, "notify of CHILD_UP "
- "failed");
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (conf->child_up) {
+ ret = client_notify_dispatch_uniq (this, GF_EVENT_CHILD_UP,
+ NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ PC_MSG_CHILD_UP_NOTIFY_FAILED,
+ "notify of CHILD_UP failed");
+ goto out;
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_STATUS,
+ "Defering sending CHILD_UP message as the client "
+ "translators are not yet ready to serve.");
+ }
+out:
return 0;
}
@@ -1157,6 +1170,13 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
goto out;
}
+ ret = dict_get_uint32 (reply, "child_up", &conf->child_up);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED,
+ "failed to find key 'child_up' in the options");
+ goto out;
+ }
+
ret = dict_get_uint32 (reply, "clnt-lk-version", &lk_ver);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED,
diff --git a/xlators/protocol/client/src/client-messages.h b/xlators/protocol/client/src/client-messages.h
index 0fc9d31..a4b391b 100644
--- a/xlators/protocol/client/src/client-messages.h
+++ b/xlators/protocol/client/src/client-messages.h
@@ -617,6 +617,7 @@
* @recommendedaction
*
*/
+#define PC_MSG_CHILD_STATUS (GLFS_PC_BASE + 64)
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 29fe44c..f1f58eb 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -2467,6 +2467,8 @@ init (xlator_t *this)
pthread_mutex_init (&conf->lock, NULL);
INIT_LIST_HEAD (&conf->saved_fds);
+ conf->child_up = _gf_false;
+
/* Initialize parameters for lock self healing*/
conf->lk_version = 1;
conf->grace_timer = NULL;
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 415fabb..a4d4d9f 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -133,6 +133,9 @@ typedef struct clnt_conf {
gf_boolean_t destroy; /* if enabled implies fini was called
* on @this xlator instance */
+
+ gf_boolean_t child_up; /* Set to true, when child is up, and
+ * false, when child is down */
} clnt_conf_t;
typedef struct _client_fd_ctx {
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 293509c..fe5dfba 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -385,6 +385,12 @@ server_setvolume (rpcsvc_request_t *req)
goto fail;
}
+ ret = dict_set_int32 (reply, "child_up", conf->child_up);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' "
+ "in the reply dict");
+
buf = memdup (args.dict.dict_val, args.dict.dict_len);
if (buf == NULL) {
op_ret = -1;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index d07b840..24e3150 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -958,6 +958,8 @@ init (xlator_t *this)
if (ret)
conf->conf_dir = CONFDIR;
+ conf->child_up = _gf_false;
+
/*ret = dict_get_str (this->options, "statedump-path", &statedump_path);
if (!ret) {
gf_path_strip_trailing_slashes (statedump_path);
@@ -1237,6 +1239,35 @@ out:
}
int
+server_process_child_event (xlator_t *this, int32_t event, void *data,
+ enum gf_cbk_procnum cbk_procnum)
+{
+ int ret = -1;
+ server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ rpcsvc_callback_submit (conf->rpc, xprt,
+ &server_cbk_prog,
+ cbk_procnum,
+ NULL, 0);
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
notify (xlator_t *this, int32_t event, void *data, ...)
{
int ret = -1;
@@ -1246,6 +1277,10 @@ notify (xlator_t *this, int32_t event, void *data, ...)
server_conf_t *conf = NULL;
va_list ap;
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
dict = data;
va_start (ap, data);
output = va_arg (ap, dict_t*);
@@ -1272,7 +1307,41 @@ notify (xlator_t *this, int32_t event, void *data, ...)
conf->parent_up = _gf_true;
- /* fall through and notify the event to children */
+ default_notify (this, event, data);
+ break;
+ }
+
+ case GF_EVENT_CHILD_UP:
+ {
+ conf->child_up = _gf_true;
+ ret = server_process_child_event (this, event, data,
+ GF_CBK_CHILD_UP);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_SERVER_EVENT_UPCALL_FAILED,
+ "server_process_child_event failed");
+ goto out;
+ }
+
+ default_notify (this, event, data);
+ break;
+ }
+
+ case GF_EVENT_CHILD_DOWN:
+ {
+ conf->child_up = _gf_false;
+ ret = server_process_child_event (this, event, data,
+ GF_CBK_CHILD_DOWN);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_SERVER_EVENT_UPCALL_FAILED,
+ "server_process_child_event failed");
+ goto out;
+ }
+
+ default_notify (this, event, data);
+ break;
+
}
default:
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index c0e2752..7980d30 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -72,6 +72,8 @@ struct server_conf {
* in case if volume set options
* (say *.allow | *.reject) are
* tweeked */
+ gf_boolean_t child_up; /* Set to true, when child is up, and
+ * false, when child is down */
};
typedef struct server_conf server_conf_t;