summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jeff Darcy <jdarcy@redhat.com> 2017-03-07 18:36:58 -0500
committer: Shyamsundar Ranganathan <srangana@redhat.com> 2017-03-10 14:50:11 -0500
commit: e6c10359ab90178f89aa23ae9328174c2602e88d (patch)
tree: 008b7f03ad551b7eec3e23e29fdd3370c2ac831f
parent: 2579daf23d6039d2b001d6e2392a952b39317af4 (diff)
glusterd: don't queue attach reqs before connecting
This was causing USS tests to fail. The underlying problem here is that if we try to queue the attach request too soon after starting a brick process, then the socket code will get an error trying to write to the still-unconnected socket. Its response is to shut down the socket, which causes the queued attach requests to be force-unwound. There's nothing to retry them, so they effectively never happen and those bricks (second and succeeding for a snapshot) never become available.

We *do* have a retry loop for attach requests, but currently break out as soon as a request is queued - not actually sent. The fix is to modify that loop so it will wait some more if the rpc connection isn't even complete yet. Now we break out only when we have a completed connection *and* a queued request.

Backport of:
> 53e2c875cf97df8337f7ddb5124df2fc6dd37bca
> BUG: 1430148
> Reviewed-on: https://review.gluster.org/16868

Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
BUG: 1431176
Change-Id: Ib6be13646f1fa9072b4a944ab5f13e1b29084841
Reviewed-on: https://review.gluster.org/16887
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 29
1 files changed, 18 insertions, 11 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index c501e1ad00b..97f95adbd11 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -4913,22 +4913,29 @@ my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
int
send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
{
- int ret = -1;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct iovec iov = {0, };
- ssize_t req_size = 0;
- call_frame_t *frame = NULL;
- gd1_mgmt_brick_op_req brick_req;
- void *req = &brick_req;
- void *errlbl = &&err;
- extern struct rpc_clnt_program gd_brick_prog;
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {0, };
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+ void *req = &brick_req;
+ void *errlbl = &&err;
+ struct rpc_clnt_connection *conn;
+ extern struct rpc_clnt_program gd_brick_prog;
if (!rpc) {
gf_log (this->name, GF_LOG_ERROR, "called with null rpc");
return -1;
}
+ conn = &rpc->conn;
+ if (!conn->connected || conn->disconnected) {
+ gf_log (this->name, GF_LOG_INFO, "not connected yet");
+ return -1;
+ }
+
brick_req.op = op;
brick_req.name = path;
brick_req.input.input_val = NULL;
@@ -5046,7 +5053,7 @@ attach_brick (xlator_t *this,
(void) build_volfile_path (full_id, path, sizeof(path), NULL);
int tries = 0;
- while (tries++ <= 10) {
+ while (tries++ <= 15) {
ret = send_attach_req (this, other_brick->rpc, path,
GLUSTERD_BRICK_ATTACH);
if (!ret) {