summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoranand <anekkunt@redhat.com>2015-07-21 15:42:24 +0530
committerAtin Mukherjee <amukherj@redhat.com>2015-08-06 23:24:22 -0700
commit90beb1602cb2926c14a9bd02653cd75b40661cd4 (patch)
tree10add76f47f869d7069ccffb8a2c46c4ab72d964
parentb8120c91cb11de7b21f2d6a36f7dc89ad0b7c387 (diff)
glusterd: getting txn_id from frame->cookie in op_sm call back
RCA: If rebalance start is triggered from one node and one of other nodes in the cluster goes down simultaneously we might end up in a case where callback will use the txn_id from priv->global_txn_id which is always zeros and this means injecting an event with an incorrect txn_id will result into op-sm getting stuck. fix: set txn_id in frame->cookie during sumbit_and_request, so that we can get txn_id in call back functions. Back port of : >Change-Id: I519176c259ea9d37897791a77a7c92eb96d10052 >BUG: 1245142 >Signed-off-by: anand <anekkunt@redhat.com> >Reviewed-on: http://review.gluster.org/11728 >Reviewed-by: Atin Mukherjee <amukherj@redhat.com> >Tested-by: NetBSD Build System <jenkins@build.gluster.org> >Tested-by: Gluster Build System <jenkins@build.gluster.com> >(cherry picked from commit 65e6ab1bfbbec7755f7ae2294cb83334ac65a296) Change-Id: I376d6c791b0200a8371f590d29c3e950658a02c7 BUG: 1249925 Reviewed-on: http://review.gluster.org/11823 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
-rw-r--r--tests/bugs/glusterd/bug-1245142-rebalance_test.t28
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c77
2 files changed, 89 insertions, 16 deletions
diff --git a/tests/bugs/glusterd/bug-1245142-rebalance_test.t b/tests/bugs/glusterd/bug-1245142-rebalance_test.t
new file mode 100644
index 00000000000..a28810ea71c
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1245142-rebalance_test.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+
+cleanup;
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume rebalance $V0 start &
+#kill glusterd2 after requst sent, so that call back is called
+#with rpc->status fail ,so roughly 1sec delay is introduced to get this scenario.
+sleep 1
+kill_glusterd 2
+#check glusterd commands are working after rebalance start command
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 12edd7feccf..e93b7b6cc6d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -1131,14 +1131,17 @@ __glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
uuid_t *txn_id = NULL;
+ call_frame_t *frame = NULL;
this = THIS;
GF_ASSERT (this);
GF_ASSERT (req);
priv = this->private;
GF_ASSERT (priv);
+ GF_ASSERT(myframe);
- txn_id = &priv->global_txn_id;
+ frame = myframe;
+ txn_id = frame->cookie;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -1196,10 +1199,6 @@ out:
uuid_utoa (rsp.uuid));
}
- ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
- gf_msg_debug (this->name, 0, "transaction ID = %s",
- uuid_utoa (*txn_id));
-
rcu_read_lock ();
peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL);
if (peerinfo == NULL) {
@@ -1247,6 +1246,7 @@ out:
} else {
free (rsp.dict.dict_val); //malloced by xdr
}
+ GF_FREE (frame->cookie);
GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
return ret;
}
@@ -1275,14 +1275,17 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
glusterd_conf_t *priv = NULL;
uuid_t *txn_id = NULL;
glusterd_op_info_t txn_op_info = {{0},};
+ call_frame_t *frame = NULL;
this = THIS;
GF_ASSERT (this);
GF_ASSERT (req);
priv = this->private;
GF_ASSERT (priv);
+ GF_ASSERT(myframe);
- txn_id = &priv->global_txn_id;
+ frame = myframe;
+ txn_id = frame->cookie;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -1340,9 +1343,6 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
"Received commit ACC from uuid: %s",
uuid_utoa (rsp.uuid));
}
- ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
- gf_msg_debug (this->name, 0, "transaction ID = %s",
- uuid_utoa (*txn_id));
ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info);
if (ret) {
@@ -1416,6 +1416,7 @@ out:
if (dict)
dict_unref (dict);
free (rsp.op_errstr); //malloced by xdr
+ GF_FREE (frame->cookie);
GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
return ret;
}
@@ -1900,9 +1901,9 @@ glusterd_stage_op (call_frame_t *frame, xlator_t *this,
int ret = -1;
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_conf_t *priv = NULL;
- call_frame_t *dummy_frame = NULL;
dict_t *dict = NULL;
gf_boolean_t is_alloc = _gf_true;
+ uuid_t *txn_id = NULL;
if (!this) {
goto out;
@@ -1932,13 +1933,34 @@ glusterd_stage_op (call_frame_t *frame, xlator_t *this,
"to request buffer");
goto out;
}
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin (dict, "transaction_id",
+ (void **)&txn_id);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_ID_GET_FAIL,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_msg_debug (this->name, 0,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+ }
+ if (!frame)
+ frame = create_frame (this, this->ctx->pool);
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
+ if (!frame) {
+ ret = -1;
goto out;
+ }
+ frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!frame->cookie) {
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy (frame->cookie, *txn_id);
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
peerinfo->mgmt, GLUSTERD_MGMT_STAGE_OP,
NULL,
this, glusterd_stage_op_cbk,
@@ -1963,6 +1985,7 @@ glusterd_commit_op (call_frame_t *frame, xlator_t *this,
call_frame_t *dummy_frame = NULL;
dict_t *dict = NULL;
gf_boolean_t is_alloc = _gf_true;
+ uuid_t *txn_id = NULL;
if (!this) {
goto out;
@@ -1991,12 +2014,34 @@ glusterd_commit_op (call_frame_t *frame, xlator_t *this,
"request buffer");
goto out;
}
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin (dict, "transaction_id",
+ (void **)&txn_id);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_ID_GET_FAIL,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_msg_debug (this->name, 0,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+ }
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
+ if (!frame)
+ frame = create_frame (this, this->ctx->pool);
+
+ if (!frame) {
+ ret = -1;
goto out;
+ }
+ frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!frame->cookie) {
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy (frame->cookie, *txn_id);
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
peerinfo->mgmt, GLUSTERD_MGMT_COMMIT_OP,
NULL,
this, glusterd_commit_op_cbk,