path: root/xlators/mgmt/glusterd/src/glusterd-mgmt.c
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-mgmt.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-mgmt.c  431
1 file changed, 412 insertions(+), 19 deletions(-)
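
The patch below adds a new "post commit" phase to the glusterd mgmt v3 transaction framework and routes add-brick, replace-brick and remove-brick through it. As a quick orientation, the following self-contained C sketch models the resulting phase order of glusterd_mgmt_v3_initiate_all_phases(); it is illustrative only and not part of the patch, though the phase names mirror the GLUSTERD_MGMT_V3_* procedures and the helper functions that appear in the diff.

/* Illustrative sketch only (not GlusterFS source): models the mgmt v3
 * phase order after this patch.  POST_COMMIT is the new phase, run
 * between COMMIT and POST_VALIDATE, as the hunk touching
 * glusterd_mgmt_v3_initiate_all_phases() near the end of this diff shows. */
#include <stdio.h>

int main(void)
{
    static const char *const phases[] = {
        "LOCK",          /* acquire mgmt v3 locks on peers                */
        "PRE_VALIDATE",  /* e.g. glusterd_op_stage_remove_brick()         */
        "COMMIT",        /* e.g. glusterd_op_remove_brick()               */
        "POST_COMMIT",   /* new: glusterd_post_commit_add_brick() and
                          * glusterd_post_commit_replace_brick()          */
        "POST_VALIDATE", /* post-commit validation                        */
        "UNLOCK",        /* release mgmt v3 locks                         */
    };
    size_t i;

    /* Print the phases in dispatch order. */
    for (i = 0; i < sizeof(phases) / sizeof(phases[0]); i++)
        printf("%zu. GLUSTERD_MGMT_V3_%s\n", i + 1, phases[i]);
    return 0;
}
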
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index bf9b5a870a0..bca7221062b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -86,6 +86,11 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
peer_str, err_string);
break;
}
+ case GLUSTERD_MGMT_V3_POST_COMMIT: {
+ snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
case GLUSTERD_MGMT_V3_POST_VALIDATE: {
snprintf(op_err, sizeof(op_err),
"Post Validation failed on %s. %s", peer_str,
@@ -187,6 +192,16 @@ gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
goto out;
}
break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Remove brick prevalidation failed.");
+ goto out;
+ }
+ break;
+
case GD_OP_RESET_BRICK:
ret = glusterd_reset_brick_prevalidate(dict, op_errstr, rsp_dict);
if (ret) {
@@ -337,6 +352,15 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
}
break;
}
+ case GD_OP_REMOVE_BRICK: {
+ ret = glusterd_op_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Remove-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
case GD_OP_RESET_BRICK: {
ret = glusterd_op_reset_brick(dict, rsp_dict);
if (ret) {
@@ -386,6 +410,47 @@ out:
}
int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ switch (op) {
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_post_commit_add_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Add-brick post commit failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_post_commit_replace_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Replace-brick post commit failed.");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict)
{
@@ -582,15 +647,21 @@ gd_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK,
@@ -759,6 +830,7 @@ glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
goto out;
}
case GD_OP_STOP_VOLUME:
+ case GD_OP_REMOVE_BRICK:
case GD_OP_PROFILE_VOLUME:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_REBALANCE:
@@ -897,15 +969,21 @@ gd_mgmt_v3_pre_validate_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(
peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
@@ -948,7 +1026,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
}
if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
- op == GD_OP_REBALANCE) {
+ op == GD_OP_REBALANCE || op == GD_OP_REMOVE_BRICK) {
ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
@@ -1076,6 +1154,7 @@ glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict,
case GD_OP_START_VOLUME:
case GD_OP_STOP_VOLUME:
case GD_OP_ADD_BRICK:
+ case GD_OP_REMOVE_BRICK:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_REPLACE_BRICK:
case GD_OP_RESET_BRICK:
@@ -1258,15 +1337,21 @@ gd_mgmt_v3_brick_op_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_BRICK_OP,
@@ -1515,15 +1600,21 @@ gd_mgmt_v3_commit_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_COMMIT,
@@ -1559,12 +1650,25 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
GF_ASSERT(op_errstr);
GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
- if (op == GD_OP_REBALANCE || op == GD_OP_DEFRAG_BRICK_VOLUME) {
- ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to set rebalance id in dict.");
- }
+ switch (op) {
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+
+ ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id in dict.");
+ }
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_set_rebalance_id_for_remove_brick(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id for remove-brick in dict.");
+ }
+ break;
+ default:
+ break;
}
rsp_dict = dict_new();
if (!rsp_dict) {
@@ -1662,6 +1766,274 @@ out:
}
int32_t
+gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ free(rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid);
+ GF_FREE(peerid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_post_commit_cbk_fn);
+}
+
+int
+gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(
+ peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Post commit on local node */
+ ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Post commit failed "
+ "on localhost. Please "
+ "check log file for details.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending post commit req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+ if (!peerinfo->connected)
+ continue;
+
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent post commit req for %s to %d "
+ "peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ glusterd_op_modify_op_ctx(op, op_ctx);
+ return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -1751,16 +2123,22 @@ gd_mgmt_v3_post_validate_req(glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
req.op_ret = op_ret;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(
peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
@@ -1967,15 +2345,21 @@ gd_mgmt_v3_unlock(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK,
@@ -2338,6 +2722,15 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
+ /* POST COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit Op Failed");
+ goto out;
+ }
+
/* POST-COMMIT VALIDATE PHASE */
/* As of now, post_validate is not trying to cleanup any failed
commands. So as of now, I am sending 0 (op_ret as 0).