summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
diff options
context:
space:
mode:
authorAnuradha <atalur@redhat.com>2015-06-05 16:46:39 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2015-06-25 22:55:52 -0700
commit4f76b8d11a93cfb74667f6b3051186b86c1ec55b (patch)
treec2251de58d546977b2d353987bc6ce9313030291 /xlators/mgmt/glusterd/src/glusterd-replace-brick.c
parentc9cbddf3d936f2bf1ac46084a74051096a5ac9a3 (diff)
glusterd/ afr : set afr pending xattrs on replace brick
This patch is part one change to prevent data loss in a replicate volume on doing a replace-brick commit force operation. Problem: After doing replace-brick commit force, there is a chance that self heal happens from the replaced (sink) brick rather than the source brick leading to data loss. Solution: During the commit phase of replace brick, after old brick is brought down, create a temporary mount and perform setfattr operation (on virtual xattr) indicating AFR to mark the replaced brick as sink. As a part of this change replace-brick command is being changed to use mgmt_v3 framework rather than op-state-machine framework. Many thanks to Krishnan Parthasarathi for helping me out on this. Change-Id: If0d51b5b3cef5b34d5672d46ea12eaa9d35fd894 BUG: 1207829 Signed-off-by: Anuradha <atalur@redhat.com> Reviewed-on: http://review.gluster.org/10076 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Ravishankar N <ravishankar@redhat.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-replace-brick.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c298
1 files changed, 185 insertions, 113 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 59113cbe104..1dbd82c4364 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -21,6 +21,7 @@
#include "glusterd-nfs-svc.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
+#include "glusterd-mgmt.h"
#include "run.h"
#include "syscall.h"
@@ -34,6 +35,79 @@
extern uuid_t global_txn_id;
int
+glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict);
+int
+glusterd_handle_replicate_replace_brick (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int32_t ret = -1;
+ char tmpmount[] = "/tmp/mntXXXXXX";
+ char logfile[PATH_MAX] = {0,};
+ int dirty[3] = {0,};
+ runner_t runner = {0};
+ glusterd_conf_t *priv = NULL;
+ char *pid = NULL;
+
+ priv = THIS->private;
+
+ dirty[2] = hton32(1);
+
+ ret = sys_lsetxattr (brickinfo->path, GF_AFR_DIRTY, dirty,
+ sizeof (dirty), 0);
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set extended"
+ " attribute %s : %s.", GF_AFR_DIRTY, strerror (errno));
+ goto out;
+ }
+
+ if (mkdtemp (tmpmount) == NULL) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to create a temporary mount directory.");
+ ret = -1;
+ goto out;
+ }
+ snprintf (logfile, sizeof (logfile),
+ DEFAULT_LOG_FILE_DIRECTORY"/%s-replace-brick-mount.log",
+ volinfo->volname);
+
+ ret = gf_asprintf (&pid, "%d", GF_CLIENT_PID_AFR_SELF_HEALD);
+ if (ret < 0)
+ goto out;
+
+ runinit (&runner);
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", "localhost",
+ "--volfile-id", volinfo->volname,
+ "--client-pid", pid,
+ "-l", logfile, tmpmount, NULL);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+
+ if (ret) {
+ runner_log (&runner, THIS->name, GF_LOG_ERROR, "mount command"
+ "failed.");
+ goto lock;
+ }
+ ret = sys_lsetxattr (tmpmount, GF_AFR_REPLACE_BRICK,
+ brickinfo->brick_id, sizeof (brickinfo->brick_id),
+ 0);
+ if (ret == -1)
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set extended"
+ " attribute %s : %s", GF_AFR_REPLACE_BRICK,
+ strerror (errno));
+ gf_umount_lazy (THIS->name, tmpmount, 1);
+lock:
+ synclock_lock (&priv->big_lock);
+out:
+ if (pid)
+ GF_FREE (pid);
+ gf_log ("", GF_LOG_DEBUG, "Returning with ret");
+ return ret;
+}
+
+int
__glusterd_handle_replace_brick (rpcsvc_request_t *req)
{
int32_t ret = -1;
@@ -121,22 +195,12 @@ __glusterd_handle_replace_brick (rpcsvc_request_t *req)
gf_log (this->name, GF_LOG_INFO, "Received replace brick commit-force "
"request operation");
- ret = glusterd_op_begin (req, GD_OP_REPLACE_BRICK, dict,
- msg, sizeof (msg));
+ ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (req,
+ GD_OP_REPLACE_BRICK, dict);
out:
free (cli_req.dict.dict_val);//malloced by xdr
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- if (ret) {
- if (msg[0] == '\0')
- snprintf (msg, sizeof (msg), "Operation failed");
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- dict, msg);
- }
-
return ret;
}
@@ -183,7 +247,6 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_brickinfo_t *dst_brickinfo = NULL;
gf_boolean_t enabled = _gf_false;
- dict_t *ctx = NULL;
glusterd_conf_t *priv = NULL;
char *savetok = NULL;
char pidfile[PATH_MAX] = {0};
@@ -287,8 +350,6 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
goto out;
}
- ctx = glusterd_op_get_ctx();
-
if (!strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
is_force = _gf_true;
} else {
@@ -305,7 +366,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
goto out;
}
- if (ctx) {
+ if (dict) {
if (!glusterd_is_fuse_available ()) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_RB_CMD_FAIL, "Unable to open /dev/"
@@ -488,7 +549,6 @@ rb_update_srcbrick_port (glusterd_volinfo_t *volinfo,
dict_t *rsp_dict, dict_t *req_dict, char *replace_op)
{
xlator_t *this = NULL;
- dict_t *ctx = NULL;
int ret = 0;
int dict_ret = 0;
int src_port = 0;
@@ -531,9 +591,8 @@ rb_update_srcbrick_port (glusterd_volinfo_t *volinfo,
}
}
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = dict_set_int32 (ctx, "src-brick-port",
+ if (req_dict) {
+ ret = dict_set_int32 (req_dict, "src-brick-port",
src_brickinfo->port);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -553,7 +612,6 @@ static int
rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict,
dict_t *req_dict, char *replace_op)
{
- dict_t *ctx = NULL;
int ret = 0;
int dict_ret = 0;
int dst_port = 0;
@@ -576,9 +634,8 @@ rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict,
}
}
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = dict_set_int32 (ctx, "dst-brick-port",
+ if (req_dict) {
+ ret = dict_set_int32 (req_dict, "dst-brick-port",
dst_brickinfo->port);
if (ret) {
gf_log ("", GF_LOG_DEBUG,
@@ -653,6 +710,16 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ /* if the volume is a replicate volume, do: */
+ if (glusterd_is_volume_replicate (volinfo)) {
+ if (!gf_uuid_compare (new_brickinfo->uuid, MY_UUID)) {
+ ret = glusterd_handle_replicate_replace_brick
+ (volinfo, new_brickinfo);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
ret = glusterd_create_volfiles_and_notify_services (volinfo);
if (ret)
goto out;
@@ -759,17 +826,6 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)
if (ret)
goto out;
- /* Set task-id, if available, in op_ctx dict for*/
- if (is_origin_glusterd (dict)) {
- ctx = glusterd_op_get_ctx();
- if (!ctx) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_OPCTX_GET_FAIL, "Failed to "
- "get op_ctx");
- ret = -1;
- goto out;
- }
- }
ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
dict, replace_op);
if (ret)
@@ -834,119 +890,135 @@ out:
return ret;
}
-void
-glusterd_do_replace_brick (void *data)
+int
+glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict)
{
- glusterd_volinfo_t *volinfo = NULL;
- int32_t src_port = 0;
- int32_t dst_port = 0;
- int32_t ret = 0;
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *volname = NULL;
- glusterd_brickinfo_t *src_brickinfo = NULL;
- glusterd_brickinfo_t *dst_brickinfo = NULL;
- glusterd_conf_t *priv = NULL;
- uuid_t *txn_id = NULL;
- xlator_t *this = NULL;
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+ char *cli_errstr = NULL;
+ char *op_errstr = NULL;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ uuid_t *originator_uuid = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t success = _gf_false;
+ gf_boolean_t is_acquired = _gf_false;
this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
- GF_ASSERT (data);
-
- txn_id = &priv->global_txn_id;
- dict = data;
-
- if (priv->timer) {
- gf_timer_call_cancel (THIS->ctx, priv->timer);
- priv->timer = NULL;
- gf_msg_debug ("", 0,
- "Cancelling timer thread");
- }
-
- gf_msg_debug (this->name, 0,
- "Replace brick operation detected");
-
- ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
- gf_msg_debug (this->name, 0, "transaction ID = %s",
- uuid_utoa (*txn_id));
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ conf = this->private;
+ GF_ASSERT (conf);
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_msg ("", GF_LOG_ERROR, 0,
- GD_MSG_DICT_GET_FAILED, "Unable to get src brick");
+ txn_generation = conf->generation;
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
goto out;
}
- gf_msg_debug (this->name, 0,
- "src brick=%s", src_brick);
-
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ gf_uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (dict, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
if (ret) {
- gf_msg ("", GF_LOG_ERROR, 0,
- GD_MSG_DICT_GET_FAILED, "Unable to get dst brick");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
goto out;
}
- gf_msg_debug (this->name, 0,
- "dst brick=%s", dst_brick);
-
- ret = glusterd_volinfo_find (volname, &volinfo);
+ ret = dict_set_int32 (dict, "is_synctasked", _gf_true);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, EINVAL,
- GD_MSG_VOLINFO_GET_FAIL, "Unable to find volinfo");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag to true.");
goto out;
}
- ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
- &src_brickinfo);
- if (ret) {
- gf_msg_debug (this->name, 0, "Unable to get src-brickinfo");
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_CREATE_FAIL, "Unable to create dict");
goto out;
}
+ dict_copy (dict, tmp_dict);
- ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo);
- if (!dst_brickinfo) {
- gf_msg_debug (this->name, 0, "Unable to get dst-brickinfo");
+ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr,
+ &op_errno, &is_acquired,
+ txn_generation);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
goto out;
}
- ret = glusterd_resolve_brick (dst_brickinfo);
+ ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op);
if (ret) {
- gf_msg_debug (this->name, 0, "Unable to resolve dst-brickinfo");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
goto out;
}
- ret = dict_get_int32 (dict, "src-brick-port", &src_port);
+ ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr,
+ &op_errno, txn_generation);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_GET_FAILED, "Unable to get src-brick port");
+ GD_MSG_PRE_VALIDATION_FAIL, "Pre Validation Failed");
goto out;
}
- ret = dict_get_int32 (dict, "dst-brick-port", &dst_port);
+ ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, errno,
- GD_MSG_DICT_GET_FAILED, "Unable to get dst-brick port");
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_COMMIT_OP_FAIL, "Commit Op Failed");
+ goto out;
}
- dst_brickinfo->port = dst_port;
- src_brickinfo->port = src_port;
+ ret = 0;
out:
- if (ret)
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
- txn_id, NULL);
- else
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC,
- txn_id, NULL);
+ op_ret = ret;
- synclock_lock (&priv->big_lock);
- {
- glusterd_op_sm ();
+ (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret,
+ &op_errstr, is_acquired,
+ txn_generation);
+
+ if (is_acquired) {
+ ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on "
+ "localhost.");
+ op_ret = ret;
+ }
}
- synclock_unlock (&priv->big_lock);
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response (op, op_ret, op_errno, req,
+ dict, op_errstr);
+
+ if (req_dict)
+ dict_unref (req_dict);
+
+ if (tmp_dict)
+ dict_unref (tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE (op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
}