author    Anuradha Talur <atalur@redhat.com>    2016-08-22 13:22:03 -0400
committer Pranith Kumar Karampuri <pkarampu@redhat.com>    2016-08-29 19:55:53 -0700
commit    936f8aeac3252951e7fa0cdaa5d260fad3bd5ea0 (patch)
tree      66240b8ef55d55ca9535a21f73a5f71c9ff6ade4 /xlators/mgmt/glusterd/src/glusterd-reset-brick.c
parent    c204f452dfd9907a0d32f35294a0805701a6d993 (diff)
glusterd : Introduce reset brick
The command allows a replace-brick operation in which the source and
destination bricks are the same.

Usage:
gluster v reset-brick <volname> <hostname:brick-path> start

This command kills the brick to be reset. Once it is run, the admin can
carry out whatever manual operations are needed, like configuring some
options for the brick. Once this is done, resetting the brick can be
completed with:

gluster v reset-brick <vname> <hostname:brick> <hostname:brick> commit {force}

This does the job of resetting the brick. The 'force' option should be
used when the brick already contains the volinfo id.

Problem: On doing a disk replacement of a brick in a replicate volume,
the following two scenarios may occur:

a) there is a chance that reads are served from this replaced-disk
brick, which leads to empty reads.
b) potential data loss if the next writes succeed only on the replaced
brick, and heal is done to the other bricks from this one.

Solution: After disk replacement, make sure the reset-brick command is
run for that brick, so that pending markers are set for the brick and it
is not chosen as a source for reads and heal. But, as of now,
replace-brick for the same brick path is not allowed. In order to fix
the above-mentioned problem, same-brick-path replace-brick is needed.

With this patch, reset-brick commit {force} will be allowed even when
source and destination <hostname:brickpath> are identical, as long as
1) the destination brick is not alive, and
2) the source and destination bricks have the same brick uuid and path.
Also, the destination brick after replace-brick will use the same port
as the source brick.

Change-Id: I440b9e892ffb781ea4b8563688c3f85c7a7c89de
BUG: 1266876
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: http://review.gluster.org/12250
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
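For illustration, a typical disk-replacement flow with this command might
look as follows (volume name, hostname, and brick path are hypothetical):

    # take the brick offline before servicing the disk
    gluster volume reset-brick myvol host1:/bricks/b1 start
    # ...replace the disk and remount it at the same path...
    # bring the same brick back; 'force' is needed if the path still
    # carries the old volume-id xattr
    gluster volume reset-brick myvol host1:/bricks/b1 host1:/bricks/b1 commit force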
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-reset-brick.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-reset-brick.c | 430
1 file changed, 430 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
new file mode 100644
index 00000000000..d1efe0663fb
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
@@ -0,0 +1,430 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterfs.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "glusterd-mgmt.h"
+#include "run.h"
+#include "syscall.h"
+
+#include <signal.h>
+
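+ /* Pre-validation (staging) handler for "gluster volume reset-brick". */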
+int
+glusterd_reset_brick_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = 0;
+ int32_t port = 0;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *volname = NULL;
+ char *op = NULL;
+ glusterd_op_t gd_op = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ char *host = NULL;
+ char msg[2048] = {0};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ gf_boolean_t enabled = _gf_false;
+ glusterd_conf_t *priv = NULL;
+ char *savetok = NULL;
+ char pidfile[PATH_MAX] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_force = _gf_false;
+ gsync_status_param_t param = {0,};
+ pid_t pid = -1;
+ uuid_t volume_id = {0,};
+ char *dup_dstbrick = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = glusterd_brick_op_prerequisites (dict, &op, &gd_op,
+ &volname, &volinfo,
+ &src_brick, &src_brickinfo,
+ pidfile,
+ op_errstr, rsp_dict);
+ if (ret)
+ goto out;
+
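+ /* "start" needs only the source-brick checks done above; the
+  * destination-brick validation below applies to the commit phases,
+  * and "commit force" relaxes the volume-id check on the destination.
+  */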
+ if (!strcmp (op, "GF_RESET_OP_START"))
+ goto done;
+
+ if (!strcmp (op, "GF_RESET_OP_COMMIT_FORCE"))
+ is_force = _gf_true;
+
+ ret = glusterd_get_dst_brick_info (&dst_brick, volname,
+ op_errstr,
+ &dst_brickinfo, &host,
+ dict, &dup_dstbrick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_new_brick_validate (dst_brick, dst_brickinfo,
+ msg, sizeof (msg), op);
+ /* A return of 0 means the destination brick is new to the volume.
+  * Reset-brick only replaces a brick with itself; adding a genuinely
+  * new brick must go through replace-brick, so fail the command.
+  */
+ if (ret == 0) {
+ if (!gf_uuid_compare (MY_UUID, dst_brickinfo->uuid)) {
+ ret = -1;
+ *op_errstr = gf_strdup
+ ("When destination brick is new,"
+ " please use"
+ " gluster volume "
+ "replace-brick <volname> "
+ "<src-brick> <dst-brick> "
+ "commit force");
+ if (*op_errstr)
+ gf_msg (this->name,
+ GF_LOG_ERROR,
+ EPERM,
+ GD_MSG_BRICK_VALIDATE_FAIL,
+ "%s", *op_errstr);
+ goto out;
+ }
+ } else if (ret == 1) {
+ if (gf_is_service_running (pidfile, &pid)) {
+ ret = -1;
+ *op_errstr = gf_strdup
+ ("Source brick"
+ " must be stopped."
+ " Please use "
+ "gluster volume "
+ "reset-brick <volname> "
+ "<dst-brick> start.");
+ if (*op_errstr)
+ gf_msg (this->name,
+ GF_LOG_ERROR,
+ EPERM,
+ GD_MSG_BRICK_VALIDATE_FAIL,
+ "%s", *op_errstr);
+ goto out;
+ }
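+ /* A brick that already belonged to a volume carries the
+  * trusted.glusterfs.volume-id xattr; reusing such a path
+  * requires an explicit "commit force".
+  */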
+ ret = sys_lgetxattr (dst_brickinfo->path,
+ GF_XATTR_VOL_ID_KEY,
+ volume_id, 16);
+ if (gf_uuid_compare (dst_brickinfo->uuid,
+ src_brickinfo->uuid) ||
+ (ret >= 0 && is_force == _gf_false)) {
+ ret = -1;
+ *op_errstr = gf_strdup ("Brick not available."
+ "It may be containing "
+ "or be contained "
+ "by an existing brick."
+ "Use 'force' option to "
+ "override this.");
+ if (*op_errstr)
+ gf_msg (this->name,
+ GF_LOG_ERROR,
+ EPERM,
+ GD_MSG_BRICK_VALIDATE_FAIL,
+ "%s", *op_errstr);
+ goto out;
+ }
+ ret = 0;
+ } else {
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr);
+ goto out;
+ }
+
+ volinfo->rep_brick.src_brick = src_brickinfo;
+ volinfo->rep_brick.dst_brick = dst_brickinfo;
+
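+ /* If the destination host is local, (re)create the brick path
+  * here; otherwise verify that the remote peer is known,
+  * connected, and befriended.
+  */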
+ if (gf_is_local_addr (host)) {
+ ret = glusterd_validate_and_create_brickpath
+ (dst_brickinfo,
+ volinfo->volume_id,
+ op_errstr, is_force);
+ if (ret)
+ goto out;
+ } else {
+ rcu_read_lock ();
+
+ peerinfo = glusterd_peerinfo_find (NULL, host);
+ if (peerinfo == NULL) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "%s is not a friend.",
+ host);
+ *op_errstr = gf_strdup (msg);
+
+ } else if (!peerinfo->connected) {
+ snprintf (msg, sizeof (msg), "%s "
+ "is not connected at "
+ "the moment.", host);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+
+ } else if (GD_FRIEND_STATE_BEFRIENDED !=
+ peerinfo->state.state) {
+ snprintf (msg, sizeof (msg),
+ "%s is not befriended "
+ "at the moment.", host);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ }
+ rcu_read_unlock ();
+
+ if (ret)
+ goto out;
+
+ }
+
+ ret = glusterd_get_brick_mount_dir
+ (dst_brickinfo->path,
+ dst_brickinfo->hostname,
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "Failed to get brick mount_dir.");
+ goto out;
+ }
+
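+ /* Hand the brick's mount directory back in rsp_dict so that the
+  * later phases of the operation can record it.
+  */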
+ ret = dict_set_dynstr_with_alloc (rsp_dict,
+ "brick1.mount_dir",
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick1.mount_dir");
+ goto out;
+ }
+
+ ret = dict_set_int32 (rsp_dict, "brick_count", 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set local_brick_count.");
+ goto out;
+ }
+
+done:
+ ret = 0;
+out:
+ GF_FREE (dup_dstbrick);
+ gf_msg_debug (this->name, 0, "Returning %d.", ret);
+
+ return ret;
+}
+
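+ /* Commit-side handler for reset-brick: carries out the "start"
+  * brick kill, or the "commit"/"commit force" brick replacement.
+  */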
+int
+glusterd_op_reset_brick (dict_t *dict, dict_t *rsp_dict)
+{
+ int ret = 0;
+ dict_t *ctx = NULL;
+ char *op = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ char *task_id_str = NULL;
+ char pidfile[PATH_MAX] = {0,};
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "operation", &op);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "dict_get on operation failed");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to get src brick");
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0, "src brick=%s", src_brick);
+
+ ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
+ &src_brickinfo,
+ _gf_false);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Unable to get src-brickinfo");
+ goto out;
+ }
+
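+ /* "start": disconnect and kill the brick process so that the
+  * admin can service the underlying disk; the brick stays down
+  * until the commit phase.
+  */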
+ if (!strcmp (op, "GF_RESET_OP_START")) {
+ (void) glusterd_brick_disconnect (src_brickinfo);
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ src_brickinfo, priv);
+ ret = glusterd_service_stop ("brick", pidfile,
+ SIGTERM, _gf_false);
+ if (ret == 0) {
+ glusterd_set_brick_status (src_brickinfo,
+ GF_BRICK_STOPPED);
+ (void) glusterd_brick_unlink_socket_file
+ (volinfo, src_brickinfo);
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_BRICK_CLEANUP_SUCCESS,
+ "Brick cleanup successful.");
+ } else {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_BRK_CLEANUP_FAIL,
+ "Unable to cleanup src brick");
+ goto out;
+ }
+ goto out;
+ } else if (!strcmp (op, "GF_RESET_OP_COMMIT") ||
+ !strcmp (op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED,
+ "Unable to get dst brick");
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick);
+
+ ret = glusterd_get_rb_dst_brickinfo (volinfo,
+ &dst_brickinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RB_BRICKINFO_GET_FAIL,
+ "Unable to get "
+ "reset brick "
+ "destination brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_resolve_brick (dst_brickinfo);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Unable to resolve dst-brickinfo");
+ goto out;
+ }
+
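+ /* The destination brick reuses the source brick's port, since
+  * reset-brick replaces the brick in place.
+  */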
+ ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
+ dict);
+ if (ret)
+ goto out;
+
+ if (gf_is_local_addr (dst_brickinfo->hostname)) {
+ gf_msg_debug (this->name, 0, "I AM THE DESTINATION HOST");
+ (void) glusterd_brick_disconnect (src_brickinfo);
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ src_brickinfo, priv);
+ ret = glusterd_service_stop ("brick", pidfile,
+ SIGTERM, _gf_false);
+ if (ret == 0) {
+ glusterd_set_brick_status
+ (src_brickinfo, GF_BRICK_STOPPED);
+ (void) glusterd_brick_unlink_socket_file
+ (volinfo, src_brickinfo);
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_BRICK_CLEANUP_SUCCESS,
+ "Brick cleanup successful.");
+ } else {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_BRK_CLEANUP_FAIL,
+ "Unable to cleanup src brick");
+ goto out;
+ }
+ }
+
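+ /* Stop the volume's auxiliary services around the brick swap;
+  * glusterd_svcs_manager () below brings them back once the new
+  * brick is in place.
+  */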
+ ret = glusterd_svcs_stop (volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Unable to stop gluster services, ret: %d",
+ ret);
+ goto out;
+ }
+ ret = glusterd_op_perform_replace_brick (volinfo, src_brick,
+ dst_brick, dict);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_BRICK_ADD_FAIL,
+ "Unable to add dst-brick: "
+ "%s to volume: %s", dst_brick,
+ volinfo->volname);
+ (void) glusterd_svcs_manager (volinfo);
+ goto out;
+ }
+
+ volinfo->rebal.defrag_status = 0;
+
+ ret = glusterd_svcs_manager (volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start one or more gluster services.");
+ }
+
+ ret = glusterd_fetchspec_notify (THIS);
+ glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick);
+ volinfo->rep_brick.src_brick = NULL;
+ volinfo->rep_brick.dst_brick = NULL;
+
+ if (!ret)
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RBOP_STATE_STORE_FAIL,
+ "Couldn't store"
+ " reset brick operation's state.");
+
+ }
+ } else {
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}