1 files changed, 1291 insertions, 718 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 7f4a53a1702..458bf168ede 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -1,849 +1,1422 @@
 /*
-  Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
-  This file is part of GlusterFS.
-
-  GlusterFS is free software; you can redistribute it and/or modify
-  it under the terms of the GNU Affero General Public License as published
-  by the Free Software Foundation; either version 3 of the License,
-  or (at your option) any later version.
-
-  GlusterFS is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Affero General Public License for more details.
-
-  You should have received a copy of the GNU Affero General Public License
-  along with this program.  If not, see
-  <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+   Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
 
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
 #include <inttypes.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <sys/resource.h>
 #include <sys/statvfs.h>
 
-#include "globals.h"
-#include "compat.h"
+#include <glusterfs/compat.h>
 #include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
 #include "glusterd-mem-types.h"
 #include "glusterd.h"
 #include "glusterd-sm.h"
 #include "glusterd-op-sm.h"
 #include "glusterd-utils.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-messages.h"
 #include "glusterd-store.h"
-#include "run.h"
-
-#include "syscall.h"
-#include "cli1.h"
-
-#define GF_DISK_SECTOR_SIZE 512
-
-static int
-write_with_holes (int fd, const char *buf, int size, off_t offset)
+#include <glusterfs/run.h>
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+
+#include <glusterfs/syscall.h>
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo)                           \
+    do {                                                                       \
+        int32_t _defrag_sockfile_len;                                          \
+        char tmppath[PATH_MAX] = {                                             \
+            0,                                                                 \
+        };                                                                     \
+        _defrag_sockfile_len = snprintf(                                       \
+            tmppath, PATH_MAX,                                                 \
+            DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", "rebalance",   \
+            volinfo->volname, uuid_utoa(MY_UUID));                             \
+        if ((_defrag_sockfile_len < 0) ||                                      \
+            (_defrag_sockfile_len >= PATH_MAX)) {                              \
+            path[0] = 0;                                                       \
+        } else {                                                               \
+            glusterd_set_socket_filepath(tmppath, path, sizeof(path));         \
+        }                                                                      \
+    } while (0)
+
+int32_t
+glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+                      void *myframe);
+int
+glusterd_defrag_start_validate(glusterd_volinfo_t *volinfo, char *op_errstr,
+                               size_t len, glusterd_op_t op)
 {
-        int ret          = -1;
-        int start_idx    = 0;
-        int tmp_offset   = 0;
-        int write_needed = 0;
-
-        for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= size;
-             start_idx += GF_DISK_SECTOR_SIZE) {
-                /* Check if a block has full '0's. assume as hole if true */
-                if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) == 0) {
-                        write_needed = 1;
-                        continue;
-                }
-
-                if (write_needed) {
-                        ret = write (fd, buf + tmp_offset,
-                                     (start_idx - tmp_offset));
-                        if (ret < 0)
-                                goto out;
-
-                        write_needed = 0;
-                }
-                tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
-
-                ret = lseek (fd, (offset + tmp_offset), SEEK_SET);
-                if (ret < 0)
-                        goto out;
-        }
-
-        if ((start_idx < size) || write_needed) {
-                /* This means, last chunk is not yet written.. write it */
-                ret = write (fd, buf + tmp_offset, (size - tmp_offset));
-                if (ret < 0)
-                        goto out;
-        }
-
-        /* do it regardless of all the above cases as we had to 'write' the
-           given number of bytes */
-        ret = ftruncate (fd, offset + size);
-        if (ret)
-                goto out;
-
-        ret = 0;
+    int ret = -1;
+    xlator_t *this = NULL;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    /* Check only if operation is not remove-brick */
+    if ((GD_OP_REMOVE_BRICK != op) && !gd_is_remove_brick_committed(volinfo)) {
+        gf_msg_debug(this->name, 0,
+                     "A remove-brick task on "
+                     "volume %s is not yet committed",
+                     volinfo->volname);
+        snprintf(op_errstr, len,
+                 "A remove-brick task on volume %s is"
+                 " not yet committed. Either commit or stop the "
+                 "remove-brick task.",
+                 volinfo->volname);
+        goto out;
+    }
+
+    if (glusterd_is_defrag_on(volinfo)) {
+        gf_msg_debug(this->name, 0, "rebalance on volume %s already started",
+                     volinfo->volname);
+        snprintf(op_errstr, len, "Rebalance on %s is already started",
+                 volinfo->volname);
+        goto out;
+    }
+
+    ret = 0;
 out:
-        return ret;
-
+    gf_msg_debug(this->name, 0, "Returning %d", ret);
+    return ret;
 }
-int
-gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
-{
-        int                     ret                    = -1;
-        int                     dst_fd                 = -1;
-        int                     src_fd                 = -1;
-        DIR                    *fd                     = NULL;
-        glusterd_defrag_info_t *defrag                 = NULL;
-        struct dirent          *entry                  = NULL;
-        struct stat             stbuf                  = {0,};
-        struct stat             new_stbuf              = {0,};
-        char                    full_path[PATH_MAX]    = {0,};
-        char                    tmp_filename[PATH_MAX] = {0,};
-        char                    value[16]              = {0,};
-        char                    linkinfo[PATH_MAX]     = {0,};
-        struct statvfs          src_statfs             = {0,};
-        struct statvfs          dst_statfs             = {0,};
-        int                     file_has_holes         = 0;
-        off_t                   offset                 = 0;
-
-        if (!volinfo->defrag)
-                goto out;
 
-        defrag = volinfo->defrag;
+int32_t
+__glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+                         rpc_clnt_event_t event, void *data)
+{
+    glusterd_volinfo_t *volinfo = NULL;
+    glusterd_defrag_info_t *defrag = NULL;
+    int ret = 0;
+    char pidfile[PATH_MAX];
+    glusterd_conf_t *priv = NULL;
+    xlator_t *this = NULL;
+    int pid = -1;
+
+    this = THIS;
+    if (!this)
+        return 0;
 
-        fd = opendir (dir);
-        if (!fd)
-                goto out;
-        while ((entry = readdir (fd))) {
-                if (!entry)
-                        break;
-
-                if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
-                        continue;
-
-                snprintf (full_path, PATH_MAX, "%s/%s", dir, entry->d_name);
-
-                ret = stat (full_path, &stbuf);
-                if (ret == -1)
-                        continue;
-
-                if (!S_ISREG (stbuf.st_mode))
-                        continue;
-
-                defrag->num_files_lookedup += 1;
-
-                if (stbuf.st_nlink > 1)
-                        continue;
-
-                /* if distribute is present, it will honor this key.
-                   -1 is returned if distribute is not present or file doesn't
-                   have a link-file. If file has link-file, the path of
-                   link-file will be the value  */
-                ret = sys_lgetxattr (full_path, GF_XATTR_LINKINFO_KEY,
-                                     &linkinfo, PATH_MAX);
-                if (ret <= 0)
-                        continue;
-
-                /* If the file is open, don't run rebalance on it */
-                ret = sys_lgetxattr (full_path, GLUSTERFS_OPEN_FD_COUNT,
-                                     &value, 16);
-                if ((ret < 0) || !strncmp (value, "1", 1))
-                        continue;
-
-                /* If its a regular file, and sticky bit is set, we need to
-                   rebalance that */
-                snprintf (tmp_filename, PATH_MAX, "%s/.%s.gfs%llu", dir,
-                          entry->d_name,
-                          (unsigned long long)stbuf.st_size);
-
-                dst_fd = creat (tmp_filename, stbuf.st_mode);
-                if (dst_fd == -1)
-                        continue;
-
-                /* Prevent data movement from a node which has higher
-                   disk-space to a node with lesser */
-                if (defrag->cmd != GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) {
-                        ret = statvfs (full_path, &src_statfs);
-                        if (ret)
-                                gf_log ("", GF_LOG_WARNING,
-                                        "statfs on %s failed", full_path);
-
-                        ret = statvfs (tmp_filename, &dst_statfs);
-                        if (ret)
-                                gf_log ("", GF_LOG_WARNING,
-                                        "statfs on %s failed", tmp_filename);
-
-                        /* Calculate the size without the file in migration */
-                        if (((dst_statfs.f_bavail *
-                              dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) >
-                            (((src_statfs.f_bavail * src_statfs.f_bsize) /
-                              GF_DISK_SECTOR_SIZE) - stbuf.st_blocks)) {
-                                gf_log ("", GF_LOG_INFO,
-                                        "data movement attempted from node with"
-                                        " higher disk space to a node with "
-                                        "lesser disk space (%s)", full_path);
-
-                                close (dst_fd);
-                                unlink (tmp_filename);
-                                continue;
-                        }
-                }
+    priv = this->private;
+    if (!priv)
+        return 0;
 
-                src_fd = open (full_path, O_RDONLY);
-                if (src_fd == -1) {
-                        close (dst_fd);
-                        continue;
-                }
+    volinfo = mydata;
+    if (!volinfo)
+        return 0;
 
-                /* Try to preserve 'holes' while migrating data */
-                if (stbuf.st_size > (stbuf.st_blocks * GF_DISK_SECTOR_SIZE))
-                        file_has_holes = 1;
-
-                offset = 0;
-                while (1) {
-                        ret = read (src_fd, defrag->databuf, 128 * GF_UNIT_KB);
-                        if (!ret || (ret < 0)) {
-                                break;
-                        }
-
-                        if (!file_has_holes)
-                                ret = write (dst_fd, defrag->databuf, ret);
-                        else
-                                ret = write_with_holes (dst_fd, defrag->databuf,
-                                                        ret, offset);
-                        if (ret < 0)
-                                break;
-
-                        offset += ret;
-                }
+    defrag = volinfo->rebal.defrag;
+    if (!defrag)
+        return 0;
 
-                ret = stat (full_path, &new_stbuf);
-                if (ret < 0) {
-                        close (dst_fd);
-                        close (src_fd);
-                        continue;
-                }
-                /* No need to rebalance, if there is some
-                   activity on source file */
-                if (new_stbuf.st_mtime != stbuf.st_mtime) {
-                        close (dst_fd);
-                        close (src_fd);
-                        continue;
-                }
+    if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
+        volinfo->rebal.defrag = NULL;
 
-                ret = fchown (dst_fd, stbuf.st_uid, stbuf.st_gid);
-                if (ret) {
-                        gf_log ("", GF_LOG_WARNING,
-                                "failed to set the uid/gid of file %s: %s",
-                                tmp_filename, strerror (errno));
-                }
+    GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
 
-                ret = rename (tmp_filename, full_path);
-                if (ret != -1) {
-                        LOCK (&defrag->lock);
-                        {
-                                defrag->total_files += 1;
-                                defrag->total_data += stbuf.st_size;
-                        }
-                        UNLOCK (&defrag->lock);
-                }
+    switch (event) {
+        case RPC_CLNT_CONNECT: {
+            if (defrag->connected)
+                return 0;
 
-                close (dst_fd);
-                close (src_fd);
+            LOCK(&defrag->lock);
+            {
+                defrag->connected = 1;
+            }
+            UNLOCK(&defrag->lock);
 
-                if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) {
-                        closedir (fd);
-                        ret = -1;
-                        goto out;
-                }
+            gf_msg_debug(this->name, 0, "%s got RPC_CLNT_CONNECT",
+                         rpc->conn.name);
+            break;
         }
-        closedir (fd);
 
-        fd = opendir (dir);
-        if (!fd)
-                goto out;
-        while ((entry = readdir (fd))) {
-                if (!entry)
-                        break;
+        case RPC_CLNT_DISCONNECT: {
+            if (!defrag->connected)
+                return 0;
 
-                if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
-                        continue;
+            LOCK(&defrag->lock);
+            {
+                defrag->connected = 0;
+            }
+            UNLOCK(&defrag->lock);
 
-                snprintf (full_path, 1024, "%s/%s", dir, entry->d_name);
+            if (!gf_is_service_running(pidfile, &pid)) {
+                if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED) {
+                    volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED;
+                }
+            }
 
-                ret = stat (full_path, &stbuf);
-                if (ret == -1)
-                        continue;
+            glusterd_store_perform_node_state_store(volinfo);
 
-                if (!S_ISDIR (stbuf.st_mode))
-                        continue;
+            rpc_clnt_disable(defrag->rpc);
+            glusterd_defrag_rpc_put(defrag);
+            if (defrag->cbk_fn)
+                defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
 
-                ret = gf_glusterd_rebalance_move_data (volinfo, full_path);
-                if (ret)
-                        break;
+            GF_FREE(defrag);
+            gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
+                   "Rebalance process for volume %s has disconnected.",
+                   volinfo->volname);
+            break;
         }
-        closedir (fd);
+        case RPC_CLNT_DESTROY:
+            glusterd_volinfo_unref(volinfo);
+            break;
+        default:
+            gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+            ret = 0;
+            break;
+    }
 
-        if (!entry)
-                ret = 0;
-out:
-        return ret;
+    return ret;
 }
 
-int
-gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)
+int32_t
+glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+                       rpc_clnt_event_t event, void *data)
 {
-        int            ret             = -1;
-        char           full_path[1024] = {0,};
-        struct stat    stbuf           = {0,};
-        DIR           *fd              = NULL;
-        struct dirent *entry           = NULL;
-
-        if (!volinfo->defrag)
-                goto out;
-
-        fd = opendir (dir);
-        if (!fd)
-                goto out;
-
-        while ((entry = readdir (fd))) {
-                if (!entry)
-                        break;
-
-                if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
-                        continue;
-
-                snprintf (full_path, 1024, "%s/%s", dir, entry->d_name);
-
-                ret = stat (full_path, &stbuf);
-                if (ret == -1)
-                        continue;
-
-                if (S_ISDIR (stbuf.st_mode)) {
-                        /* Fix the layout of the directory */
-                        sys_lsetxattr (full_path, "trusted.distribute.fix.layout",
-                                       "yes", 3, 0);
-
-                        volinfo->defrag->total_files += 1;
-
-                        /* Traverse into subdirectory */
-                        ret = gf_glusterd_rebalance_fix_layout (volinfo,
-                                                                full_path);
-                        if (ret)
-                                break;
-                }
-
-                if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) {
-                        closedir (fd);
-                        ret = -1;
-                        goto out;
-                }
-        }
-        closedir (fd);
-
-        if (!entry)
-                ret = 0;
-
-out:
-        return ret;
+    return glusterd_big_locked_notify(rpc, mydata, event, data,
+                                      __glusterd_defrag_notify);
 }
 
-void *
-glusterd_defrag_start (void *data)
+int
+glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
+                             size_t len, int cmd, defrag_cbk_fn_t cbk,
+                             glusterd_op_t op)
 {
-        glusterd_volinfo_t     *volinfo = data;
-        glusterd_defrag_info_t *defrag  = NULL;
-        int                     ret     = -1;
-        struct stat             stbuf   = {0,};
-
-        defrag = volinfo->defrag;
-        if (!defrag)
-                goto out;
-
-        sleep (1);
-        ret = stat (defrag->mount, &stbuf);
-        if ((ret == -1) && (errno == ENOTCONN)) {
-                /* Wait for some more time before starting rebalance */
-                sleep (2);
-                ret = stat (defrag->mount, &stbuf);
-                if (ret == -1) {
-                        volinfo->defrag_status   = GF_DEFRAG_STATUS_FAILED;
-                        volinfo->rebalance_files = 0;
-                        volinfo->rebalance_data  = 0;
-                        volinfo->lookedup_files  = 0;
-                        goto out;
-                }
-        }
-
-        /* Fix the root ('/') first */
-        sys_lsetxattr (defrag->mount, "trusted.distribute.fix.layout",
-                       "yes", 3, 0);
-
-        if ((defrag->cmd == GF_DEFRAG_CMD_START) ||
-            (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
-                /* root's layout got fixed */
-                defrag->total_files = 1;
-
-                /* Step 1: Fix layout of all the directories */
-                ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount);
-                if (ret) {
-                        volinfo->defrag_status   = GF_DEFRAG_STATUS_FAILED;
-                        goto out;
-                }
+    xlator_t *this = NULL;
+    int ret = -1;
+    glusterd_defrag_info_t *defrag = NULL;
+    runner_t runner = {
+        0,
+    };
+    glusterd_conf_t *priv = NULL;
+    char defrag_path[PATH_MAX];
+    char sockfile[PATH_MAX] = {
+        0,
+    };
+    char pidfile[PATH_MAX] = {
+        0,
+    };
+    char logfile[PATH_MAX] = {
+        0,
+    };
+    char volname[PATH_MAX] = {
+        0,
+    };
+    char valgrind_logfile[PATH_MAX] = {
+        0,
+    };
+    char msg[1024] = {
+        0,
+    };
+    char *volfileserver = NULL;
+    char *localtime_logging = NULL;
+
+    this = THIS;
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+    priv = this->private;
+    GF_VALIDATE_OR_GOTO("glusterd", priv, out);
+
+    GF_ASSERT(volinfo);
+    GF_ASSERT(op_errstr);
+
+    ret = glusterd_defrag_start_validate(volinfo, op_errstr, len, op);
+    if (ret)
+        goto out;
+    if (!volinfo->rebal.defrag)
+        volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag),
+                                          gf_gld_mt_defrag_info);
+    if (!volinfo->rebal.defrag)
+        goto out;
+
+    defrag = volinfo->rebal.defrag;
+
+    defrag->cmd = cmd;
+
+    volinfo->rebal.defrag_cmd = cmd;
+    volinfo->rebal.op = op;
+
+    LOCK_INIT(&defrag->lock);
+
+    volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+    glusterd_volinfo_reset_defrag_stats(volinfo);
+    glusterd_store_perform_node_state_store(volinfo);
+
+    GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv);
+    ret = mkdir_p(defrag_path, 0755, _gf_true);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+               "Failed to create "
+               "directory %s",
+               defrag_path);
+        goto out;
+    }
+
+    GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+    GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+    snprintf(logfile, PATH_MAX, "%s/%s-%s.log", priv->logdir, volinfo->volname,
+             "rebalance");
+    runinit(&runner);
+
+    if (this->ctx->cmd_args.vgtool != _gf_none) {
+        snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log",
+                 priv->logdir, volinfo->volname);
+
+        if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+            runner_add_args(&runner, "valgrind", "--leak-check=full",
+                            "--trace-children=yes", "--track-origins=yes",
+                            NULL);
+        else
+            runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
+        runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+    }
+
+    snprintf(volname, sizeof(volname), "rebalance/%s", volinfo->volname);
+
+    if (dict_get_strn(this->options, "transport.socket.bind-address",
+                      SLEN("transport.socket.bind-address"),
+                      &volfileserver) != 0) {
+        volfileserver = "localhost";
+    }
+
+    runner_add_args(
+        &runner, SBIN_DIR "/glusterfs", "-s", volfileserver, "--volfile-id",
+        volname, "--xlator-option", "*dht.use-readdirp=yes", "--xlator-option",
+        "*dht.lookup-unhashed=yes", "--xlator-option",
+        "*dht.assert-no-child-down=yes", "--xlator-option",
+        "*dht.readdir-optimize=on", "--process-name", "rebalance", NULL);
+
+    runner_add_arg(&runner, "--xlator-option");
+    runner_argprintf(&runner, "*dht.rebalance-cmd=%d", cmd);
+    runner_add_arg(&runner, "--xlator-option");
+    runner_argprintf(&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID));
+    runner_add_arg(&runner, "--xlator-option");
+    runner_argprintf(&runner, "*dht.commit-hash=%u",
+                     volinfo->rebal.commit_hash);
+    runner_add_arg(&runner, "--socket-file");
+    runner_argprintf(&runner, "%s", sockfile);
+    runner_add_arg(&runner, "--pid-file");
+    runner_argprintf(&runner, "%s", pidfile);
+    runner_add_arg(&runner, "-l");
+    runner_argprintf(&runner, "%s", logfile);
+    if (volinfo->memory_accounting)
+        runner_add_arg(&runner, "--mem-accounting");
+    if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+                      SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+                      &localtime_logging) == 0) {
+        if (strcmp(localtime_logging, "enable") == 0)
+            runner_add_arg(&runner, "--localtime-logging");
+    }
+
+    snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s",
+             volinfo->volname);
+    runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
+    ret = runner_run_nowait(&runner);
+    if (ret) {
+        gf_msg_debug("glusterd", 0, "rebalance command failed");
+        goto out;
+    }
+
+    sleep(5);
+
+    ret = glusterd_rebalance_rpc_create(volinfo);
+
+    // FIXME: this cbk is passed as NULL in all occurrences. Maybe
+    // we never needed it.
+    if (cbk)
+        defrag->cbk_fn = cbk;
 
-                /* Completed first step */
-                volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE;
-        }
-
-        if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
-                /* It was used by number of layout fixes on directories */
-                defrag->total_files = 0;
-
-                volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED;
-
-                /* Step 2: Iterate over directories to move data */
-                ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount);
-                if (ret) {
-                        volinfo->defrag_status   = GF_DEFRAG_STATUS_FAILED;
-                        goto out;
-                }
-
-                /* Completed second step */
-                volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE;
-        }
-
-        /* Completed whole process */
-        if (defrag->cmd == GF_DEFRAG_CMD_START)
-                volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
-
-        volinfo->rebalance_files = defrag->total_files;
-        volinfo->rebalance_data  = defrag->total_data;
-        volinfo->lookedup_files  = defrag->num_files_lookedup;
 out:
-        volinfo->defrag = NULL;
-        if (defrag) {
-                gf_log ("rebalance", GF_LOG_INFO, "rebalance on %s complete",
-                        defrag->mount);
-
-                ret = runcmd ("umount", "-l", defrag->mount, NULL);
-                LOCK_DESTROY (&defrag->lock);
-                GF_FREE (defrag);
-        }
-
-        return NULL;
+    gf_msg_debug("glusterd", 0, "Returning %d", ret);
+    return ret;
 }
 
 int
-glusterd_defrag_stop_validate (glusterd_volinfo_t *volinfo,
-                               char *op_errstr, size_t len)
+glusterd_rebalance_defrag_init(glusterd_volinfo_t *volinfo, defrag_cbk_fn_t cbk)
+
 {
-        int     ret = -1;
-        if (glusterd_is_defrag_on (volinfo) == 0) {
-                snprintf (op_errstr, len, "Rebalance on %s is either Completed "
-                          "or not yet started", volinfo->volname);
-                goto out;
-        }
+    glusterd_defrag_info_t *defrag = NULL;
+    int ret = -1;
+
+    if (!volinfo->rebal.defrag) {
+        volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag),
+                                          gf_gld_mt_defrag_info);
+    } else {
+        /*
+         * If the defrag variable is already initialized,
+         * we skip the initialization.
+         */
         ret = 0;
+        goto out;
+    }
+
+    if (!volinfo->rebal.defrag)
+        goto out;
+    defrag = volinfo->rebal.defrag;
+
+    defrag->cmd = volinfo->rebal.defrag_cmd;
+    LOCK_INIT(&defrag->lock);
+    if (cbk)
+        defrag->cbk_fn = cbk;
+    ret = 0;
 out:
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
+    return ret;
 }
 
 int
-glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files,
-                      u_quad_t *size, char *op_errstr, size_t len)
+glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
 {
-        /* TODO: set a variaeble 'stop_defrag' here, it should be checked
-           in defrag loop */
-        int     ret = -1;
-        GF_ASSERT (volinfo);
-        GF_ASSERT (files);
-        GF_ASSERT (size);
-        GF_ASSERT (op_errstr);
-
-        ret = glusterd_defrag_stop_validate (volinfo, op_errstr, len);
-        if (ret)
-                goto out;
-        if (!volinfo || !volinfo->defrag) {
-                ret = -1;
-                goto out;
-        }
-
-        LOCK (&volinfo->defrag->lock);
-        {
-                volinfo->defrag_status = GF_DEFRAG_STATUS_STOPED;
-                *files = volinfo->defrag->total_files;
-                *size = volinfo->defrag->total_data;
-        }
-        UNLOCK (&volinfo->defrag->lock);
-
-        ret = 0;
+    dict_t *options = NULL;
+    char sockfile[PATH_MAX] = {
+        0,
+    };
+    int ret = -1;
+    glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
+    glusterd_conf_t *priv = NULL;
+    xlator_t *this = NULL;
+
+    this = THIS;
+    GF_ASSERT(this);
+    priv = this->private;
+    GF_ASSERT(priv);
+
+    // rebalance process is not started
+    if (!defrag)
+        goto out;
+
+    options = dict_new();
+    if (!options) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+        goto out;
+    }
+
+    GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+
+    /* Set frame-timeout to 10 minutes (600 seconds).
+     * Unix domain sockets ensure that the connection is reliable. The
+     * default timeout of 30 minutes used for unreliable network connections
+     * is too long for Unix domain socket connections.
+     */
+    ret = rpc_transport_unix_options_build(options, sockfile, 600);
+    if (ret) {
+        gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL,
+               "Unix options build failed");
+        goto out;
+    }
+
+    glusterd_volinfo_ref(volinfo);
+    ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
+                              volinfo, _gf_true);
+    if (ret) {
+        gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
+               "Glusterd RPC creation failed");
+        goto out;
+    }
+    ret = 0;
 out:
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
+    if (options)
+        dict_unref(options);
+    return ret;
 }
 
 int
-glusterd_defrag_status_get_v2 (glusterd_volinfo_t *volinfo,
-                            gf2_cli_defrag_vol_rsp *rsp)
+glusterd_rebalance_cmd_validate(int cmd, char *volname,
+                                glusterd_volinfo_t **volinfo, char *op_errstr,
+                                size_t len)
 {
-        if (!volinfo)
-                goto out;
-
-        if (volinfo->defrag) {
-                LOCK (&volinfo->defrag->lock);
-                {
-                        rsp->files = volinfo->defrag->total_files;
-                        rsp->size = volinfo->defrag->total_data;
-                        rsp->lookedup_files = volinfo->defrag->num_files_lookedup;
-                }
-                UNLOCK (&volinfo->defrag->lock);
-        } else {
-                rsp->files = volinfo->rebalance_files;
-                rsp->size  = volinfo->rebalance_data;
-                rsp->lookedup_files = volinfo->lookedup_files;
-        }
+    int ret = -1;
+
+    if (glusterd_volinfo_find(volname, volinfo)) {
+        gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+               "Received rebalance on invalid"
+               " volname %s",
+               volname);
+        snprintf(op_errstr, len, "Volume %s does not exist", volname);
+        goto out;
+    }
+    if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_DISTRIBUTE,
+               "Volume %s is not a "
+               "distribute type or contains only 1 brick",
+               volname);
+        snprintf(op_errstr, len,
+                 "Volume %s is not a distribute "
+                 "volume or contains only 1 brick.\n"
+                 "Not performing rebalance",
+                 volname);
+        goto out;
+    }
+
+    if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_STOPPED,
+               "Received rebalance on stopped"
+               " volname %s",
+               volname);
+        snprintf(op_errstr, len,
+                 "Volume %s needs to "
+                 "be started to perform rebalance",
+                 volname);
+        goto out;
+    }
+
+    ret = 0;
 
-        rsp->op_errno = volinfo->defrag_status;
-        rsp->op_ret = 0;
 out:
-        return 0;
+    gf_msg_debug("glusterd", 0, "Returning %d", ret);
+    return ret;
 }
 
 int
-glusterd_defrag_status_get (glusterd_volinfo_t *volinfo,
-                            gf1_cli_defrag_vol_rsp *rsp)
+__glusterd_handle_defrag_volume(rpcsvc_request_t *req)
 {
-        if (!volinfo)
-                goto out;
-
-        if (volinfo->defrag) {
-                LOCK (&volinfo->defrag->lock);
-                {
-                        rsp->files = volinfo->defrag->total_files;
-                        rsp->size = volinfo->defrag->total_data;
-                        rsp->lookedup_files = volinfo->defrag->num_files_lookedup;
-                }
-                UNLOCK (&volinfo->defrag->lock);
-        } else {
-                rsp->files = volinfo->rebalance_files;
-                rsp->size  = volinfo->rebalance_data;
-                rsp->lookedup_files = volinfo->lookedup_files;
+    int32_t ret = -1;
+    gf_cli_req cli_req = {{
+        0,
+    }};
+    glusterd_conf_t *priv = NULL;
+    int32_t op = GD_OP_NONE;
+    dict_t *dict = NULL;
+    char *volname = NULL;
+    gf_cli_defrag_type cmd = 0;
+    char msg[2048] = {
+        0,
+    };
+    xlator_t *this = NULL;
+
+    GF_ASSERT(req);
+    this = THIS;
+    GF_ASSERT(this);
+
+    priv = this->private;
+    GF_ASSERT(priv);
+
+    ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+    if (ret < 0) {
+        // failed to decode msg;
+        req->rpc_err = GARBAGE_ARGS;
+        gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+        goto out;
+    }
+
+    if (cli_req.dict.dict_len) {
+        /* Unserialize the dictionary */
+        dict = dict_new();
+
+        ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+                               &dict);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+                   "failed to "
+                   "unserialize req-buffer to dictionary");
+            snprintf(msg, sizeof(msg),
+                     "Unable to decode the "
+                     "command");
+            goto out;
         }
-
-        rsp->op_errno = volinfo->defrag_status;
-        rsp->op_ret = 0;
+    }
+
+    ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "Failed to get volume name");
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+        goto out;
+    }
+
+    ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+                          (int32_t *)&cmd);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "Failed to get command");
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+        goto out;
+    }
+
+    ret = dict_set_static_bin(dict, "node-uuid", MY_UUID, 16);
+    if (ret)
+        goto out;
+
+    if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) {
+        op = GD_OP_DEFRAG_BRICK_VOLUME;
+    } else
+        op = GD_OP_REBALANCE;
+
+    if (priv->op_version < GD_OP_VERSION_6_0) {
+        gf_msg_debug(this->name, 0,
+                     "The cluster is operating at "
+                     "version less than %d. Falling back "
+                     "to op-sm framework.",
+                     GD_OP_VERSION_6_0);
+        ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg));
+        glusterd_friend_sm();
+        glusterd_op_sm();
+    } else {
+        ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, op,
+                                                                      dict);
+    }
 out:
-        return 0;
+    if (ret) {
+        if (msg[0] == '\0')
+            snprintf(msg, sizeof(msg), "Operation failed");
+        ret = glusterd_op_send_cli_response(GD_OP_REBALANCE, ret, 0, req, dict,
+                                            msg);
+    }
+
+    free(cli_req.dict.dict_val);  // malloced by xdr
+    gf_msg_debug(this->name, 0, "Returning %d", ret);
+    return ret;
 }
 
-void
-glusterd_rebalance_cmd_attempted_log (int cmd, char *volname)
+int
+glusterd_handle_defrag_volume(rpcsvc_request_t *req)
 {
-        switch (cmd) {
-                case GF_DEFRAG_CMD_START_LAYOUT_FIX:
-                        gf_cmd_log ("Volume rebalance"," on volname: %s "
-                                    "cmd: start fix layout , attempted",
-                                    volname);
-                        break;
-                case GF_DEFRAG_CMD_START_MIGRATE_DATA:
-                case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
-                        gf_cmd_log ("Volume rebalance"," on volname: %s "
-                                    "cmd: start data migrate attempted",
-                                    volname);
-                        break;
-                case GF_DEFRAG_CMD_START:
-                        gf_cmd_log ("Volume rebalance"," on volname: %s "
-                                    "cmd: start, attempted", volname);
-                        break;
-                case GF_DEFRAG_CMD_STOP:
-                        gf_cmd_log ("Volume rebalance"," on volname: %s "
-                                    "cmd: stop, attempted", volname);
-                        break;
-                default:
-                        break;
-        }
-
-        gf_log ("glusterd", GF_LOG_INFO, "Received rebalance volume %d on %s",
-                cmd, volname);
+    return glusterd_big_locked_handler(req, __glusterd_handle_defrag_volume);
 }
 
-void
-glusterd_rebalance_cmd_log (int cmd, char *volname, int status)
+static int
+glusterd_brick_validation(dict_t *dict, char *key, data_t *value, void *data)
 {
-        if (cmd != GF_DEFRAG_CMD_STATUS) {
-                gf_cmd_log ("volume rebalance"," on volname: %s %d %s",
-                            volname, cmd, ((status)?"FAILED":"SUCCESS"));
-        }
+    int32_t ret = -1;
+    xlator_t *this = NULL;
+    glusterd_volinfo_t *volinfo = data;
+    glusterd_brickinfo_t *brickinfo = NULL;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    ret = glusterd_volume_brickinfo_get_by_brick(value->data, volinfo,
+                                                 &brickinfo, _gf_false);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
+               "Incorrect brick %s for "
+               "volume %s",
+               value->data, volinfo->volname);
+        return ret;
+    }
+
+    if (!brickinfo->decommissioned) {
+        gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
+               "Incorrect brick %s for "
+               "volume %s",
+               value->data, volinfo->volname);
+        ret = -1;
+        return ret;
+    }
+
+    return ret;
 }
 
 int
-glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
-                                size_t len)
+glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict)
 {
-        int     ret = -1;
-
-        if (glusterd_is_defrag_on (volinfo)) {
-                gf_log ("glusterd", GF_LOG_DEBUG,
-                        "rebalance on volume %s already started",
-                        volinfo->volname);
-                snprintf (op_errstr, len, "Rebalance on %s is already started",
-                          volinfo->volname);
-                goto out;
+    int ret = -1;
+    int32_t cmd = 0;
+    char *volname = NULL;
+    glusterd_volinfo_t *volinfo = NULL;
+    char msg[2048] = {0};
+    char *task_id_str = NULL;
+    xlator_t *this = NULL;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    GF_ASSERT(rsp_dict);
+    GF_ASSERT(req_dict);
+
+    ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "volname not found");
+        goto out;
+    }
+
+    ret = dict_get_int32n(rsp_dict, "rebalance-command",
+                          SLEN("rebalance-command"), &cmd);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "cmd not found");
+        goto out;
+    }
+
+    ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+                                          sizeof(msg));
+    if (ret) {
+        gf_msg_debug(this->name, 0, "failed to validate");
+        goto out;
+    }
+
+    /* The rebalance id is generated in glusterd_mgmt_v3_op_stage_rebalance(),
+     * but rsp_dict is unavailable there, so copy it from req_dict to rsp_dict
+     * here so that the cli can display the rebalance id. */
+    if ((cmd == GF_DEFRAG_CMD_START) ||
+        (cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX) ||
+        (cmd == GF_DEFRAG_CMD_START_FORCE)) {
+        if (is_origin_glusterd(rsp_dict)) {
+            ret = dict_get_strn(req_dict, GF_REBALANCE_TID_KEY,
+                                SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+            if (ret) {
+                snprintf(msg, sizeof(msg), "Missing rebalance-id");
+                gf_msg(this->name, GF_LOG_WARNING, 0,
+                       GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+                ret = 0;
+            } else {
+                gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+                ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+                                                 rsp_dict, GF_REBALANCE_TID_KEY,
+                                                 SLEN(GF_REBALANCE_TID_KEY));
+                if (ret) {
+                    snprintf(msg, sizeof(msg),
+                             "Failed to set rebalance id for volume %s",
+                             volname);
+                    gf_msg(this->name, GF_LOG_WARNING, 0,
+                           GD_MSG_DICT_SET_FAILED, "%s", msg);
+                }
+            }
         }
-
-        if (glusterd_is_rb_started (volinfo) ||
-            glusterd_is_rb_paused (volinfo)) {
-                gf_log ("glusterd", GF_LOG_DEBUG,
-                        "Replace brick is in progress on volume %s",
-                        volinfo->volname);
-                snprintf (op_errstr, len, "Replace brick is in progress on "
-                          "volume %s", volinfo->volname);
+    }
+
+    /* Set task-id, if available, in rsp_dict for operations other than
+     * start. This is needed when we want rebalance id in xml output
+     */
+    if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+        if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+            if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+                ret = glusterd_copy_uuid_to_dict(
+                    volinfo->rebal.rebalance_id, rsp_dict,
+                    GF_REMOVE_BRICK_TID_KEY, SLEN(GF_REMOVE_BRICK_TID_KEY));
+            else
+                ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+                                                 rsp_dict, GF_REBALANCE_TID_KEY,
+                                                 SLEN(GF_REBALANCE_TID_KEY));
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                       "Failed to set task-id for volume %s", volname);
                 goto out;
+            }
         }
-        ret = 0;
+    }
 out:
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
+    return ret;
 }
 
 int
-glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
-                              size_t len, int cmd)
+glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr)
 {
-        int                    ret = -1;
-        glusterd_defrag_info_t *defrag =  NULL;
-        runner_t               runner = {0,};
-        glusterd_conf_t        *priv = NULL;
-
-        priv    = THIS->private;
-
-        GF_ASSERT (volinfo);
-        GF_ASSERT (op_errstr);
+    char *volname = NULL;
+    char *cmd_str = NULL;
+    int ret = 0;
+    int32_t cmd = 0;
+    char msg[2048] = {0};
+    glusterd_volinfo_t *volinfo = NULL;
+    char *task_id_str = NULL;
+    xlator_t *this = 0;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "volname not found");
+        goto out;
+    }
+
+    ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+                          &cmd);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "cmd not found");
+        goto out;
+    }
+
+    ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+                                          sizeof(msg));
+    if (ret) {
+        gf_msg_debug(this->name, 0, "failed to validate");
+        goto out;
+    }
+    switch (cmd) {
+        case GF_DEFRAG_CMD_START:
+        case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+            /* Check if the connected clients are all of version
+             * glusterfs-3.6 and higher. This is needed to prevent some data
+             * loss issues that could occur when older clients are connected
+             * when rebalance is run. This check can be bypassed by using
+             * 'force'
+             */
+            ret = glusterd_check_client_op_version_support(
+                volname, GD_OP_VERSION_3_6_0, NULL);
+            if (ret) {
+                ret = gf_asprintf(op_errstr,
+                                  "Volume %s has one or "
+                                  "more connected clients of a version"
+                                  " lower than GlusterFS-v3.6.0. "
+                                  "Starting rebalance in this state "
+                                  "could lead to data loss.\nPlease "
+                                  "disconnect those clients before "
+                                  "attempting this command again.",
+                                  volname);
+                goto out;
+            }
+            /* Fall through */
+        case GF_DEFRAG_CMD_START_FORCE:
+            if (is_origin_glusterd(dict)) {
+                ret = glusterd_generate_and_set_task_id(
+                    dict, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY));
+                if (ret) {
+                    gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+                           "Failed to generate task-id");
+                    goto out;
+                }
+            } else {
+                ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+                                    SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+                if (ret) {
+                    snprintf(msg, sizeof(msg), "Missing rebalance-id");
+                    gf_msg(this->name, GF_LOG_WARNING, 0,
+                           GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+                    ret = 0;
+                }
+            }
+            ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg),
+                                                 GD_OP_REBALANCE);
+            if (ret) {
+                gf_msg_debug(this->name, 0,
+                             "defrag start validate "
+                             "failed for volume %s.",
+                             volinfo->volname);
+                goto out;
+            }
+            break;
+        case GF_DEFRAG_CMD_STATUS:
+        case GF_DEFRAG_CMD_STOP:
 
-        ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
-        if (ret)
+            ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                       "Failed to get "
+                       "command string");
+                ret = -1;
                 goto out;
-        if (!volinfo->defrag)
-                volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t),
-                                             gf_gld_mt_defrag_info);
-        if (!volinfo->defrag)
+            }
+            if ((strstr(cmd_str, "rebalance") != NULL) &&
+                (volinfo->rebal.op != GD_OP_REBALANCE)) {
+                snprintf(msg, sizeof(msg),
+                         "Rebalance not started "
+                         "for volume %s.",
+                         volinfo->volname);
+                ret = -1;
                 goto out;
+            }
+
+            if (strstr(cmd_str, "remove-brick") != NULL) {
+                if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+                    snprintf(msg, sizeof(msg),
+                             "remove-brick not "
+                             "started for volume %s.",
+                             volinfo->volname);
+                    ret = -1;
+                    goto out;
+                }
 
-        defrag = volinfo->defrag;
+                /* For remove-brick status/stop commands, check whether
+                 * each given input brick is part of the volume. */
 
-        defrag->cmd = cmd;
+                ret = dict_foreach_fnmatch(dict, "brick*",
+                                           glusterd_brick_validation, volinfo);
+                if (ret == -1) {
+                    snprintf(msg, sizeof(msg),
+                             "Incorrect brick"
+                             " for volume %s",
+                             volinfo->volname);
+                    goto out;
+                }
+            }
+            break;
 
-        LOCK_INIT (&defrag->lock);
-        snprintf (defrag->mount, 1024, "%s/mount/%s",
-                  priv->workdir, volinfo->volname);
-        /* Create a directory, mount glusterfs over it, start glusterfs-defrag */
-        runinit (&runner);
-        runner_add_args (&runner, "mkdir", "-p", defrag->mount, NULL);
-        ret = runner_run_reuse (&runner);
-        if (ret) {
-                runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
-                runner_end (&runner);
-                goto out;
-        }
-        runner_end (&runner);
-
-        runinit (&runner);
-        runner_add_args (&runner, GFS_PREFIX"/sbin/glusterfs",
-                         "-s", "localhost", "--volfile-id", volinfo->volname,
-                         "--xlator-option", "*dht.use-readdirp=yes",
-                         "--xlator-option", "*dht.lookup-unhashed=yes",
-                         defrag->mount, NULL);
-        ret = runner_run_reuse (&runner);
-        if (ret) {
-                runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
-                runner_end (&runner);
-                goto out;
-        }
-        runner_end (&runner);
-
-        volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
-
-        ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start,
-                              volinfo);
-        if (ret) {
-                runinit (&runner);
-                runner_add_args (&runner, "umount", "-l", defrag->mount, NULL);
-                ret = runner_run_reuse (&runner);
-                if (ret)
-                        runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
-                runner_end (&runner);
-        }
+        default:
+            break;
+    }
+
+    ret = 0;
 out:
-        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
+    if (ret && op_errstr && msg[0])
+        *op_errstr = gf_strdup(msg);
+
+    return ret;
 }
 
 int
-glusterd_rebalance_cmd_validate (int cmd, char *volname,
-                                 glusterd_volinfo_t **volinfo,
-                                 char *op_errstr, size_t len)
+glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
 {
-        int ret = -1;
+    char *volname = NULL;
+    int ret = 0;
+    int32_t cmd = 0;
+    char msg[2048] = {0};
+    glusterd_volinfo_t *volinfo = NULL;
+    glusterd_brickinfo_t *brickinfo = NULL;
+    glusterd_brickinfo_t *tmp = NULL;
+    gf_boolean_t volfile_update = _gf_false;
+    char *task_id_str = NULL;
+    xlator_t *this = NULL;
+    uint32_t commit_hash;
+    int32_t is_force = 0;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "volname not given");
+        goto out;
+    }
+
+    ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+                          &cmd);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "command not given");
+        goto out;
+    }
+
+    ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+                                          sizeof(msg));
+    if (ret) {
+        gf_msg_debug(this->name, 0, "cmd validate failed");
+        goto out;
+    }
+
+    switch (cmd) {
+        case GF_DEFRAG_CMD_START:
+        case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+        case GF_DEFRAG_CMD_START_FORCE:
+
+            ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
+            if (ret)
+                is_force = 0;
+            if (!is_force) {
+                /* Reset defrag status to 'NOT STARTED' whenever a
+                 * remove-brick/rebalance command is issued to remove
+                 * stale information from previous run.
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+
+                ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+                                    SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+                if (ret) {
+                    gf_msg_debug(this->name, 0,
+                                 "Missing rebalance"
+                                 " id");
+                    ret = 0;
+                } else {
+                    gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+                    volinfo->rebal.op = GD_OP_REBALANCE;
+                }
+                if (!gd_should_i_start_rebalance(volinfo)) {
+                    /* Store the rebalance-id and rebalance command
+                     * even if the peer isn't starting a rebalance
+                     * process. On peers where a rebalance process
+                     * is started, glusterd_handle_defrag_start
+                     * performs the storing.
+                     * Storing this is needed for having
+                     * 'volume status' work correctly.
+                     */
+                    glusterd_store_perform_node_state_store(volinfo);
+                    break;
+                }
+                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+                    volinfo->rebal.commit_hash = commit_hash;
+                }
+                ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg),
+                                                   cmd, NULL, GD_OP_REBALANCE);
+                break;
+            } else {
+                /* Reset defrag status to 'STARTED' so that the
+                 * pid is checked and restarted accordingly.
+                 * If the pid is not running it executes the
+                 * "NOT_STARTED" case and restarts the process
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+                volinfo->rebal.defrag_cmd = cmd;
+                volinfo->rebal.op = GD_OP_REBALANCE;
+
+                ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+                                    SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+                if (ret) {
+                    gf_msg_debug(this->name, 0,
+                                 "Missing rebalance"
+                                 " id");
+                    ret = 0;
+                } else {
+                    gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+                    volinfo->rebal.op = GD_OP_REBALANCE;
+                }
+                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+                    volinfo->rebal.commit_hash = commit_hash;
+                }
+                ret = glusterd_restart_rebalance_for_volume(volinfo);
+                break;
+            }
+        case GF_DEFRAG_CMD_STOP:
+            /* Clear task-id only on explicitly stopping rebalance.
+             * Also clear the stored operation, so it doesn't cause trouble
+             * with future rebalance/remove-brick starts
+             */
+            gf_uuid_clear(volinfo->rebal.rebalance_id);
+            volinfo->rebal.op = GD_OP_NONE;
+
+            /* Fall back to the old volume file in case of decommission*/
+            cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
+                                         brick_list)
+            {
+                if (!brickinfo->decommissioned)
+                    continue;
+                brickinfo->decommissioned = 0;
+                volfile_update = _gf_true;
+            }
+
+            if (volfile_update == _gf_false) {
+                ret = 0;
+                break;
+            }
 
-        if (glusterd_volinfo_find(volname, volinfo)) {
-                gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid"
-                        " volname %s", volname);
-                snprintf (op_errstr, len, "Volume %s does not exist",
-                          volname);
+            ret = glusterd_create_volfiles_and_notify_services(volinfo);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_WARNING, 0,
+                       GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
                 goto out;
-        }
+            }
 
-        if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) {
-                gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped"
-                        " volname %s", volname);
-                snprintf (op_errstr, len, "Volume %s needs to "
-                          "be started to perform rebalance", volname);
+            ret = glusterd_store_volinfo(volinfo,
+                                         GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+                       "failed to store volinfo");
                 goto out;
-        }
-        ret = 0;
+            }
+
+            ret = 0;
+            break;
+
+        case GF_DEFRAG_CMD_STATUS:
+            break;
+        default:
+            break;
+    }
+
 out:
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
+    if (ret && op_errstr && msg[0])
+        *op_errstr = gf_strdup(msg);
+
+    return ret;
 }
 
 int
-glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req)
+glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
 {
-        int32_t                 ret           = -1;
-        gf1_cli_defrag_vol_req  cli_req       = {0,};
-        glusterd_volinfo_t     *volinfo = NULL;
-        gf2_cli_defrag_vol_rsp rsp = {0,};
-        char                    msg[2048] = {0};
-        glusterd_conf_t        *priv = NULL;
-
-        GF_ASSERT (req);
-
-        priv    = THIS->private;
-        if (!gf_xdr_to_cli_defrag_vol_req (req->msg[0], &cli_req)) {
-                //failed to decode msg;
-                req->rpc_err = GARBAGE_ARGS;
+    char *volname = NULL;
+    char *cmd_str = NULL;
+    int ret = 0;
+    int32_t cmd = 0;
+    char msg[2048] = {0};
+    glusterd_volinfo_t *volinfo = NULL;
+    char *task_id_str = NULL;
+    dict_t *op_ctx = NULL;
+    xlator_t *this = 0;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "volname not found");
+        goto out;
+    }
+
+    ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+                          &cmd);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "cmd not found");
+        goto out;
+    }
+
+    ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+                                          sizeof(msg));
+    if (ret) {
+        gf_msg_debug(this->name, 0, "failed to validate");
+        goto out;
+    }
+    switch (cmd) {
+        case GF_DEFRAG_CMD_START:
+        case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+            /* Check if the connected clients are all of version
+             * glusterfs-3.6 and higher. This is needed to prevent some data
+             * loss issues that could occur when older clients are connected
+             * when rebalance is run. This check can be bypassed by using
+             * 'force'
+             */
+            ret = glusterd_check_client_op_version_support(
+                volname, GD_OP_VERSION_3_6_0, NULL);
+            if (ret) {
+                ret = gf_asprintf(op_errstr,
+                                  "Volume %s has one or "
+                                  "more connected clients of a version"
+                                  " lower than GlusterFS-v3.6.0. "
+                                  "Starting rebalance in this state "
+                                  "could lead to data loss.\nPlease "
+                                  "disconnect those clients before "
+                                  "attempting this command again.",
+                                  volname);
                 goto out;
-        }
-
-        glusterd_rebalance_cmd_attempted_log (cli_req.cmd, cli_req.volname);
+            }
+            /* Fall through */
+        case GF_DEFRAG_CMD_START_FORCE:
+            if (is_origin_glusterd(dict)) {
+                op_ctx = glusterd_op_get_ctx();
+                if (!op_ctx) {
+                    ret = -1;
+                    gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL,
+                           "Failed to get op_ctx");
+                    goto out;
+                }
 
-        rsp.volname = cli_req.volname;
-        rsp.op_ret = -1;
-        rsp.op_errstr = msg;
+                ret = glusterd_generate_and_set_task_id(
+                    op_ctx, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY));
+                if (ret) {
+                    gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+                           "Failed to generate task-id");
+                    goto out;
+                }
+            } else {
+                ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+                                    SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+                if (ret) {
+                    snprintf(msg, sizeof(msg), "Missing rebalance-id");
+                    gf_msg(this->name, GF_LOG_WARNING, 0,
+                           GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+                    ret = 0;
+                }
+            }
+            ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg),
+                                                 GD_OP_REBALANCE);
+            if (ret) {
+                gf_msg_debug(this->name, 0,
+                             "defrag start validate "
+                             "failed for volume %s.",
+                             volinfo->volname);
+                goto out;
+            }
+            break;
+        case GF_DEFRAG_CMD_STATUS:
+        case GF_DEFRAG_CMD_STOP:
 
-        ret = glusterd_rebalance_cmd_validate (cli_req.cmd, cli_req.volname,
-                                               &volinfo, msg, sizeof (msg));
-        if (ret)
+            ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+                       "Failed to get "
+                       "command string");
+                ret = -1;
+                goto out;
+            }
+            if ((strstr(cmd_str, "rebalance") != NULL) &&
+                (volinfo->rebal.op != GD_OP_REBALANCE)) {
+                snprintf(msg, sizeof(msg),
+                         "Rebalance not started "
+                         "for volume %s.",
+                         volinfo->volname);
+                ret = -1;
                 goto out;
+            }
+
+            if (strstr(cmd_str, "remove-brick") != NULL) {
+                if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+                    snprintf(msg, sizeof(msg),
+                             "remove-brick not "
+                             "started for volume %s.",
+                             volinfo->volname);
+                    ret = -1;
+                    goto out;
+                }
+
+                /* For remove-brick status/stop command check whether
+                 * given input brick is part of volume or not.*/
+
+                ret = dict_foreach_fnmatch(dict, "brick*",
+                                           glusterd_brick_validation, volinfo);
+                if (ret == -1) {
+                    snprintf(msg, sizeof(msg),
+                             "Incorrect brick"
+                             " for volume %s",
+                             volinfo->volname);
+                    goto out;
+                }
+            }
+            break;
 
-        switch (cli_req.cmd) {
-        case GF_DEFRAG_CMD_START:
-        case GF_DEFRAG_CMD_START_LAYOUT_FIX:
-        case GF_DEFRAG_CMD_START_MIGRATE_DATA:
-        case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
-                ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
-                                                    cli_req.cmd);
-                rsp.op_ret = ret;
-                break;
-        case GF_DEFRAG_CMD_STOP:
-                ret = glusterd_defrag_stop (volinfo, &rsp.files, &rsp.size,
-                                            msg, sizeof (msg));
-                rsp.op_ret = ret;
-                break;
-        case GF_DEFRAG_CMD_STATUS:
-                ret = glusterd_defrag_status_get_v2 (volinfo, &rsp);
-                break;
         default:
-                break;
-        }
-        glusterd_rebalance_cmd_log (cli_req.cmd, cli_req.volname, rsp.op_ret);
-out:
+            break;
+    }
 
-        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
-                                     gf_xdr_serialize_cli_defrag_vol_rsp_v2);
-        if (cli_req.volname)
-                free (cli_req.volname);//malloced by xdr
+    ret = 0;
+out:
+    if (ret && op_errstr && msg[0])
+        *op_errstr = gf_strdup(msg);
 
-        return 0;
+    return ret;
 }
 
 int
-glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
 {
-        int32_t                ret           = -1;
-        gf1_cli_defrag_vol_req cli_req       = {0,};
-        glusterd_conf_t         *priv = NULL;
-        char                   cmd_str[4096] = {0,};
-        glusterd_volinfo_t      *volinfo = NULL;
-        gf1_cli_defrag_vol_rsp rsp = {0,};
-        char                    msg[2048] = {0};
-
-        GF_ASSERT (req);
-
-        priv    = THIS->private;
-
-        if (!gf_xdr_to_cli_defrag_vol_req (req->msg[0], &cli_req)) {
-                //failed to decode msg;
-                req->rpc_err = GARBAGE_ARGS;
+    char *volname = NULL;
+    int ret = 0;
+    int32_t cmd = 0;
+    char msg[2048] = {0};
+    glusterd_volinfo_t *volinfo = NULL;
+    glusterd_brickinfo_t *brickinfo = NULL;
+    glusterd_brickinfo_t *tmp = NULL;
+    gf_boolean_t volfile_update = _gf_false;
+    char *task_id_str = NULL;
+    dict_t *ctx = NULL;
+    xlator_t *this = NULL;
+    uint32_t commit_hash;
+    int32_t is_force = 0;
+    /* Apply dict's "rebalance-command" to volume "volname"; 0 on success. */
+    this = THIS;
+    GF_ASSERT(this);
+    /* Both dict keys are required; a miss is only logged at debug level. */
+    ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "volname not given");
+        goto out;
+    }
+
+    ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+                          &cmd);
+    if (ret) {
+        gf_msg_debug(this->name, 0, "command not given");
+        goto out;
+    }
+    /* NOTE(review): presumably fills volinfo, which is dereferenced below. */
+    ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+                                          sizeof(msg));
+    if (ret) {
+        gf_msg_debug(this->name, 0, "cmd validate failed");
+        goto out;
+    }
+
+    /* Set task-id, if available, in op_ctx dict for operations other than
+     * start
+     */
+    if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+        if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+            ctx = glusterd_op_get_ctx();
+            if (!ctx) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL,
+                       "Failed to get op_ctx");
+                ret = -1;
                 goto out;
+            }
+            /* The dict key depends on which operation owns the task-id. */
+            if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+                ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+                                                 ctx, GF_REMOVE_BRICK_TID_KEY,
+                                                 SLEN(GF_REMOVE_BRICK_TID_KEY));
+            else
+                ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+                                                 ctx, GF_REBALANCE_TID_KEY,
+                                                 SLEN(GF_REBALANCE_TID_KEY));
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+                       "Failed to set task-id");
+                goto out;
+            }
         }
+    }
 
-        glusterd_rebalance_cmd_attempted_log (cli_req.cmd, cli_req.volname);
-
-        rsp.volname = cli_req.volname;
-        rsp.op_ret = -1;
-
-        ret = glusterd_rebalance_cmd_validate (cli_req.cmd, cli_req.volname,
-                                               &volinfo, msg, sizeof (msg));
-        if (ret)
-                goto out;
-        switch (cli_req.cmd) {
+    switch (cmd) {
         case GF_DEFRAG_CMD_START:
         case GF_DEFRAG_CMD_START_LAYOUT_FIX:
-        case GF_DEFRAG_CMD_START_MIGRATE_DATA:
-        case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
-        {
-                ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
-                                                    cli_req.cmd);
-                rsp.op_ret = ret;
+        case GF_DEFRAG_CMD_START_FORCE:
+            /* "force" is optional; a failed lookup means a normal start. */
+            ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
+            if (ret)
+                is_force = 0;
+            if (!is_force) {
+                /* Reset defrag status to 'NOT STARTED' whenever a
+                 * remove-brick/rebalance command is issued to remove
+                 * stale information from previous run.
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+                /* A missing task-id is tolerated and only debug-logged. */
+                ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+                                    SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+                if (ret) {
+                    gf_msg_debug(this->name, 0,
+                                 "Missing rebalance"
+                                 " id");
+                    ret = 0;
+                } else {
+                    gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+                    volinfo->rebal.op = GD_OP_REBALANCE;
+                }
+                if (!gd_should_i_start_rebalance(volinfo)) {
+                    /* Store the rebalance-id and rebalance command
+                     * even if the peer isn't starting a rebalance
+                     * process. On peers where a rebalance process
+                     * is started, glusterd_handle_defrag_start
+                     * performs the storing.
+                     * Storing this is needed for having
+                     * 'volume status' work correctly.
+                     */
+                    glusterd_store_perform_node_state_store(volinfo);
+                    break;
+                }
+                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+                    volinfo->rebal.commit_hash = commit_hash;
+                }
+                ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg),
+                                                   cmd, NULL, GD_OP_REBALANCE);
                 break;
-        }
+            } else {
+                /* Reset defrag status to 'STARTED' so that the
+                 * pid is checked and restarted accordingly.
+                 * If the pid is not running it executes the
+                 * "NOT_STARTED" case and restarts the process
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+                volinfo->rebal.defrag_cmd = cmd;
+                volinfo->rebal.op = GD_OP_REBALANCE;
+                /* A missing task-id is tolerated and only debug-logged. */
+                ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+                                    SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+                if (ret) {
+                    gf_msg_debug(this->name, 0,
+                                 "Missing rebalance"
+                                 " id");
+                    ret = 0;
+                } else {
+                    gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+                    volinfo->rebal.op = GD_OP_REBALANCE;
+                }
+                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+                    volinfo->rebal.commit_hash = commit_hash;
+                }
+                ret = glusterd_restart_rebalance_for_volume(volinfo);
+                break;
+            }
         case GF_DEFRAG_CMD_STOP:
-                ret = glusterd_defrag_stop (volinfo, &rsp.files, &rsp.size,
-                                            msg, sizeof (msg));
-                rsp.op_ret = ret;
+            /* Clear task-id only on explicitly stopping rebalance.
+             * Also clear the stored operation, so it doesn't cause trouble
+             * with future rebalance/remove-brick starts
+             */
+            gf_uuid_clear(volinfo->rebal.rebalance_id);
+            volinfo->rebal.op = GD_OP_NONE;
+
+            /* Fall back to the old volume file in case of decommission. */
+            cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
+                                         brick_list)
+            {
+                if (!brickinfo->decommissioned)
+                    continue;
+                brickinfo->decommissioned = 0;
+                volfile_update = _gf_true;
+            }
+            /* No brick was decommissioned: nothing to regenerate or store. */
+            if (volfile_update == _gf_false) {
+                ret = 0;
                 break;
+            }
+
+            ret = glusterd_create_volfiles_and_notify_services(volinfo);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_WARNING, 0,
+                       GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
+                goto out;
+            }
+
+            ret = glusterd_store_volinfo(volinfo,
+                                         GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+                       "failed to store volinfo");
+                goto out;
+            }
+
+            ret = 0;
+            break;
+        /* Status needs nothing here beyond the task-id copy done above. */
         case GF_DEFRAG_CMD_STATUS:
-                ret = glusterd_defrag_status_get (volinfo, &rsp);
-                break;
+            break;
         default:
-                break;
-        }
-        if (ret)
-                gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed",cmd_str);
-
-        if (cli_req.cmd != GF_DEFRAG_CMD_STATUS) {
-                gf_cmd_log ("volume rebalance"," on volname: %s %d %s",
-                            cli_req.volname,
-                            cli_req.cmd, ((ret)?"FAILED":"SUCCESS"));
-        }
+            break;
+    }
 
 out:
-        ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
-                                     gf_xdr_serialize_cli_defrag_vol_rsp);
-        if (cli_req.volname)
-                free (cli_req.volname);//malloced by xdr
+    if (ret && op_errstr && msg[0])
+        *op_errstr = gf_strdup(msg);
 
-        return 0;
+    return ret;
+}
+
+int32_t
+glusterd_defrag_event_notify_handle(dict_t *dict)
+{
+    glusterd_volinfo_t *volinfo = NULL;
+    char *volname = NULL;
+    char *volname_ptr = NULL;
+    int32_t ret = -1;
+    xlator_t *this = NULL;
+    /* Refresh the stored defrag status of the volume named in dict. */
+    this = THIS;
+    GF_ASSERT(this);
+    GF_ASSERT(dict);
+
+    ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+               "Failed to get volname");
+        return ret;
+    }
+    /* Name must contain "rebalance/"; the volname follows that slash. */
+    volname_ptr = strstr(volname, "rebalance/");
+    if (volname_ptr) {
+        volname_ptr = strchr(volname_ptr, '/');
+        volname = volname_ptr + 1;
+    } else {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
+               "volname received (%s) is not prefixed with rebalance.",
+               volname);
+        ret = -1;
+        goto out;
+    }
+
+    ret = glusterd_volinfo_find(volname, &volinfo);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+               "Failed to get volinfo for %s", volname);
+        return ret;
+    }
+
+    ret = glusterd_defrag_volume_status_update(volinfo, dict, 0);
+    /* An update failure is logged and raised as an event, then returned. */
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DEFRAG_STATUS_UPDATE_FAIL,
+               "Failed to update status");
+        gf_event(EVENT_REBALANCE_STATUS_UPDATE_FAILED, "volume=%s",
+                 volinfo->volname);
+    }
+
+out:
+    return ret;
 }