1 files changed, 93 insertions, 43 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 4c7282e82..b7b974c68 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
                           int count, void *myframe);
 int
 glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
-                                size_t len)
+                                size_t len, glusterd_op_t op)
 {
-        int     ret = -1;
+        int      ret = -1;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        /* Reject on an uncommitted remove-brick task, unless op is itself remove-brick */
+        if ((GD_OP_REMOVE_BRICK != op) &&
+            !gd_is_remove_brick_committed (volinfo)) {
+                gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on "
+                        "volume %s is not yet committed", volinfo->volname);
+                snprintf (op_errstr, len, "A remove-brick task on volume %s is"
+                          " not yet committed. Either commit or stop the "
+                          "remove-brick task.", volinfo->volname);
+                goto out;
+        }
 
         if (glusterd_is_defrag_on (volinfo)) {
-                gf_log ("glusterd", GF_LOG_DEBUG,
+                gf_log (this->name, GF_LOG_DEBUG,
                         "rebalance on volume %s already started",
                         volinfo->volname);
                 snprintf (op_errstr, len, "Rebalance on %s is already started",
@@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
 
         if (glusterd_is_rb_started (volinfo) ||
             glusterd_is_rb_paused (volinfo)) {
-                gf_log ("glusterd", GF_LOG_DEBUG,
+                gf_log (this->name, GF_LOG_DEBUG,
                         "Rebalance failed as replace brick is in progress on volume %s",
                         volinfo->volname);
                 snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
@@ -66,13 +81,14 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
         }
         ret = 0;
 out:
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
         return ret;
 }
 
+
 int32_t
-glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
-                        rpc_clnt_event_t event, void *data)
+__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+                          rpc_clnt_event_t event, void *data)
 {
         glusterd_volinfo_t      *volinfo = NULL;
         glusterd_defrag_info_t  *defrag  = NULL;
@@ -130,8 +146,6 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
                                                 GF_DEFRAG_STATUS_STARTED) {
                                 volinfo->rebal.defrag_status =
                                                    GF_DEFRAG_STATUS_FAILED;
-                        } else {
-                                volinfo->rebal.defrag_cmd = 0;
                         }
                  }
 
@@ -160,6 +174,14 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
         return ret;
 }
 
+int32_t
+glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+                        rpc_clnt_event_t event, void *data)
+{       /* Thin wrapper: forwards the event to __glusterd_defrag_notify via glusterd_big_locked_notify (presumably under priv->big_lock - confirm) */
+        return glusterd_big_locked_notify (rpc, mydata, event,
+                                           data, __glusterd_defrag_notify);
+}
+
 int
 glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
                               size_t len, int cmd, defrag_cbk_fn_t cbk,
@@ -174,15 +196,14 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
         char                   pidfile[PATH_MAX] = {0,};
         char                   logfile[PATH_MAX] = {0,};
         dict_t                 *options = NULL;
-#ifdef DEBUG
         char                   valgrind_logfile[PATH_MAX] = {0,};
-#endif
+
         priv    = THIS->private;
 
         GF_ASSERT (volinfo);
         GF_ASSERT (op_errstr);
 
-        ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
+        ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
         if (ret)
                 goto out;
         if (!volinfo->rebal.defrag)
@@ -196,6 +217,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
 
         defrag->cmd = cmd;
 
+        volinfo->rebal.defrag_cmd = cmd;
         volinfo->rebal.op = op;
 
         LOCK_INIT (&defrag->lock);
@@ -218,7 +240,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
         snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log",
                     DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname);
         runinit (&runner);
-#ifdef DEBUG
+
         if (priv->valgrind) {
                 snprintf (valgrind_logfile, PATH_MAX,
                           "%s/valgrind-%s-rebalance.log",
@@ -226,10 +248,10 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
                           volinfo->volname);
 
                 runner_add_args (&runner, "valgrind", "--leak-check=full",
-                                 "--trace-children=yes", NULL);
+                                 "--trace-children=yes", "--track-origins=yes",
+                                 NULL);
                 runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
         }
-#endif
 
         runner_add_args (&runner, SBIN_DIR"/glusterfs",
                          "-s", "localhost", "--volfile-id", volinfo->volname,
@@ -240,6 +262,8 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
                          "--xlator-option",
                          "*replicate*.metadata-self-heal=off",
                          "--xlator-option", "*replicate*.entry-self-heal=off",
+                         "--xlator-option", "*replicate*.readdir-failover=off",
+                         "--xlator-option", "*dht.readdir-optimize=on",
                          NULL);
         runner_add_arg (&runner, "--xlator-option");
         runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
@@ -267,14 +291,16 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
          * default timeout of 30mins used for unreliable network connections is
          * too long for unix domain socket connections.
          */
-        ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600);
+        ret = rpc_transport_unix_options_build (&options, sockfile, 600);
         if (ret) {
                 gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
                 goto out;
         }
 
+        synclock_unlock (&priv->big_lock);
         ret = glusterd_rpc_create (&defrag->rpc, options,
                                    glusterd_defrag_notify, volinfo);
+        synclock_lock (&priv->big_lock);
         if (ret) {
                 gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
                 goto out;
@@ -319,14 +345,16 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
          * default timeout of 30mins used for unreliable network connections is
          * too long for unix domain socket connections.
          */
-        ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600);
+        ret = rpc_transport_unix_options_build (&options, sockfile, 600);
         if (ret) {
                 gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
                 goto out;
         }
 
+        synclock_unlock (&priv->big_lock);
         ret = glusterd_rpc_create (&defrag->rpc, options,
                                    glusterd_defrag_notify, volinfo);
+        synclock_lock (&priv->big_lock);
         if (ret) {
                 gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
                 goto out;
@@ -375,26 +403,32 @@ out:
 }
 
 int
-glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+__glusterd_handle_defrag_volume (rpcsvc_request_t *req)
 {
-        int32_t                 ret     = -1;
-        gf_cli_req              cli_req = {{0,}};
-        glusterd_conf_t        *priv    = NULL;
-        dict_t                 *dict    = NULL;
-        char                   *volname = NULL;
-        gf_cli_defrag_type      cmd     = 0;
+        int32_t                 ret       = -1;
+        gf_cli_req              cli_req   = {{0,}};
+        glusterd_conf_t        *priv      = NULL;
+        dict_t                 *dict      = NULL;
+        char                   *volname   = NULL;
+        gf_cli_defrag_type      cmd       = 0;
+        char                    msg[2048] = {0,};
+        xlator_t               *this      = NULL;
 
         GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
 
-        priv = THIS->private;
+        priv = this->private;
+        GF_ASSERT (priv);
 
-        if (!xdr_to_generic (req->msg[0], &cli_req,
-                             (xdrproc_t)xdr_gf_cli_req)) {
+        ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+        if (ret < 0) {
                 //failed to decode msg;
                 req->rpc_err = GARBAGE_ARGS;
                 goto out;
         }
-       if (cli_req.dict.dict_len) {
+
+        if (cli_req.dict.dict_len) {
                 /* Unserialize the dictionary */
                 dict  = dict_new ();
 
@@ -402,24 +436,25 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)
                                         cli_req.dict.dict_len,
                                         &dict);
                 if (ret < 0) {
-                        gf_log ("glusterd", GF_LOG_ERROR,
-                                "failed to "
+                        gf_log (this->name, GF_LOG_ERROR, "failed to "
                                 "unserialize req-buffer to dictionary");
+                        snprintf (msg, sizeof (msg), "Unable to decode the "
+                                  "command");
                         goto out;
                 }
         }
 
         ret = dict_get_str (dict, "volname", &volname);
         if (ret) {
-                gf_log (THIS->name, GF_LOG_ERROR,
-                        "Failed to get volname");
+                snprintf (msg, sizeof (msg), "Failed to get volume name");
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
                 goto out;
         }
 
         ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd);
         if (ret) {
-                gf_log (THIS->name, GF_LOG_ERROR,
-                        "Failed to get command");
+                snprintf (msg, sizeof (msg), "Failed to get command");
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
                 goto out;
         }
 
@@ -430,9 +465,10 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)
         if ((cmd == GF_DEFRAG_CMD_STATUS) ||
               (cmd == GF_DEFRAG_CMD_STOP)) {
                 ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
-                                                  dict);
+                                         dict, msg, sizeof (msg));
         } else
-                ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);
+                ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict,
+                                         msg, sizeof (msg));
 
 out:
 
@@ -440,11 +476,10 @@ out:
         glusterd_op_sm ();
 
         if (ret) {
+                if (msg[0] == '\0')
+                        snprintf (msg, sizeof (msg), "Operation failed");
                 ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0,
-                                                     req, dict,
-                                                     "operation failed");
-                if (dict)
-                        dict_unref (dict);
+                                                     req, dict, msg);
 
         }
 
@@ -453,6 +488,12 @@ out:
         return 0;
 }
 
+int
+glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+{       /* Thin wrapper: passes req and __glusterd_handle_defrag_volume to glusterd_big_locked_handler (presumably runs it under priv->big_lock - confirm) */
+        return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume);
+}
+
 
 int
 glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
@@ -491,7 +532,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
         case GF_DEFRAG_CMD_START:
         case GF_DEFRAG_CMD_START_LAYOUT_FIX:
         case GF_DEFRAG_CMD_START_FORCE:
-                if (is_origin_glusterd ()) {
+                if (is_origin_glusterd (dict)) {
                         op_ctx = glusterd_op_get_ctx ();
                         if (!op_ctx) {
                                 ret = -1;
@@ -517,8 +558,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
                                 ret = 0;
                         }
                 }
-                ret = glusterd_defrag_start_validate (volinfo,
-                                msg, sizeof (msg));
+                ret = glusterd_defrag_start_validate (volinfo, msg,
+                                                      sizeof (msg),
+                                                      GD_OP_REBALANCE);
                 if (ret) {
                         gf_log (this->name, GF_LOG_DEBUG,
                                         "start validate failed");
@@ -621,11 +663,19 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
                         ret = 0;
                 } else {
                         uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+                        volinfo->rebal.op = GD_OP_REBALANCE;
                 }
                 ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
                                                     cmd, NULL, GD_OP_REBALANCE);
                  break;
         case GF_DEFRAG_CMD_STOP:
+                /* Clear task-id only on explicitly stopping rebalance.
+                 * Also clear the stored operation, so it doesn't cause trouble
+                 * with future rebalance/remove-brick starts
+                 */
+                uuid_clear (volinfo->rebal.rebalance_id);
+                volinfo->rebal.op = GD_OP_NONE;
+
                 /* Fall back to the old volume file in case of decommission*/
                 list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
                                           brick_list) {