diff options
23 files changed, 1260 insertions, 532 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 6bea948e9..6ab1515e3 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -731,7 +731,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,          if (!dict)                  goto out; -        if (!((wordcount == 4) || (wordcount == 5) || (wordcount == 6))) { +        if (!((wordcount == 4) || (wordcount == 5))) {                  cli_usage_out (word->pattern);                  parse_error = 1;                  goto out; @@ -741,7 +741,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,                  index = 3;          } else {                  if (strcmp (words[3], "fix-layout") && -                    strcmp (words[3], "migrate-data")) { +                    strcmp (words[3], "start")) {                          cli_usage_out (word->pattern);                          parse_error = 1;                          goto out; @@ -750,7 +750,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,          }  	if (strcmp (words[index], "start") && strcmp (words[index], "stop") && -            strcmp (words[index], "status")) { +            strcmp (words[index], "status") && strcmp (words[index], "force")) {  	        cli_usage_out (word->pattern);  		parse_error = 1;  		goto out; @@ -766,27 +766,19 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,                          goto out;          }          if (wordcount == 5) { -                ret = dict_set_str (dict, "start-type", (char *)words[3]); -                if (ret) -                        goto out; -                ret = dict_set_str (dict, "command", (char *)words[4]); -                if (ret) -                        goto out; -        } - -        /* 'force' option is valid only for the 'migrate-data' key */ -        if (wordcount == 6) { -                if (strcmp (words[3], "migrate-data") || -                    strcmp (words[4], "start") || -                    strcmp (words[5], "force")) { +               if ((strcmp (words[3], "fix-layout") || +                    strcmp (words[4], "start")) && +                    (strcmp (words[3], "start") || +                    strcmp (words[4], "force"))) {                          cli_usage_out (word->pattern);                          parse_error = 1;                          goto out;                  } -                ret = dict_set_str (dict, "start-type", "migrate-data-force"); + +                ret = dict_set_str (dict, "option", (char *)words[4]);                  if (ret)                          goto out; -                ret = dict_set_str (dict, "command", (char *)words[4]); +                ret = dict_set_str (dict, "command", (char *)words[3]);                  if (ret)                          goto out;          } @@ -1776,7 +1768,7 @@ struct cli_cmd volume_cmds[] = {            cli_cmd_volume_remove_brick_cbk,            "remove brick from volume <VOLNAME>"}, -        { "volume rebalance <VOLNAME> [fix-layout|migrate-data] {start|stop|status} [force]", +        { "volume rebalance <VOLNAME> [fix-layout] {start|stop|status} [force]",            cli_cmd_volume_defrag_cbk,            "rebalance operations"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 6ed380bff..d1888415c 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -1052,9 +1052,9 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                                            volname);                  } else {                          snprintf (msg, sizeof (msg), -                                  "stopped rebalance process of volume %s \n" -                                  "(after rebalancing %"PRId64" files totaling" -                                  " %"PRId64" bytes)", volname, files, size); +                                 "Stopped rebalance process on volume %s \n" +                                 "(after rebalancing %"PRId64" bytes - " +                                 "%"PRId64" files)", volname, size, files);                  }                  goto done;          } @@ -1065,7 +1065,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                                            "%s", rsp.op_errstr);                          else                                  snprintf (msg, sizeof (msg), -                                          "failed to get the status of " +                                          "Failed to get the status of "                                            "rebalance process");                          goto done;                  } @@ -1074,11 +1074,8 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                  case GF_DEFRAG_STATUS_NOT_STARTED:                          status = "not started";                          break; -                case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED: -                        status = "step 1: layout fix in progress"; -                        break; -                case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED: -                        status = "step 2: data migration in progress"; +                case GF_DEFRAG_STATUS_STARTED: +                        status = "in progress";                          break;                  case GF_DEFRAG_STATUS_STOPPED:                          status = "stopped"; @@ -1089,38 +1086,17 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                  case GF_DEFRAG_STATUS_FAILED:                          status = "failed";                          break; -                case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: -                        status = "step 1: layout fix complete"; -                        break; -                case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE: -                        status = "step 2: data migration complete"; -                        break; -                case GF_DEFRAG_STATUS_PAUSED: -                        status = "paused"; -                        break;                  } -                if (files && (rsp.op_errno == 1)) { -                        snprintf (msg, sizeof (msg), -                                  "rebalance %s: fixed layout %"PRId64, -                                  status, files); -                        goto done; -                } -                if (files && (rsp.op_errno == 6)) { -                        snprintf (msg, sizeof (msg), -                                  "rebalance %s: fixed layout %"PRId64, -                                  status, files); -                        goto done; -                } -                if (files) { -                        snprintf (msg, sizeof (msg), -                                  "rebalance %s: rebalanced %"PRId64 +                if (files || size || lookup) { +                        snprintf (msg, sizeof(msg), +                                  "Rebalance %s: rebalanced %"PRId64                                    " files of size %"PRId64" (total files"                                    " scanned %"PRId64")", status,                                    files, size, lookup);                          goto done;                  } -                snprintf (msg, sizeof (msg), "rebalance %s", status); +                snprintf (msg, sizeof (msg), "Rebalance %s", status);                  goto done;          } @@ -1129,7 +1105,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                  snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);          else                  snprintf (msg, sizeof (msg), -                          "starting rebalance on volume %s has been %s", +                          "Starting rebalance on volume %s has been %s",                            volname, (rsp.op_ret) ? "unsuccessful":                            "successful"); @@ -1398,24 +1374,18 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,          case GF_DEFRAG_STATUS_NOT_STARTED:                  status = "not started";                  break; -        case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED: -        case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED: -        case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: +        case GF_DEFRAG_STATUS_STARTED:                  status = "in progress";                  break;          case GF_DEFRAG_STATUS_STOPPED:                  status = "stopped";                  break;          case GF_DEFRAG_STATUS_COMPLETE: -        case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:                  status = "completed";                  break;          case GF_DEFRAG_STATUS_FAILED:                  status = "failed";                  break; -        case GF_DEFRAG_STATUS_PAUSED: -                status = "paused"; -                break;          }          if (rsp.dict.dict_len) { @@ -2479,20 +2449,19 @@ gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,          if (strcmp (cmd_str, "start") == 0) {                  cmd = GF_DEFRAG_CMD_START; -                ret = dict_get_str (dict, "start-type", &cmd_str); +                ret = dict_get_str (dict, "option", &cmd_str);                  if (!ret) { -                        if (strcmp (cmd_str, "fix-layout") == 0) { -                                cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX; -                        } -                        if (strcmp (cmd_str, "migrate-data") == 0) { -                                cmd = GF_DEFRAG_CMD_START_MIGRATE_DATA; -                        } -                        if (strcmp (cmd_str, "migrate-data-force") == 0) { -                                cmd = GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE; +                        if (strcmp (cmd_str, "force") == 0) { +                                cmd = GF_DEFRAG_CMD_START_FORCE;                          }                  }                  goto done;          } + +        if (strcmp (cmd_str, "fix-layout") == 0) { +                cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX; +                goto done; +        }          if (strcmp (cmd_str, "stop") == 0) {                  cmd = GF_DEFRAG_CMD_STOP;                  goto done; diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index e2b658a98..08f8a05f9 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -736,6 +736,79 @@ out:          return ret;  } + +int +glusterfs_handle_defrag (rpcsvc_request_t *req) +{ +        int32_t                  ret     = -1; +        gd1_mgmt_brick_op_req    xlator_req = {0,}; +        dict_t                   *dict    = NULL; +        xlator_t                 *xlator = NULL; +        xlator_t                 *any = NULL; +        dict_t                   *output = NULL; +        char                     msg[2048] = {0}; +        glusterfs_ctx_t          *ctx = NULL; +        glusterfs_graph_t        *active = NULL; +        xlator_t                 *this = NULL; + +        GF_ASSERT (req); +        this = THIS; +        GF_ASSERT (this); + +        ctx = glusterfs_ctx_get (); +        GF_ASSERT (ctx); + +        active = ctx->active; +        any = active->first; +        if (!xdr_to_generic (req->msg[0], &xlator_req, +                             (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) { +                //failed to decode msg; +                req->rpc_err = GARBAGE_ARGS; +                goto out; +        } +        dict = dict_new (); +        if (!dict) +                goto out; + +        ret = dict_unserialize (xlator_req.input.input_val, +                                xlator_req.input.input_len, +                                &dict); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to " +                        "unserialize req-buffer to dictionary"); +                goto out; +        } +        xlator = xlator_search_by_name (any, xlator_req.name); +        if (!xlator) { +                snprintf (msg, sizeof (msg), "xlator %s is not loaded", +                          xlator_req.name); +                goto out; +        } + +        output = dict_new (); +        if (!output) { +                ret = -1; +                goto out; +        } + +        ret = xlator->notify (xlator, GF_EVENT_VOLUME_DEFRAG, dict, output); + +        ret = glusterfs_translator_info_response_send (req, ret, +                                                       msg, output); +out: +        if (dict) +                dict_unref (dict); +        if (xlator_req.input.input_val) +                free (xlator_req.input.input_val); // malloced by xdr +        if (output) +                dict_unref (output); +        if (xlator_req.name) +                free (xlator_req.name); //malloced by xdr + +        return ret; + +}  int  glusterfs_handle_brick_status (rpcsvc_request_t *req)  { @@ -887,6 +960,9 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)          case GLUSTERD_BRICK_STATUS:                  ret = glusterfs_handle_brick_status (req);                  break; +        case GLUSTERD_BRICK_XLATOR_DEFRAG: +                ret = glusterfs_handle_defrag (req); +                break;          default:                  break;          } @@ -943,7 +1019,8 @@ rpcsvc_actor_t glusterfs_actors[] = {          [GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},          [GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0},          [GLUSTERD_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GLUSTERD_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL, 0}, -        [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0} +        [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0}, +        [GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0}  };  struct rpcsvc_program glusterfs_mop_prog = { diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index ccfdc11dc..ef9cdfe85 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -383,6 +383,7 @@ typedef enum {          GF_EVENT_TRANSLATOR_INFO,          GF_EVENT_TRIGGER_HEAL,          GF_EVENT_AUTH_FAILED, +        GF_EVENT_VOLUME_DEFRAG,          GF_EVENT_MAXVAL,  } glusterfs_event_t; diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 6ef4cb702..874f46e0b 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -184,6 +184,7 @@ enum glusterd_brick_procnum {          GLUSTERD_BRICK_XLATOR_HEAL,          GLUSTERD_BRICK_STATUS,          GLUSTERD_BRICK_OP, +        GLUSTERD_BRICK_XLATOR_DEFRAG,          GLUSTERD_BRICK_MAXVALUE,  }; diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h index d136ec255..903b6ff72 100644 --- a/rpc/xdr/src/cli1-xdr.h +++ b/rpc/xdr/src/cli1-xdr.h @@ -47,22 +47,16 @@ enum gf_cli_defrag_type {  	GF_DEFRAG_CMD_STOP = 1 + 1,  	GF_DEFRAG_CMD_STATUS = 1 + 2,  	GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, -	GF_DEFRAG_CMD_START_MIGRATE_DATA = 1 + 4, -	GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE = 1 + 5, -	GF_DEFRAG_CMD_START_FORCE = 1 + 6, +	GF_DEFRAG_CMD_START_FORCE = 1 + 4,  };  typedef enum gf_cli_defrag_type gf_cli_defrag_type;  enum gf_defrag_status_t {  	GF_DEFRAG_STATUS_NOT_STARTED = 0, -	GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED = 1, -	GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED = 2, -	GF_DEFRAG_STATUS_STOPPED = 3, -	GF_DEFRAG_STATUS_COMPLETE = 4, -	GF_DEFRAG_STATUS_FAILED = 5, -	GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE = 6, -	GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE = 7, -	GF_DEFRAG_STATUS_PAUSED = 8, +	GF_DEFRAG_STATUS_STARTED = 1, +	GF_DEFRAG_STATUS_STOPPED = 2, +	GF_DEFRAG_STATUS_COMPLETE = 3, +	GF_DEFRAG_STATUS_FAILED = 4,  };  typedef enum gf_defrag_status_t gf_defrag_status_t; diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 5f491c7b4..f45712ce0 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -3,21 +3,15 @@          GF_DEFRAG_CMD_STOP,          GF_DEFRAG_CMD_STATUS,          GF_DEFRAG_CMD_START_LAYOUT_FIX, -        GF_DEFRAG_CMD_START_MIGRATE_DATA, -        GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE,          GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */  } ;   enum gf_defrag_status_t {          GF_DEFRAG_STATUS_NOT_STARTED, -        GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED, -        GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED, +        GF_DEFRAG_STATUS_STARTED,          GF_DEFRAG_STATUS_STOPPED,          GF_DEFRAG_STATUS_COMPLETE, -        GF_DEFRAG_STATUS_FAILED, -        GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE, -        GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE, -        GF_DEFRAG_STATUS_PAUSED +        GF_DEFRAG_STATUS_FAILED  } ;   enum gf1_cluster_type { diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 29b3dca83..360a432cd 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -4296,16 +4296,22 @@ dht_forget (xlator_t *this, inode_t *inode)  int  dht_notify (xlator_t *this, int event, void *data, ...)  { -        xlator_t   *subvol = NULL; -        int         cnt    = -1; -        int         i      = -1; -        dht_conf_t *conf   = NULL; -        int         ret    = -1; -        int         propagate = 0; +        xlator_t                *subvol = NULL; +        int                      cnt    = -1; +        int                      i      = -1; +        dht_conf_t              *conf   = NULL; +        int                      ret    = -1; +        int                      propagate = 0; + +        int                      had_heard_from_all = 0; +        int                      have_heard_from_all = 0; +        struct timeval           time = {0,}; +        gf_defrag_info_t        *defrag = NULL; +        dict_t                  *dict   = NULL; +        gf_defrag_type           cmd    = 0; +        dict_t                  *output = NULL; +        va_list                  ap; -        int         had_heard_from_all = 0; -        int         have_heard_from_all = 0; -        struct timeval  time = {0,};          conf = this->private;          if (!conf) @@ -4418,6 +4424,36 @@ dht_notify (xlator_t *this, int event, void *data, ...)                  UNLOCK (&conf->subvolume_lock);                  break; +        case GF_EVENT_VOLUME_DEFRAG: +        { +                if (!conf->defrag) { +                        return ret; +                } +                defrag = conf->defrag; + +                dict = data; +                va_start (ap, data); +                output = va_arg (ap, dict_t*); + +                ret = dict_get_int32 (dict, "rebalance-command", +                                      (int32_t*)&cmd); +                if (ret) +                        return ret; +                LOCK (&defrag->lock); +                { +                        if (defrag->is_exiting) +                                goto unlock; +                        if (cmd == GF_DEFRAG_CMD_STATUS) +                                gf_defrag_status_get (defrag, output); +                        else if (cmd == GF_DEFRAG_CMD_STOP) +                                gf_defrag_stop (defrag, output); +                } +unlock: +                UNLOCK (&defrag->lock); +                return 0; +                break; +        } +          default:                  propagate = 1;                  break; @@ -4433,8 +4469,19 @@ dht_notify (xlator_t *this, int event, void *data, ...)          /* if all subvols have reported status, no need to hide anything             or wait for anything else. Just propagate blindly */ -        if (have_heard_from_all) +        if (have_heard_from_all) {                  propagate = 1; +                if (conf->defrag) { +                        ret = pthread_create (&conf->defrag->th, NULL, +                                              gf_defrag_start, this); +                        if (ret) { +                                conf->defrag = NULL; +                                GF_FREE (conf->defrag); +                                kill (getpid(), SIGTERM); +                        } +                } +        } +          if (!had_heard_from_all && have_heard_from_all) {                  /* This is the first event which completes aggregation diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 749abe538..d97ef9f58 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -175,6 +175,43 @@ struct dht_du {  };  typedef struct dht_du dht_du_t; +enum gf_defrag_type { +        GF_DEFRAG_CMD_START = 1, +        GF_DEFRAG_CMD_STOP = 1 + 1, +        GF_DEFRAG_CMD_STATUS = 1 + 2, +        GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, +        GF_DEFRAG_CMD_START_FORCE = 1 + 4, +}; +typedef enum gf_defrag_type gf_defrag_type; + +enum gf_defrag_status_t { +        GF_DEFRAG_STATUS_NOT_STARTED, +        GF_DEFRAG_STATUS_STARTED, +        GF_DEFRAG_STATUS_STOPPED, +        GF_DEFRAG_STATUS_COMPLETE, +        GF_DEFRAG_STATUS_FAILED, +}; +typedef enum gf_defrag_status_t gf_defrag_status_t; + + +struct gf_defrag_info_ { +        uint64_t                     total_files; +        uint64_t                     total_data; +        uint64_t                     num_files_lookedup; +        gf_lock_t                    lock; +        int                          cmd; +        pthread_t                    th; +        gf_defrag_status_t           defrag_status; +        struct rpc_clnt             *rpc; +        uint32_t                     connected; +        uint32_t                     is_exiting; +        pid_t                        pid; +        inode_t                     *root_inode; + +}; + +typedef struct gf_defrag_info_ gf_defrag_info_t; +  struct dht_conf {          gf_lock_t      subvolume_lock;          int            subvolume_cnt; @@ -208,6 +245,9 @@ struct dht_conf {          /* to keep track of nodes which are decomissioned */          xlator_t     **decommissioned_bricks; + +        /* defrag related */ +        gf_defrag_info_t *defrag;  };  typedef struct dht_conf dht_conf_t; @@ -608,6 +648,13 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                       inode_t *inode, struct iatt *stbuf, struct iatt *preparent,                       struct iatt *postparent); +int +gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict); + +int +gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output); +void* +gf_defrag_start (void *this);  #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 21fb5a7ca..a12ed1534 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -37,6 +37,7 @@ enum gf_dht_mem_types_ {          gf_switch_mt_switch_struct,          gf_dht_mt_subvol_time,          gf_dht_mt_loc_t, +        gf_defrag_info_mt,          gf_dht_mt_end  };  #endif diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index dfd6f3b6e..46fc8773e 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -669,7 +669,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                          loc->path, from->name, strerror (errno));          } -        if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) { +        if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {                  /* take out the source from namespace */                  ret = syncop_unlink (from, loc);                  if (ret) { @@ -805,3 +805,607 @@ dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)                              frame, frame);          return ret;  } + +int +gf_listener_stop (void) +{ +        glusterfs_ctx_t  *ctx = NULL; +        cmd_args_t       *cmd_args = NULL; +        int              ret = 0; +        xlator_t         *this = NULL; + +        ctx = glusterfs_ctx_get (); +        GF_ASSERT (ctx); +        cmd_args = &ctx->cmd_args; +        if (cmd_args->sock_file) { +                ret = unlink (cmd_args->sock_file); +                if (ret && (ENOENT == errno)) { +                        ret = 0; +                } +        } + +        if (ret) { +                this = THIS; +                gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener " +                        "socket %s, error: %s", cmd_args->sock_file, +                        strerror (errno)); +        } +        return ret; +} + +void +dht_build_root_inode (xlator_t *this, inode_t **inode) +{ +        inode_table_t    *itable        = NULL; +        uuid_t            root_gfid     = {0, }; + +        itable = inode_table_new (0, this); +        if (!itable) +                return; + +        root_gfid[15] = 1; +        *inode = inode_find (itable, root_gfid); +} + +void +dht_build_root_loc (inode_t *inode, loc_t *loc) +{ +        loc->path = "/"; +        loc->inode = inode; +        loc->inode->ia_type = IA_IFDIR; +        memset (loc->gfid, 0, 16); +        loc->gfid[15] = 1; +} + + +/* return values: 1 -> error, bug ignore and continue +                  0 -> proceed +                 -1 -> error, handle it */ +int32_t +gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) +{ +        /* if errno is not ENOSPC or ENOTCONN, we can still continue +           with rebalance process */ +        if ((errno != ENOSPC) || (errno != ENOTCONN)) +                return 1; + +        if (errno == ENOTCONN) { +                /* Most probably mount point went missing (mostly due +                   to a brick down), say rebalance failure to user, +                   let him restart it if everything is fine */ +                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; +                return -1; +        } + +        if (errno == ENOSPC) { +                /* rebalance process itself failed, may be +                   remote brick went down, or write failed due to +                   disk full etc etc.. */ +                defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; +                return -1; +        } + +        return 0; +} + +/* We do a depth first traversal of directories. But before we move into + * subdirs, we complete the data migration of those directories whose layouts + * have been fixed + */ + +int +gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, +                        dict_t *migrate_data) +{ +        int                      ret            = -1; +        loc_t                    entry_loc      = {0,}; +        fd_t                    *fd             = NULL; +        gf_dirent_t              entries; +        gf_dirent_t             *tmp            = NULL; +        gf_dirent_t             *entry          = NULL; +        gf_boolean_t             free_entries   = _gf_false; +        off_t                    offset         = 0; +        dict_t                  *dict           = NULL; +        struct iatt              iatt           = {0,}; +        int32_t                  op_errno       = 0; + +        fd = fd_create (loc->inode, defrag->pid); +        if (!fd) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); +                goto out; +        } + +        ret = syncop_opendir (this, loc, fd); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s", +                        loc->path); +                goto out; +        } + +        INIT_LIST_HEAD (&entries.list); + +        while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, +                &entries)) != 0) +        { +                if ((ret < 0) || (ret && (errno == ENOENT))) +                        break; + +                free_entries = _gf_true; + +                if (list_empty (&entries.list)) +                        break; +                list_for_each_entry_safe (entry, tmp, &entries.list, list) { +                        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +                                ret = 1; +                                goto out; +                        } + +                        offset = entry->d_off; + +                        if (!strcmp (entry->d_name, ".") || +                            !strcmp (entry->d_name, "..")) +                                continue; + +                        if (IA_ISDIR (entry->d_stat.ia_type)) +                                continue; + +                        defrag->num_files_lookedup++; +                        if (entry->d_stat.ia_nlink > 1) +                                continue; + +                        loc_wipe (&entry_loc); +                        ret =dht_build_child_loc (this, &entry_loc, loc, +                                                  entry->d_name); +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, "Child loc" +                                        " build failed"); +                                goto out; +                        } + +                        if (uuid_is_null (entry->d_stat.ia_gfid)) { +                                gf_log (this->name, GF_LOG_ERROR, "%s/%s" +                                        " gfid not present", loc->path, +                                         entry->d_name); +                                continue; +                        } + +                        uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); + +                        if (uuid_is_null (loc->gfid)) { +                                gf_log (this->name, GF_LOG_ERROR, "%s/%s" +                                        " gfid not present", loc->path, +                                         entry->d_name); +                                continue; +                        } + +                        uuid_copy (entry_loc.pargfid, loc->gfid); + +                        entry_loc.inode->ia_type = entry->d_stat.ia_type; + +                        ret = syncop_lookup (this, &entry_loc, NULL, &iatt, +                                             NULL, NULL); +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, "%s" +                                        " lookup failed", entry_loc.path); +                                continue; +                        } + +                        /* if distribute is present, it will honor this key. +                         * -1 is returned if distribute is not present or file +                         * doesn't have a link-file. If file has link-file, the +                         * path of link-file will be the value, and also that +                         * guarantees that file has to be mostly migrated */ + +                        ret = syncop_getxattr (this, &entry_loc, &dict, +                                               GF_XATTR_LINKINFO_KEY); +                        if (ret < 0) { +                                continue; +                        } + +                        ret = syncop_setxattr (this, &entry_loc, migrate_data, +                                               0); +                        if (ret) +                                gf_log (this->name, GF_LOG_ERROR, "setxattr " +                                        "failed for %s", entry_loc.path); + +                        if (ret == -1) { +                                op_errno = errno; +                                ret = gf_defrag_handle_migrate_error (op_errno, +                                                                      defrag); + +                                if (!ret) +                                        gf_log (this->name, GF_LOG_DEBUG, +                                                "setxattr on %s failed: %s", +                                                entry_loc.path, +                                                strerror (op_errno)); +                                else if (ret == 1) +                                        continue; +                                else if (ret == -1) +                                        goto out; +                        } + +                        LOCK (&defrag->lock); +                        { +                                defrag->total_files += 1; +                                defrag->total_data += iatt.ia_size; +                        } +                        UNLOCK (&defrag->lock); +                } + +                gf_dirent_free (&entries); +                free_entries = _gf_false; +                INIT_LIST_HEAD (&entries.list); + +        } +        ret = 0; +out: +        if (free_entries) +                gf_dirent_free (&entries); + +        loc_wipe (&entry_loc); + +        if (dict) +                dict_unref(dict); + +        if (fd) +                fd_unref (fd); +        return ret; + +} + + +int +gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, +                  dict_t *fix_layout, dict_t *migrate_data) +{ +        int                      ret            = -1; +        loc_t                    entry_loc      = {0,}; +        fd_t                    *fd             = NULL; +        gf_dirent_t              entries; +        gf_dirent_t             *tmp            = NULL; +        gf_dirent_t             *entry          = NULL; +        gf_boolean_t             free_entries   = _gf_false; +        dict_t                  *dict           = NULL; +        off_t                    offset         = 0; +        struct iatt              iatt           = {0,}; + +        ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s", +                        loc->path); +                goto out; +        } + +        if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) +                gf_defrag_migrate_data (this, defrag, loc, migrate_data); + +        gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path); + +        fd = fd_create (loc->inode, defrag->pid); +        if (!fd) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); +                ret = -1; +                goto out; +        } + +        ret = syncop_opendir (this, loc, fd); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s", +                        loc->path); +                ret = -1; +                goto out; +        } + +        INIT_LIST_HEAD (&entries.list); +        while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, +                &entries)) != 0) +        { +                if ((ret < 0) || (ret && (errno == ENOENT))) +                        break; +                free_entries = _gf_true; + +                if (list_empty (&entries.list)) +                        break; +                list_for_each_entry_safe (entry, tmp, &entries.list, list) { +                        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +                                ret = 1; +                                goto out; +                        } + +                        offset = entry->d_off; + +                        if (!strcmp (entry->d_name, ".") || +                            !strcmp (entry->d_name, "..")) +                                continue; + +                        if (!IA_ISDIR (entry->d_stat.ia_type)) +                                continue; + +                        loc_wipe (&entry_loc); +                        ret =dht_build_child_loc (this, &entry_loc, loc, +                                                  entry->d_name); +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, "Child loc" +                                        " build failed"); +                                goto out; +                        } + +                        if (uuid_is_null (entry->d_stat.ia_gfid)) { +                                gf_log (this->name, GF_LOG_ERROR, "%s/%s" +                                        "gfid not present", loc->path, +                                         entry->d_name); +                                continue; +                        } + +                        entry_loc.inode->ia_type = entry->d_stat.ia_type; + +                        uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); +                        if (uuid_is_null (loc->gfid)) { +                                gf_log (this->name, GF_LOG_ERROR, "%s/%s" +                                        "gfid not present", loc->path, +                                         entry->d_name); +                                continue; +                        } + +                        uuid_copy (entry_loc.pargfid, loc->gfid); + +                        ret = syncop_lookup (this, &entry_loc, NULL, &iatt, +                                             NULL, NULL); +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, "%s" +                                        " lookup failed", entry_loc.path); +                                continue; +                        } + +                        ret = syncop_setxattr (this, &entry_loc, fix_layout, +                                               0); +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, "Setxattr " +                                        "failed for %s", entry_loc.path); +                                defrag->defrag_status = +                                GF_DEFRAG_STATUS_FAILED; +                                goto out; +                        } +                        ret = gf_defrag_fix_layout (this, defrag, &entry_loc, +                                                    fix_layout, migrate_data); + +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, "Fix layout " +                                        "failed for %s", entry_loc.path); +                                goto out; +                        } + +                } +                gf_dirent_free (&entries); +                free_entries = _gf_false; +                INIT_LIST_HEAD (&entries.list); +        } + +        ret = 0; +out: +        if (free_entries) +                gf_dirent_free (&entries); + +        loc_wipe (&entry_loc); + +        if (dict) +                dict_unref(dict); + +        if (fd) +                fd_unref (fd); + +        return ret; + +} + + +int +gf_defrag_start_crawl (void *data) +{ +        xlator_t                *this   = NULL; +        dht_conf_t              *conf   = NULL; +        gf_defrag_info_t        *defrag = NULL; +        int                      ret    = -1; +        loc_t                    loc    = {0,}; +        struct iatt              iatt   = {0,}; +        struct iatt              parent = {0,}; +        dict_t                  *fix_layout = NULL; +        dict_t                  *migrate_data = NULL; + +        this = data; +        if (!this) +                goto out; + +        conf = this->private; +        if (!conf) +                goto out; + +        defrag = conf->defrag; +        if (!defrag) +                goto out; + +        dht_build_root_inode (this, &defrag->root_inode); +        if (!defrag->root_inode) +                goto out; + +        dht_build_root_loc (defrag->root_inode, &loc); + +        /* fix-layout on '/' first */ + +        ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent); + +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "look up on / failed"); +                goto out; +        } + +        fix_layout = dict_new (); +        if (!fix_layout) { +                ret = -1; +                goto out; +        } + +        ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes"); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str"); +                goto out; +        } + +        ret = syncop_setxattr (this, &loc, fix_layout, 0); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed", +                        loc.path); +                goto out; +        } + +        if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { +                migrate_data = dict_new (); +                if (!migrate_data) { +                        ret = -1; +                        goto out; +                } +                if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) +                        ret = dict_set_str (migrate_data, +                                            "distribute.migrate-data", "force"); +                else +                        ret = dict_set_str (migrate_data, +                                            "distribute.migrate-data", +                                            "non-force"); +                if (ret) +                        goto out; +        } +        ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout, +                                    migrate_data); + +out: +        LOCK (&defrag->lock); +        { +                gf_defrag_status_get (defrag, NULL); +                defrag->is_exiting = 1; +        } +        UNLOCK (&defrag->lock); + +        if (defrag) +                GF_FREE (defrag); + +        return ret; +} + + +static int +gf_defrag_done  (int ret, call_frame_t *sync_frame, void *data) +{ +        gf_listener_stop(); + +        GF_FREE (data); +        STACK_DESTROY (sync_frame->root); +        kill (getpid(), SIGTERM); +        return 0; +} + +void * +gf_defrag_start (void *data) +{ +        int                      ret    = -1; +        call_frame_t            *frame  = NULL; +        dht_conf_t              *conf   = NULL; +        gf_defrag_info_t        *defrag = NULL; +        xlator_t                *this  = NULL; + +        this = data; +        conf = this->private; +        if (!conf) +                goto out; + +        defrag = conf->defrag; +        if (!defrag) +                goto out; + +        frame = create_frame (this, this->ctx->pool); +        if (!frame) +                goto out; + +        defrag->pid = frame->root->pid; + +        defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; + +        ret = synctask_new (this->ctx->env, gf_defrag_start_crawl, +                            gf_defrag_done, frame, this); + +        if (ret) +                gf_log (this->name, GF_LOG_ERROR, "Could not create" +                        " task for rebalance"); +out: +        return NULL; +} + +int +gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) +{ +        int      ret    = 0; +        uint64_t files  = 0; +        uint64_t size   = 0; +        uint64_t lookup = 0; + +        if (!defrag) +                goto out; + +        ret = 0; +        if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) +                goto out; + +        files  = defrag->total_files; +        size   = defrag->total_data; +        lookup = defrag->num_files_lookedup; + +        if (!dict) +                goto log; + +        ret = dict_set_uint64 (dict, "files", files); +        if (ret) +                gf_log (THIS->name, GF_LOG_WARNING, +                        "failed to set file count"); + +        ret = dict_set_uint64 (dict, "size", size); +        if (ret) +                gf_log (THIS->name, GF_LOG_WARNING, +                        "failed to set size of xfer"); + +        ret = dict_set_uint64 (dict, "lookups", lookup); +        if (ret) +                gf_log (THIS->name, GF_LOG_WARNING, +                        "failed to set lookedup file count"); + +        ret = dict_set_int32 (dict, "status", defrag->defrag_status); +        if (ret) +                gf_log (THIS->name, GF_LOG_WARNING, +                        "failed to set status"); +log: +        gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %" +                PRIu64", lookups: %"PRIu64, files, size, lookup); + + +out: +        return 0; +} + +int +gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output) +{ +        /* TODO: set a variable 'stop_defrag' here, it should be checked +           in defrag loop */ +        int     ret = -1; +        GF_ASSERT (defrag); + +        if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { +                goto out; +        } + +        defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED; + +        gf_defrag_status_get (defrag, output); +        ret = 0; +out: +        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); +        return ret; +} diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 18fee7cd3..816bf868e 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -182,11 +182,20 @@ out:  int  notify (xlator_t *this, int event, void *data, ...)  { -        int ret = -1; +        int              ret = -1; +        va_list          ap; +        dict_t          *output = NULL;          GF_VALIDATE_OR_GOTO ("dht", this, out); -        ret = dht_notify (this, event, data); + +        if (!data) +                goto out; + +        va_start (ap, data); +        output = va_arg (ap, dict_t*); + +        ret = dht_notify (this, event, data, output);  out:          return ret; @@ -343,10 +352,13 @@ out:  int  init (xlator_t *this)  { -        dht_conf_t    *conf = NULL; -        char          *temp_str = NULL; -        int            ret = -1; -        int            i = 0; +        dht_conf_t                      *conf           = NULL; +        char                            *temp_str       = NULL; +        int                              ret            = -1; +        int                              i              = 0; +        gf_defrag_info_t                *defrag         = NULL; +        int                              cmd            = 0; +          GF_VALIDATE_OR_GOTO ("dht", this, err); @@ -366,6 +378,24 @@ init (xlator_t *this)                  goto err;          } +        ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd); + +        if (cmd) { +                defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t), +                                    gf_defrag_info_mt); + +                GF_VALIDATE_OR_GOTO (this->name, defrag, err); + +                LOCK_INIT (&defrag->lock); + +                defrag->is_exiting = 0; + +                defrag->cmd = cmd; + +                conf->defrag = defrag; +        } + +          conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;          if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {                  /* If option is not "auto", other options _should_ be boolean */ @@ -550,5 +580,9 @@ struct volume_options options[] = {          { .key  = {"decommissioned-bricks"},            .type = GF_OPTION_TYPE_ANY,          }, +        { .key  = {"rebalance-cmd"}, +          .type = GF_OPTION_TYPE_INT, +        }, +          { .key  = {NULL} },  }; diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index c170972ce..fc9c9cf0a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -31,6 +31,7 @@  #include "glusterd-utils.h"  #include "glusterd-volgen.h"  #include "run.h" +#include <sys/signal.h>  /* misc */ @@ -1384,8 +1385,6 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)          switch (volinfo->defrag_status) {          case GF_DEFRAG_STATUS_FAILED:          case GF_DEFRAG_STATUS_COMPLETE: -        case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: -        case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:                  volinfo->defrag_status = 0;          default:                  break; @@ -1420,6 +1419,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)          int32_t             replica_count  = 0;          glusterd_brickinfo_t *brickinfo    = NULL;          glusterd_brickinfo_t *tmp          = NULL; +        glusterd_conf_t      *priv         = NULL; +        char                  pidfile[PATH_MAX]; +          ret = dict_get_str (dict, "volname", &volname); @@ -1456,7 +1458,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                          if (volinfo->defrag) {                                  LOCK (&volinfo->defrag->lock); -                                volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED; +                                //volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;                                  UNLOCK (&volinfo->defrag->lock);                          } @@ -1470,13 +1472,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)          case GF_OP_CMD_ABORT:          {                  if (volinfo->decommission_in_progress) { -                        if (volinfo->defrag) { -                                LOCK (&volinfo->defrag->lock); +                        priv = THIS->private; +                        if (!priv) +                                return ret; -                                volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED; +                        GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + +                        glusterd_service_stop ("rebalance", pidfile, SIGTERM, 1); -                                UNLOCK (&volinfo->defrag->lock); -                        }                  }                  /* Fall back to the old volume file */ @@ -1577,8 +1580,6 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)          switch (volinfo->defrag_status) {          case GF_DEFRAG_STATUS_FAILED:          case GF_DEFRAG_STATUS_COMPLETE: -        case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: -        case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:                  volinfo->defrag_status = 0;          default:                  break; diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index b80164e8d..b06dd28cf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -346,6 +346,11 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,          if (ret)                  goto out; +        snprintf (key, 256, "volume%d.rebalance", count); +        ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd); +        if (ret) +                goto out; +          list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {                  char    brick[1024] = {0,};                  snprintf (key, 256, "volume%d.brick%d", count, i); @@ -2046,6 +2051,7 @@ glusterd_rpc_create (struct rpc_clnt **rpc,          GF_ASSERT (this);          GF_ASSERT (options); +          new_rpc = rpc_clnt_new (options, this->ctx, this->name);          if (!new_rpc) diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 4a0561d1e..2a4bf82ee 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -150,6 +150,8 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin  {          int                     ret = -1;          gd1_mgmt_brick_op_req   *brick_req = NULL; +        char                    *volname = NULL; +        char                    name[1024] = {0,};          GF_ASSERT (op < GD_OP_MAX);          GF_ASSERT (op > GD_OP_NONE); @@ -204,6 +206,21 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin                  brick_req->name = "";          }                  break; +        case GD_OP_REBALANCE: +        case GD_OP_DEFRAG_BRICK_VOLUME: +                brick_req = GF_CALLOC (1, sizeof (*brick_req), +                                       gf_gld_mt_mop_brick_req_t); +                if (!brick_req) +                        goto out; + +                brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG; +                ret = dict_get_str (dict, "volname", &volname); +                if (ret) +                        goto out; +                snprintf (name, 1024, "%s-dht",volname); +                brick_req->name = gf_strdup (name); + +                break;          default:                  goto out;          break; @@ -1617,6 +1634,7 @@ glusterd_op_build_payload (dict_t **req)                  case GD_OP_HEAL_VOLUME:                  case GD_OP_STATEDUMP_VOLUME:                  case GD_OP_CLEARLOCKS_VOLUME: +                case GD_OP_DEFRAG_BRICK_VOLUME:                          {                                  dict_t  *dict = ctx;                                  dict_copy (dict, req_dict); @@ -2173,6 +2191,7 @@ glusterd_need_brick_op (glusterd_op_t op)          switch (op) {          case GD_OP_PROFILE_VOLUME:          case GD_OP_STATUS_VOLUME: +        case GD_OP_DEFRAG_BRICK_VOLUME:                  ret = _gf_true;                  break;          default: @@ -2368,6 +2387,7 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,                          break;                  case GD_OP_REBALANCE: +                case GD_OP_DEFRAG_BRICK_VOLUME:                          ret = glusterd_op_stage_rebalance (dict, op_errstr);                          break; @@ -2464,6 +2484,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,                          break;                  case GD_OP_REBALANCE: +                case GD_OP_DEFRAG_BRICK_VOLUME:                          ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);                          break; @@ -2613,6 +2634,10 @@ glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo,                                                          op_ctx, op_errstr);                  break; +        case GD_OP_DEFRAG_BRICK_VOLUME: +                dict_copy (rsp_dict, op_ctx); +        break; +          default:                  break;          } @@ -2754,6 +2779,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr)          priv = this->private;          GF_ASSERT (priv); +          ret = dict_get_str (dict, "volname", &volname);          if (ret) {                  gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); @@ -2963,6 +2989,56 @@ out:  } + +static int +glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr) +{ +        int                                     ret = -1; +        char                                    *volname = NULL; +        glusterd_volinfo_t                      *volinfo = NULL; +        xlator_t                                *this = NULL; +        char                                    msg[2048] = {0,}; +        glusterd_pending_node_t                 *pending_node = NULL; + +        this = THIS; +        GF_ASSERT (this); + + +        ret = dict_get_str (dict, "volname", &volname); +        if (ret) { +                gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); +                goto out; +        } + +        ret = glusterd_volinfo_find (volname, &volinfo); +        if (ret) { +                snprintf (msg, sizeof (msg), "Volume %s does not exist", +                          volname); + +                *op_errstr = gf_strdup (msg); +                gf_log ("", GF_LOG_ERROR, "%s", msg); +                goto out; +        } +        pending_node = GF_CALLOC (1, sizeof (*pending_node), +                                  gf_gld_mt_pending_node_t); +        if (!pending_node) { +                ret = -1; +                goto out; +        } else { +                pending_node->node = volinfo; +                pending_node->type = GD_NODE_REBALANCE; +                list_add_tail (&pending_node->list, +                               &opinfo.pending_bricks); +                pending_node = NULL; +        } + +out: +        return ret; +} + + + +  static int  glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr)  { @@ -3196,6 +3272,9 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr)                  ret = glusterd_bricks_select_status_volume (dict, op_errstr);                  break; +        case GD_OP_DEFRAG_BRICK_VOLUME: +                ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr); +                break;          default:                  break;           } @@ -3758,6 +3837,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx)                  case GD_OP_HEAL_VOLUME:                  case GD_OP_STATEDUMP_VOLUME:                  case GD_OP_CLEARLOCKS_VOLUME: +                case GD_OP_DEFRAG_BRICK_VOLUME:                          dict_unref (ctx);                          break;                  default: diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index e5a907eaa..0fe8d3899 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -46,363 +46,35 @@  #include "cli1-xdr.h"  #include "xdr-generic.h" -/* return values - 0: success, +ve: stopped, -ve: failure */ -int -gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) -{ -        int                     ret                    = -1; -        DIR                    *fd                     = NULL; -        glusterd_defrag_info_t *defrag                 = NULL; -        struct dirent          *entry                  = NULL; -        struct stat             stbuf                  = {0,}; -        char                    full_path[PATH_MAX]    = {0,}; -        char                    linkinfo[PATH_MAX]     = {0,}; -        char                    force_string[64]       = {0,}; - -        if (!volinfo->defrag) -                goto out; - -        defrag = volinfo->defrag; - -        fd = opendir (dir); -        if (!fd) -                goto out; - -        if ((defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) || -            (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)) { -                strcpy (force_string, "force"); -        } else { -                strcpy (force_string, "not-force"); -        } - -        while ((entry = readdir (fd))) { -                if (!entry) -                        break; - -                /* We have to honor 'stop' (or 'pause'|'commit') as early -                   as possible */ -                if (volinfo->defrag_status != -                    GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) { -                        /* It can be one of 'stopped|paused|commit' etc */ -                        closedir (fd); -                        ret = 1; -                        goto out; -                } - -                if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) -                        continue; - -                snprintf (full_path, PATH_MAX, "%s/%s", dir, entry->d_name); - -                ret = lstat (full_path, &stbuf); -                if (ret == -1) -                        continue; - -                if (S_ISDIR (stbuf.st_mode)) -                        continue; - -                defrag->num_files_lookedup += 1; - -                /* TODO: bring in feature to support hardlink rebalance */ -                if (stbuf.st_nlink > 1) -                        continue; - -                /* if distribute is present, it will honor this key. -                   -1 is returned if distribute is not present or file doesn't -                   have a link-file. If file has link-file, the path of -                   link-file will be the value, and also that guarantees -                   that file has to be mostly migrated */ -                ret = sys_lgetxattr (full_path, GF_XATTR_LINKINFO_KEY, -                                     &linkinfo, PATH_MAX); -                if (ret <= 0) -                        continue; - -                ret = sys_lsetxattr (full_path, "distribute.migrate-data", -                                     force_string, strlen (force_string), 0); - -                /* if errno is not ENOSPC or ENOTCONN, we can still continue -                   with rebalance process */ -                if ((ret == -1) && ((errno != ENOSPC) || -                                    (errno != ENOTCONN))) -                        continue; - -                if ((ret == -1) && (errno == ENOTCONN)) { -                        /* Most probably mount point went missing (mostly due -                           to a brick down), say rebalance failure to user, -                           let him restart it if everything is fine */ -                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; -                        break; -                } - -                if ((ret == -1) && (errno == ENOSPC)) { -                        /* rebalance process itself failed, may be -                           remote brick went down, or write failed due to -                           disk full etc etc.. */ -                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; -                        break; -                } - -                LOCK (&defrag->lock); -                { -                        defrag->total_files += 1; -                        defrag->total_data += stbuf.st_size; -                } -                UNLOCK (&defrag->lock); -        } -        closedir (fd); - -        fd = opendir (dir); -        if (!fd) -                goto out; -        while ((entry = readdir (fd))) { -                if (!entry) -                        break; - -                /* We have to honor 'stop' (or 'pause'|'commit') as early -                   as possible */ -                if (volinfo->defrag_status != -                    GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) { -                        /* It can be one of 'stopped|paused|commit' etc */ -                        closedir (fd); -                        ret = 1; -                        goto out; -                } - -                if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) -                        continue; - -                snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); - -                ret = lstat (full_path, &stbuf); -                if (ret == -1) -                        continue; - -                if (!S_ISDIR (stbuf.st_mode)) -                        continue; - -                ret = gf_glusterd_rebalance_move_data (volinfo, full_path); -                if (ret) -                        break; -        } -        closedir (fd); - -        if (!entry) -                ret = 0; -out: -        return ret; -} +int32_t +glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov, +                          int count, void *myframe); -/* return values - 0: success, +ve: stopped, -ve: failure */  int -gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir) +glusterd_defrag_update_state (glusterd_volinfo_t *volinfo, +                              glusterd_defrag_info_t *defrag)  { -        int            ret             = -1; -        char           full_path[1024] = {0,}; -        struct stat    stbuf           = {0,}; -        DIR           *fd              = NULL; -        struct dirent *entry           = NULL; +        int     ret             = -1; +        int     cmd             = 0; -        if (!volinfo->defrag) -                goto out; - -        fd = opendir (dir); -        if (!fd) -                goto out; - -        while ((entry = readdir (fd))) { -                if (!entry) -                        break; - -                /* We have to honor 'stop' (or 'pause'|'commit') as early -                   as possible */ -                if (volinfo->defrag_status != -                    GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED) { -                        /* It can be one of 'stopped|paused|commit' etc */ -                        closedir (fd); -                        ret = 1; -                        goto out; -                } - -                if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) -                        continue; - -                snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); - -                ret = lstat (full_path, &stbuf); -                if (ret == -1) -                        continue; - -                if (S_ISDIR (stbuf.st_mode)) { -                        /* Fix the layout of the directory */ -                        /* TODO: isn't error code not important ? */ -                        sys_lsetxattr (full_path, "distribute.fix.layout", -                                       "yes", 3, 0); - -                        volinfo->defrag->total_files += 1; - -                        /* Traverse into subdirectory */ -                        ret = gf_glusterd_rebalance_fix_layout (volinfo, -                                                                full_path); -                        if (ret) -                                break; -                } -        } -        closedir (fd); - -        if (!entry) -                ret = 0; - -out: -        return ret; -} - -void * -glusterd_defrag_start (void *data) -{ -        glusterd_volinfo_t     *volinfo = data; -        glusterd_defrag_info_t *defrag  = NULL; -        int                     ret     = -1; -        struct stat             stbuf   = {0,}; - -        THIS = volinfo->xl; -        defrag = volinfo->defrag; -        if (!defrag) -                goto out; - -        sleep (1); -        ret = lstat (defrag->mount, &stbuf); -        if ((ret == -1) && (errno == ENOTCONN)) { -                /* Wait for some more time before starting rebalance */ -                sleep (2); -                ret = lstat (defrag->mount, &stbuf); -                if (ret == -1) { -                        volinfo->defrag_status   = GF_DEFRAG_STATUS_FAILED; -                        volinfo->rebalance_files = 0; -                        volinfo->rebalance_data  = 0; -                        volinfo->lookedup_files  = 0; -                        goto out; -                } -        } - -        /* Fix the root ('/') first */ -        sys_lsetxattr (defrag->mount, "distribute.fix.layout", -                       "yes", 3, 0); - -        if ((defrag->cmd == GF_DEFRAG_CMD_START) || -            (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX)) { -                /* root's layout got fixed */ -                defrag->total_files = 1; - -                /* Step 1: Fix layout of all the directories */ -                ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount); -                if (ret < 0) -                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; -                /* in both 'stopped' or 'failure' cases goto out */ -                if (ret) { -                        goto out; -                } - -                /* Completed first step */ -                volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; -        } - -        if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { -                /* It was used by number of layout fixes on directories */ -                defrag->total_files = 0; - -                volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED; - -                /* Step 2: Iterate over directories to move data */ -                ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount); -                if (ret < 0) -                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; -                /* in both 'stopped' or 'failure' cases goto out */ -                if (ret) { -                        goto out; -                } - -                /* Completed second step */ -                volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE; -        } - -        /* Completed whole process */ -        if ((defrag->cmd == GF_DEFRAG_CMD_START) || -            (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)) -                volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; - -        volinfo->rebalance_files = defrag->total_files; -        volinfo->rebalance_data  = defrag->total_data; -        volinfo->lookedup_files  = defrag->num_files_lookedup; -out: -        volinfo->defrag = NULL; -        if (defrag) { -                gf_log ("rebalance", GF_LOG_INFO, "rebalance on %s complete", -                        defrag->mount); - -                ret = runcmd ("umount", "-l", defrag->mount, NULL); -                LOCK_DESTROY (&defrag->lock); - -                if (defrag->cbk_fn) { -                        defrag->cbk_fn (volinfo, volinfo->defrag_status); -                } - -                GF_FREE (defrag); -        } -        return NULL; -} - -int -glusterd_defrag_stop_validate (glusterd_volinfo_t *volinfo, -                               char *op_errstr, size_t len) -{ -        int     ret = -1; -        if (glusterd_is_defrag_on (volinfo) == 0) { -                snprintf (op_errstr, len, "Rebalance on %s is either Completed " -                          "or not yet started", volinfo->volname); -                goto out; -        } -        ret = 0; -out: -        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); -        return ret; -} - -int -glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files, -                      u_quad_t *size, char *op_errstr, size_t len) -{ -        /* TODO: set a variable 'stop_defrag' here, it should be checked -           in defrag loop */ -        int     ret = -1;          GF_ASSERT (volinfo); -        GF_ASSERT (files); -        GF_ASSERT (size); -        GF_ASSERT (op_errstr); - -        if (!volinfo) { -                ret = -1; -                goto out; -        } +        GF_ASSERT (defrag); -        ret = glusterd_defrag_stop_validate (volinfo, op_errstr, len); -        if (ret) { -                /* rebalance may be happening on other nodes */ -                ret = 0; -                goto out; -        } - -        ret = 0;          if (volinfo->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {                  goto out;          } -        LOCK (&volinfo->defrag->lock); +        LOCK (&defrag->lock);          { -                volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED; -                *files = volinfo->defrag->total_files; -                *size = volinfo->defrag->total_data; +                cmd = defrag->cmd; +                if ((cmd == GF_DEFRAG_CMD_START) || (cmd == +                        GF_DEFRAG_CMD_START_FORCE) || (cmd == +                        GF_DEFRAG_CMD_START_LAYOUT_FIX)) +                        volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; +                else if (cmd == GF_DEFRAG_CMD_STOP) +                        volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;          } -        UNLOCK (&volinfo->defrag->lock); +        UNLOCK (&defrag->lock);          ret = 0;  out: @@ -475,10 +147,9 @@ glusterd_rebalance_cmd_attempted_log (int cmd, char *volname)                          gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "                                  "volume start layout fix on %s", volname);                          break; -                case GF_DEFRAG_CMD_START_MIGRATE_DATA: -                case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE: +                case GF_DEFRAG_CMD_START_FORCE:                          gf_cmd_log ("Volume rebalance"," on volname: %s " -                                    "cmd: start data migrate attempted", +                                    "cmd: start data force attempted",                                      volname);                          gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "                                  "volume start migrate data on %s", volname); @@ -539,6 +210,97 @@ out:          return ret;  } +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, +                        rpc_clnt_event_t event, void *data) +{ +        glusterd_volinfo_t      *volinfo = NULL; +        glusterd_defrag_info_t  *defrag  = NULL; +        int                     ret      = 0; +        char                    pidfile[PATH_MAX]; +        glusterd_conf_t        *priv    = NULL; + +        priv = THIS->private; +        if (!priv) +                return 0; + +        volinfo = mydata; +        if (!volinfo) +                return 0; + +        defrag = volinfo->defrag; +        if (!defrag) +                return 0; + +        if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) +                volinfo->defrag = NULL; + +        GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + +        switch (event) { +        case RPC_CLNT_CONNECT: +        { +                if (defrag->connected) +                        return 0; + +                LOCK (&defrag->lock); +                { +                        defrag->connected = 1; +                } +                UNLOCK (&defrag->lock); + +               gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT", +                        rpc->conn.trans->name); +               break; +        } + +        case RPC_CLNT_DISCONNECT: +        { +                if (!defrag->connected) +                        return 0; + +                LOCK (&defrag->lock); +                { +                        defrag->connected = 0; +                } +                UNLOCK (&defrag->lock); + +                if (!glusterd_is_service_running (pidfile, NULL)) { +                        glusterd_defrag_update_state (volinfo, defrag); +                } else { +                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; +                } + +                /* Success or failure, Reset cmd in volinfo */ + +                volinfo->defrag_cmd = 0; + +                glusterd_store_volinfo (volinfo, +                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT); + +                if (defrag->rpc) { +                        rpc_clnt_unref (defrag->rpc); +                        defrag->rpc = NULL; +                } +                if (defrag->cbk_fn) +                        defrag->cbk_fn (volinfo, volinfo->defrag_status); + +                if (defrag) +                        GF_FREE (defrag); +                gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", +                        rpc->conn.trans->name); +                break; +        } +        default: +                gf_log ("", GF_LOG_TRACE, +                        "got some other RPC event %d", event); +                ret = 0; +                break; +        } + +        return ret; +} +  int  glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,                                size_t len, int cmd, defrag_cbk_fn_t cbk) @@ -547,6 +309,11 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,          glusterd_defrag_info_t *defrag =  NULL;          runner_t               runner = {0,};          glusterd_conf_t        *priv = NULL; +        char                   defrag_path[PATH_MAX]; +        struct stat            buf = {0,}; +        char                   sockfile[PATH_MAX] = {0,}; +        char                   pidfile[PATH_MAX] = {0,}; +        dict_t                 *options = NULL;          priv    = THIS->private; @@ -567,18 +334,29 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,          defrag->cmd = cmd;          LOCK_INIT (&defrag->lock); -        snprintf (defrag->mount, 1024, "%s/mount/%s", -                  priv->workdir, volinfo->volname); -        /* Create a directory, mount glusterfs over it, start glusterfs-defrag */ -        runinit (&runner); -        runner_add_args (&runner, "mkdir", "-p", defrag->mount, NULL); -        ret = runner_run_reuse (&runner); -        if (ret) { -                runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed"); + +        volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; + +        volinfo->defrag_cmd = cmd; +        glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + +        GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); +        ret = stat (defrag_path, &buf); +        if (ret && (errno == ENOENT)) { +                runinit (&runner); +                runner_add_args (&runner, "mkdir", "-p", defrag_path, NULL); +                ret = runner_run_reuse (&runner); +                if (ret) { +                        runner_log (&runner, "glusterd", GF_LOG_DEBUG, +                                    "command failed"); +                        runner_end (&runner); +                        goto out; +                }                  runner_end (&runner); -                goto out;          } -        runner_end (&runner); + +        GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); +        GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);          runinit (&runner);          runner_add_args (&runner, SBIN_DIR"/glusterfs", @@ -586,34 +364,37 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,                           "--xlator-option", "*dht.use-readdirp=yes",                           "--xlator-option", "*dht.lookup-unhashed=yes",                           "--xlator-option", "*dht.assert-no-child-down=yes", -                         defrag->mount, NULL); +                         NULL); +        runner_add_arg (&runner, "--xlator-option"); +        runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); +        runner_add_arg (&runner, "--socket-file"); +        runner_argprintf (&runner, "%s",sockfile); +        runner_add_arg (&runner, "--pid-file"); +        runner_argprintf (&runner, "%s",pidfile); +          ret = runner_run_reuse (&runner);          if (ret) {                  runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");                  runner_end (&runner);                  goto out;          } -        runner_end (&runner); -        volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED; -        if ((cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA) || -            (cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE)) { -                volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED; +        ret = rpc_clnt_transport_unix_options_build (&options, sockfile); +        if (ret) { +                gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); +                goto out; +        } + +        ret = glusterd_rpc_create (&defrag->rpc, options, +                                   glusterd_defrag_notify, volinfo); +        if (ret) { +                gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); +                goto out;          }          if (cbk)                  defrag->cbk_fn = cbk; -        ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start, -                              volinfo); -        if (ret) { -                runinit (&runner); -                runner_add_args (&runner, "umount", "-l", defrag->mount, NULL); -                ret = runner_run_reuse (&runner); -                if (ret) -                        runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed"); -                runner_end (&runner); -        }  out:          gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);          return ret; @@ -712,7 +493,13 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)          if (ret)                  goto out; -        ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict); +        if ((cmd == GF_DEFRAG_CMD_STATUS) || +              (cmd == GF_DEFRAG_CMD_STOP)) { +                ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, +                                         dict); +        } +        else +                ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);  out: @@ -762,8 +549,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)          switch (cmd) {          case GF_DEFRAG_CMD_START:          case GF_DEFRAG_CMD_START_LAYOUT_FIX: -        case GF_DEFRAG_CMD_START_MIGRATE_DATA: -        case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE: +        case GF_DEFRAG_CMD_START_FORCE:                  ret = glusterd_defrag_start_validate (volinfo,                                                        msg, sizeof (msg));                  if (ret) { @@ -771,6 +557,33 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)                                  "start validate failed");                          goto out;                  } +                break; +        case GF_DEFRAG_CMD_STATUS: +                ret = glusterd_is_defrag_on (volinfo); +                if (!ret) { +                        ret = -1; +                        if (volinfo->defrag_status == +                                GF_DEFRAG_STATUS_COMPLETE) { +                                snprintf (msg, sizeof (msg), "Rebalance " +                                          "completed!"); +                                goto out; +                        } +                        snprintf (msg, sizeof(msg), "Rebalance is not running" +                                  " on volume %s", volname); +                        goto out; +                } +                break; + +        case GF_DEFRAG_CMD_STOP: +                ret = glusterd_is_defrag_on (volinfo); +                if (!ret) { +                        gf_log (THIS->name, GF_LOG_DEBUG, +                                "rebalance is not running"); +                        ret = -1; +                        snprintf (msg, sizeof(msg), "Rebalance is not running" +                                  " on volume %s", volname); +                        goto out; +                }          default:                  break;          } @@ -792,11 +605,8 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)          int32_t             cmd       = 0;          char                msg[2048] = {0};          glusterd_volinfo_t *volinfo   = NULL; -        uint64_t            files     = 0; -        uint64_t            size      = 0;          void               *node_uuid = NULL;          glusterd_conf_t    *priv      = NULL; -        dict_t             *tmp_dict  = NULL;          priv = THIS->private; @@ -839,39 +649,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)          switch (cmd) {          case GF_DEFRAG_CMD_START:          case GF_DEFRAG_CMD_START_LAYOUT_FIX: -        case GF_DEFRAG_CMD_START_MIGRATE_DATA: -        case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE: +        case GF_DEFRAG_CMD_START_FORCE:                  ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),                                                      cmd, NULL);                   break; -         case GF_DEFRAG_CMD_STOP: -                 ret = glusterd_defrag_stop (volinfo, &files, &size, -                                             msg, sizeof (msg)); -                if (!ret && rsp_dict) { -                        ret = dict_set_uint64 (rsp_dict, "files", files); -                        if (ret) -                                gf_log (THIS->name, GF_LOG_WARNING, -                                        "failed to set file count"); - -                        ret = dict_set_uint64 (rsp_dict, "size", size); -                        if (ret) -                                gf_log (THIS->name, GF_LOG_WARNING, -                                        "failed to set xfer size"); - -                        /* Don't want to propagate errors from dict_set() */ -                        ret = 0; -                } -                break; +        case GF_DEFRAG_CMD_STOP:          case GF_DEFRAG_CMD_STATUS: - -                if (rsp_dict) -                        tmp_dict = rsp_dict; - -                /* On source node, there will be no 'rsp_dict' */ -                if (!tmp_dict) -                        tmp_dict = glusterd_op_get_ctx (GD_OP_REBALANCE); - -                ret = glusterd_defrag_status_get (volinfo, tmp_dict);                  break;          default:                  break; diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 2cf17b3f7..537496f08 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -82,6 +82,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,                  break;          }          case GD_OP_REBALANCE: +        case GD_OP_DEFRAG_BRICK_VOLUME:          {                  if (ctx) {                          ret = dict_get_int32 (ctx, "status", &status); @@ -1058,7 +1059,8 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)          GF_ASSERT (rsp_dict);          op = glusterd_op_get_op (); -        GF_ASSERT (GD_OP_REBALANCE == op); +        GF_ASSERT ((GD_OP_REBALANCE == op) || +                   (GD_OP_DEFRAG_BRICK_VOLUME == op));          ctx_dict = glusterd_op_get_ctx (op); @@ -1224,10 +1226,7 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,                  break;                  case GD_OP_REBALANCE: -                        ret = glusterd_volume_rebalance_use_rsp_dict (dict); -                        if (ret) -                                goto out; - +                case GD_OP_DEFRAG_BRICK_VOLUME:                  break;                  default: diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 4fe8f71cb..18d60d0a4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -622,6 +622,15 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)          if (ret)                  goto out; +        if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd); +        ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, +                                        buf); +        if (ret) +                goto out; +  out:          if (ret)                  gf_log ("", GF_LOG_ERROR, "Unable to write volume values" @@ -1860,6 +1869,9 @@ glusterd_store_retrieve_volume (char    *volname)                          }                          gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" "                                  " slave:key=%s,value:%s", key, value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, +                                     strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { +                        volinfo->defrag_cmd = atoi (value);                  }                  else {                          exists = glusterd_check_option_exists (key, NULL); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index f1413955b..f55fb8c2e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -59,6 +59,7 @@ typedef enum glusterd_store_ver_ac_{  #define GLUSTERD_STORE_KEY_RB_STATUS      "rb_status"  #define GLUSTERD_STORE_KEY_RB_SRC_BRICK   "rb_src"  #define GLUSTERD_STORE_KEY_RB_DST_BRICK   "rb_dst" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG     "rebalance_status"  #define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"  #define GLUSTERD_STORE_KEY_BRICK_PATH     "path" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 42924a5f6..9ec9e16f1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2016,6 +2016,15 @@ glusterd_import_volinfo (dict_t *vols, int count,                  goto out;          } +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "volume%d.rebalance", count); +        ret = dict_get_uint32 (vols, key, &new_volinfo->defrag_cmd); +        if (ret) { +                snprintf (msg, sizeof (msg), "%s missing in payload for %s", +                          key, volname); +                goto out; +        } +          uuid_parse (volume_id_str, new_volinfo->volume_id);          memset (key, 0, sizeof (key)); @@ -2438,6 +2447,7 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)          struct rpc_clnt *rpc = NULL;          glusterd_brickinfo_t    *brickinfo = NULL;          nodesrv_t               *shd       = NULL; +        glusterd_volinfo_t      *volinfo   = NULL;          GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out);          GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out); @@ -2449,6 +2459,11 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)                  shd       = pending_node->node;                  rpc       = shd->rpc; +        } else if (pending_node->type == GD_NODE_REBALANCE) { +                volinfo = pending_node->node; +                if (volinfo->defrag) +                        rpc = volinfo->defrag->rpc; +          } else {                  GF_ASSERT (0);          } @@ -4811,3 +4826,42 @@ glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,                  snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol",                            path, volinfo->volname);  } + +int +glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, +                              size_t len, int cmd, defrag_cbk_fn_t cbk) +{ +        glusterd_conf_t         *priv                   = NULL; +        char                     pidfile[PATH_MAX]; +        int                      ret                    = -1; +        pid_t                    pid; + +        priv = THIS->private; +        if (!priv) +                return ret; + +        GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + +        if (!glusterd_is_service_running (pidfile, &pid)) { +                glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd, +                                              cbk); +        } + +        return ret; +} + +int +glusterd_restart_rebalance (glusterd_conf_t *conf) +{ +        glusterd_volinfo_t       *volinfo = NULL; +        int                      ret = 0; +        char                     op_errstr[256]; + +        list_for_each_entry (volinfo, &conf->volumes, vol_list) { +                if (!volinfo->defrag_cmd) +                        continue; +                glusterd_volume_defrag_restart (volinfo, op_errstr, 256, +                                                volinfo->defrag_cmd, NULL); +        } +        return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 6f9a5e14d..e52b25e31 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -386,4 +386,6 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid);  void  glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,                                gf_transport_type type); +int +glusterd_restart_rebalance (glusterd_conf_t *conf);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index e9f3bd055..28e80310e 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -999,6 +999,8 @@ init (xlator_t *this)          ret = glusterd_restart_gsyncds (conf);          if (ret)                  goto out; + +        glusterd_restart_rebalance (conf);          ret = 0;  out:          if (ret < 0) { diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 60dbe61e0..e200f49d1 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -79,6 +79,7 @@ typedef enum glusterd_op_ {          GD_OP_STATEDUMP_VOLUME,          GD_OP_LIST_VOLUME,          GD_OP_CLEARLOCKS_VOLUME, +        GD_OP_DEFRAG_BRICK_VOLUME,          GD_OP_MAX,  } glusterd_op_t; @@ -164,6 +165,9 @@ struct glusterd_defrag_info_ {          gf_lock_t                    lock;          int                          cmd;          pthread_t                    th; +        gf_defrag_status_t           defrag_status; +        struct rpc_clnt            * rpc; +        uint32_t                     connected;          char                         mount[1024];          char                         databuf[131072];          struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */ @@ -210,6 +214,7 @@ struct glusterd_volinfo_ {          uint64_t                rebalance_data;          uint64_t                lookedup_files;          glusterd_defrag_info_t  *defrag; +        gf_cli_defrag_type      defrag_cmd;          /* Replace brick status */          gf_rb_status_t          rb_status; @@ -235,7 +240,8 @@ struct glusterd_volinfo_ {  typedef enum gd_node_type_ {          GD_NODE_NONE,          GD_NODE_BRICK, -        GD_NODE_SHD +        GD_NODE_SHD, +        GD_NODE_REBALANCE,  } gd_node_type;  typedef struct glusterd_pending_node_ { @@ -315,6 +321,27 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);  		STACK_DESTROY (frame->root);\  	} while (0) +#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do {               \ +                char vol_path[PATH_MAX];                                \ +                GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv);       \ +                snprintf (path, PATH_MAX, "%s/rebalance",vol_path);     \ +        } while (0) + +#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do {         \ +                char defrag_path[PATH_MAX];                             \ +                GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv);    \ +                snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path,    \ +                           uuid_utoa(priv->uuid));                      \ +        } while (0) + +#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do {          \ +                char defrag_path[PATH_MAX];                             \ +                GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv);    \ +                snprintf (path, PATH_MAX, "%s/%s.pid", defrag_path,     \ +                           uuid_utoa(priv->uuid));                      \ +        } while (0) + +  int32_t  glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo,                                 char **new_brick);  | 
