diff options
22 files changed, 855 insertions, 177 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 3e68b2cce8e..8ea6581aff8 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -753,7 +753,7 @@ out:  int32_t  cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, -                                   dict_t **options) +                                   dict_t **options, int *question)  {          dict_t  *dict = NULL;          char    *volname = NULL; @@ -765,6 +765,10 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,          int32_t j = 0;          char    *tmp_brick = NULL;          char    *tmp_brick1 = NULL; +        char    *opwords[] = { "start", "commit", "pause", "abort", "status", +                               "force", NULL }; +        char    *w = NULL; +        int32_t  command = GF_OP_CMD_NONE;          GF_ASSERT (words);          GF_ASSERT (options); @@ -782,19 +786,53 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,          GF_ASSERT (volname);          ret = dict_set_str (dict, "volname", volname); -          if (ret)                  goto out; +        w = str_getunamb (words[wordcount - 1], opwords); +        if (!w) { +                /* Should be default 'force' */ +                command = GF_OP_CMD_COMMIT_FORCE; +                if (question) +                        *question = 1; +        } else { +                /* handled this option */ +                wordcount--; +                if (!strcmp ("start", w)) { +                        command = GF_OP_CMD_START; +                } else if (!strcmp ("commit", w)) { +                        command = GF_OP_CMD_COMMIT; +                        if (question) +                                *question = 1; +                } else if (!strcmp ("pause", w)) { +                        command = GF_OP_CMD_PAUSE; +                } else if (!strcmp ("abort", w)) { +                        command = GF_OP_CMD_ABORT; +                } else if (!strcmp ("status", w)) { +                        command = GF_OP_CMD_STATUS; +                } else if (!strcmp ("force", w)) { +                        command = GF_OP_CMD_COMMIT_FORCE; +                        if (question) +                                *question = 1; +                } else { +                        GF_ASSERT (!"opword mismatch"); +                        ret = -1; +                        goto out; +                } +        } +          if (wordcount < 4) {                  ret = -1;                  goto out;          } -        brick_index = 3; - +        ret = dict_set_int32 (dict, "command", command);          if (ret) -                goto out; +                gf_log ("cli", GF_LOG_INFO, "failed to set 'command' %d", +                        command); + + +        brick_index = 3;          tmp_index = brick_index;          tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char); @@ -805,7 +843,7 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,                  ret = -1;                  goto out;          } -  +          tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);          if (!tmp_brick1) { @@ -850,7 +888,6 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,          }          ret = dict_set_int32 (dict, "count", brick_count); -          if (ret)                  goto out; diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 68c5ef57870..16dc32328d2 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -800,6 +800,7 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,          gf_answer_t             answer = GF_ANSWER_NO;          int                     sent = 0;          int                     parse_error = 0; +        int                     need_question = 0;          const char *question = "Removing brick(s) can result in data loss. "                                 "Do you want to Continue?"; @@ -808,7 +809,8 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,          if (!frame)                  goto out; -        ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options); +        ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options, +                                                 &need_question);          if (ret) {                  cli_usage_out (word->pattern); @@ -816,11 +818,13 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,                  goto out;          } -        answer = cli_cmd_get_confirmation (state, question); - -        if (GF_ANSWER_NO == answer) { -                ret = 0; -                goto out; +        if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) { +                /* we need to ask question only in case of 'commit or force' */ +                answer = cli_cmd_get_confirmation (state, question); +                if (GF_ANSWER_NO == answer) { +                        ret = 0; +                        goto out; +                }          }          proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK]; @@ -1304,7 +1308,7 @@ struct cli_cmd volume_cmds[] = {            cli_cmd_volume_add_brick_cbk,            "add brick to volume <VOLNAME>"}, -        { "volume remove-brick <VOLNAME> <BRICK> ...", +        { "volume remove-brick <VOLNAME> <BRICK> ... {start|pause|abort|status|commit|force}",            cli_cmd_volume_remove_brick_cbk,            "remove brick from volume <VOLNAME>"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 3d6ce25ef1b..d7a5988f238 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -856,23 +856,36 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                                           "rebalance process");                          goto done;                  } -                if (rsp.op_errno == 0) + +                switch (rsp.op_errno) { +                case GF_DEFRAG_STATUS_NOT_STARTED:                          status = "not started"; -                if (rsp.op_errno == 1) +                        break; +                case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:                          status = "step 1: layout fix in progress"; -                if (rsp.op_errno == 2) +                        break; +                case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:                          status = "step 2: data migration in progress"; -                if (rsp.op_errno == 3) +                        break; +                case GF_DEFRAG_STATUS_STOPPED:                          status = "stopped"; -                if (rsp.op_errno == 4) +                        break; +                case GF_DEFRAG_STATUS_COMPLETE:                          status = "completed"; -                if (rsp.op_errno == 5) +                        break; +                case GF_DEFRAG_STATUS_FAILED:                          status = "failed"; -                if (rsp.op_errno == 6) +                        break; +                case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:                          status = "step 1: layout fix complete"; -                if (rsp.op_errno == 7) +                        break; +                case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:                          status = "step 2: data migration complete"; - +                        break; +                case GF_DEFRAG_STATUS_PAUSED: +                        status = "paused"; +                        break; +                }                  if (rsp.files && (rsp.op_errno == 1)) {                          cli_out ("rebalance %s: fixed layout %"PRId64,                                   status, rsp.files); @@ -1064,6 +1077,87 @@ out:          return ret;  } +int +gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov, +                                 int count, void *myframe) +{ +        gf2_cli_defrag_vol_rsp  rsp     = {0,}; +        char                    *status  = "unknown"; +        int                      ret     = 0; + +        if (-1 == req->rpc_status) { +                goto out; +        } + +        ret = xdr_to_generic (*iov, &rsp, +                              (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp); +        if (ret < 0) { +                gf_log ("", GF_LOG_ERROR, "error"); +                goto out; +        } + +        ret = rsp.op_ret; +        if (rsp.op_ret == -1) { +                if (strcmp (rsp.op_errstr, "")) +                        cli_out ("%s", rsp.op_errstr); +                else +                        cli_out ("failed to get the status of " +                                 "remove-brick process"); +                goto out; +        } + +        switch (rsp.op_errno) { +        case GF_DEFRAG_STATUS_NOT_STARTED: +                status = "not started"; +                break; +        case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED: +        case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED: +        case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: +                status = "in progress"; +                break; +        case GF_DEFRAG_STATUS_STOPPED: +                status = "stopped"; +                break; +        case GF_DEFRAG_STATUS_COMPLETE: +        case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE: +                status = "completed"; +                break; +        case GF_DEFRAG_STATUS_FAILED: +                status = "failed"; +                break; +        case GF_DEFRAG_STATUS_PAUSED: +                status = "paused"; +                break; +        } + +        if (rsp.files && (rsp.op_errno == 1)) { +                cli_out ("remove-brick %s: fixed layout %"PRId64, +                         status, rsp.files); +                goto out; +        } +        if (rsp.files && (rsp.op_errno == 6)) { +                cli_out ("remove-brick %s: fixed layout %"PRId64, +                         status, rsp.files); +                goto out; +        } +        if (rsp.files) { +                cli_out ("remove-brick %s: decommissioned %"PRId64 +                         " files of size %"PRId64, status, +                         rsp.files, rsp.size); +                goto out; +        } + +        cli_out ("remove-brick %s", status); + +out: +        if (rsp.op_errstr) +                free (rsp.op_errstr); //malloced by xdr +        if (rsp.volname) +                free (rsp.volname); //malloced by xdr +        cli_cmd_broadcast_response (ret); +        return ret; +} +  int  gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, @@ -2160,8 +2254,11 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,                           void *data)  {          gf1_cli_remove_brick_req  req = {0,}; +        gf1_cli_defrag_vol_req    status_req = {0,};          int                       ret = 0; -        dict_t                    *dict = NULL; +        dict_t                   *dict = NULL; +        int32_t                   command = 0; +        char                     *volname = NULL;          if (!frame || !this ||  !data) {                  ret = -1; @@ -2170,30 +2267,45 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,          dict = data; -        ret = dict_get_str (dict, "volname", &req.volname); - +        ret = dict_get_str (dict, "volname", &volname);          if (ret)                  goto out;          ret = dict_get_int32 (dict, "count", &req.count); -          if (ret)                  goto out; -        ret = dict_allocate_and_serialize (dict, -                                           &req.bricks.bricks_val, -                                           (size_t *)&req.bricks.bricks_len); -        if (ret < 0) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "failed to get serialized length of dict"); +        ret = dict_get_int32 (dict, "command", &command); +        if (ret)                  goto out; -        } -        ret = cli_cmd_submit (&req, frame, cli_rpc_prog, -                              GLUSTER_CLI_REMOVE_BRICK, NULL, -                              this, gf_cli3_1_remove_brick_cbk, -                              (xdrproc_t) xdr_gf1_cli_remove_brick_req); +        if (command != GF_OP_CMD_STATUS) { +                req.volname = volname; + +                ret = dict_allocate_and_serialize (dict, +                                                   &req.bricks.bricks_val, +                                                   (size_t *)&req.bricks.bricks_len); +                if (ret < 0) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "failed to get serialized length of dict"); +                        goto out; +                } + +                ret = cli_cmd_submit (&req, frame, cli_rpc_prog, +                                      GLUSTER_CLI_REMOVE_BRICK, NULL, +                                      this, gf_cli3_1_remove_brick_cbk, +                                      (xdrproc_t) xdr_gf1_cli_remove_brick_req); +        } else { +                /* Need rebalance status to e sent :-) */ +                status_req.volname = volname; +                status_req.cmd = GF_DEFRAG_CMD_STATUS; +                ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog, +                                      GLUSTER_CLI_DEFRAG_VOLUME, NULL, +                                      this, gf_cli3_remove_brick_status_cbk, +                                      (xdrproc_t) xdr_gf1_cli_defrag_vol_req); + +        }  out:          gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); diff --git a/cli/src/cli.h b/cli/src/cli.h index d3e1fc21bfa..1e0d69cd8a2 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -212,7 +212,7 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,  int32_t  cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, -                                   dict_t **options); +                                   dict_t **options, int *question);  int32_t  cli_cmd_volume_replace_brick_parse (const char **words, int wordcount, diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c index 250efc93575..9030d3067e7 100644 --- a/rpc/xdr/src/cli1-xdr.c +++ b/rpc/xdr/src/cli1-xdr.c @@ -38,6 +38,17 @@ xdr_gf_cli_defrag_type (XDR *xdrs, gf_cli_defrag_type *objp)  }  bool_t +xdr_gf_defrag_status_t (XDR *xdrs, gf_defrag_status_t *objp) +{ +	register int32_t *buf; +        buf = NULL; + +	 if (!xdr_enum (xdrs, (enum_t *) objp)) +		 return FALSE; +	return TRUE; +} + +bool_t  xdr_gf1_cluster_type (XDR *xdrs, gf1_cluster_type *objp)  {  	register int32_t *buf; @@ -60,6 +71,17 @@ xdr_gf1_cli_replace_op (XDR *xdrs, gf1_cli_replace_op *objp)  }  bool_t +xdr_gf1_op_commands (XDR *xdrs, gf1_op_commands *objp) +{ +	register int32_t *buf; +        buf = NULL; + +	 if (!xdr_enum (xdrs, (enum_t *) objp)) +		 return FALSE; +	return TRUE; +} + +bool_t  xdr_gf_quota_type (XDR *xdrs, gf_quota_type *objp)  {  	register int32_t *buf; diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h index 0d606e79ff6..d502c30b491 100644 --- a/rpc/xdr/src/cli1-xdr.h +++ b/rpc/xdr/src/cli1-xdr.h @@ -42,9 +42,23 @@ enum gf_cli_defrag_type {  	GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,  	GF_DEFRAG_CMD_START_MIGRATE_DATA = 1 + 4,  	GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE = 1 + 5, +	GF_DEFRAG_CMD_START_FORCE = 1 + 6,  };  typedef enum gf_cli_defrag_type gf_cli_defrag_type; +enum gf_defrag_status_t { +	GF_DEFRAG_STATUS_NOT_STARTED = 0, +	GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED = 1, +	GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED = 2, +	GF_DEFRAG_STATUS_STOPPED = 3, +	GF_DEFRAG_STATUS_COMPLETE = 4, +	GF_DEFRAG_STATUS_FAILED = 5, +	GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE = 6, +	GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE = 7, +	GF_DEFRAG_STATUS_PAUSED = 8, +}; +typedef enum gf_defrag_status_t gf_defrag_status_t; +  enum gf1_cluster_type {  	GF_CLUSTER_TYPE_NONE = 0,  	GF_CLUSTER_TYPE_STRIPE = 0 + 1, @@ -64,6 +78,17 @@ enum gf1_cli_replace_op {  };  typedef enum gf1_cli_replace_op gf1_cli_replace_op; +enum gf1_op_commands { +	GF_OP_CMD_NONE = 0, +	GF_OP_CMD_START = 0 + 1, +	GF_OP_CMD_COMMIT = 0 + 2, +	GF_OP_CMD_PAUSE = 0 + 3, +	GF_OP_CMD_ABORT = 0 + 4, +	GF_OP_CMD_STATUS = 0 + 5, +	GF_OP_CMD_COMMIT_FORCE = 0 + 6, +}; +typedef enum gf1_op_commands gf1_op_commands; +  enum gf_quota_type {  	GF_QUOTA_OPTION_TYPE_NONE = 0,  	GF_QUOTA_OPTION_TYPE_ENABLE = 0 + 1, @@ -593,8 +618,10 @@ typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp;  #if defined(__STDC__) || defined(__cplusplus)  extern  bool_t xdr_gf_cli_defrag_type (XDR *, gf_cli_defrag_type*); +extern  bool_t xdr_gf_defrag_status_t (XDR *, gf_defrag_status_t*);  extern  bool_t xdr_gf1_cluster_type (XDR *, gf1_cluster_type*);  extern  bool_t xdr_gf1_cli_replace_op (XDR *, gf1_cli_replace_op*); +extern  bool_t xdr_gf1_op_commands (XDR *, gf1_op_commands*);  extern  bool_t xdr_gf_quota_type (XDR *, gf_quota_type*);  extern  bool_t xdr_gf1_cli_friends_list (XDR *, gf1_cli_friends_list*);  extern  bool_t xdr_gf1_cli_get_volume (XDR *, gf1_cli_get_volume*); @@ -663,8 +690,10 @@ extern  bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*);  #else /* K&R C */  extern bool_t xdr_gf_cli_defrag_type (); +extern bool_t xdr_gf_defrag_status_t ();  extern bool_t xdr_gf1_cluster_type ();  extern bool_t xdr_gf1_cli_replace_op (); +extern bool_t xdr_gf1_op_commands ();  extern bool_t xdr_gf_quota_type ();  extern bool_t xdr_gf1_cli_friends_list ();  extern bool_t xdr_gf1_cli_get_volume (); diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 9fc9f02d29d..ff2f09af3f6 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -4,7 +4,20 @@          GF_DEFRAG_CMD_STATUS,          GF_DEFRAG_CMD_START_LAYOUT_FIX,          GF_DEFRAG_CMD_START_MIGRATE_DATA, -        GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE +        GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE, +        GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */ +} ; + + enum gf_defrag_status_t { +        GF_DEFRAG_STATUS_NOT_STARTED, +        GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED, +        GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED, +        GF_DEFRAG_STATUS_STOPPED, +        GF_DEFRAG_STATUS_COMPLETE, +        GF_DEFRAG_STATUS_FAILED, +        GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE, +        GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE, +        GF_DEFRAG_STATUS_PAUSED  } ;   enum gf1_cluster_type { @@ -24,6 +37,16 @@          GF_REPLACE_OP_COMMIT_FORCE  } ; + enum gf1_op_commands { +        GF_OP_CMD_NONE = 0, +        GF_OP_CMD_START, +        GF_OP_CMD_COMMIT, +        GF_OP_CMD_PAUSE, +        GF_OP_CMD_ABORT, +        GF_OP_CMD_STATUS, +        GF_OP_CMD_COMMIT_FORCE +} ; +  enum gf_quota_type {          GF_QUOTA_OPTION_TYPE_NONE = 0,          GF_QUOTA_OPTION_TYPE_ENABLE, diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 6f8594e30b7..e221e10ab95 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -1690,6 +1690,46 @@ dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,  }  int +dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                           int op_ret, int op_errno, dict_t *xattr) +{ +        int           i     = -1; +        int           ret   = -1; +        char         *value = NULL; +        dht_local_t  *local = NULL; +        dht_conf_t   *conf  = NULL; +        call_frame_t *prev  = NULL; +        int           this_call_cnt = 0; + +        local = frame->local; +        prev = cookie; +        conf = this->private; + +        if (op_ret == -1) +                goto out; + + +        ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value); +        if (ret) +                goto out; + +        if (!strcmp (value, local->key)) { +                for (i = 0; i < conf->subvolume_cnt; i++) { +                        if (conf->subvolumes[i] == prev->this) +                                conf->decommissioned_bricks[i] = prev->this; +                } +        } + +out: +        this_call_cnt = dht_frame_return (frame); +        if (is_last_call (this_call_cnt)) { +                DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP); +        } +        return 0; + +} + +int  dht_setxattr (call_frame_t *frame, xlator_t *this,                loc_t *loc, dict_t *xattr, int flags)  { @@ -1771,6 +1811,28 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,          } +        tmp = dict_get (xattr, "decommission-brick"); +        if (tmp) { +                /* This operation should happen only on '/' */ +                if (__is_root_gfid (loc->inode->gfid) != 0) { +                        op_errno = ENOTSUP; +                        goto err; +                } + +                memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095)); +                local->key = gf_strdup (value); +                local->call_cnt = conf->subvolume_cnt; + +                for (i = 0 ; i < conf->subvolume_cnt; i++) { +                        /* Get the pathinfo, and then compare */ +                        STACK_WIND (frame, dht_checking_pathinfo_cbk, +                                    conf->subvolumes[i], +                                    conf->subvolumes[i]->fops->getxattr, +                                    loc, GF_XATTR_PATHINFO_KEY); +                } +                return 0; +        } +          tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);          if (tmp) {                  gf_log (this->name, GF_LOG_INFO, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index ab1b82af2a0..3545c0f995c 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -202,6 +202,9 @@ struct dht_conf {          uint32_t       dir_spread_cnt;  	struct syncenv *env; /* The env pointer to the rebalance synctask */ + +        /* to keep track of nodes which are decomissioned */ +        xlator_t     **decommissioned_bricks;  };  typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 99abe023b2a..d8138067ee5 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -579,6 +579,12 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)                  return -1;          } +        conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *), +                                                 gf_dht_mt_xlator_t); +        if (!conf->decommissioned_bricks) { +                return -1; +        } +          return 0;  } diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 882e0209eeb..1c881be39b7 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -460,8 +460,22 @@ static inline int  dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)  {          int i = 0; +        int j = 0;          int err = 0;          int count = 0; +        dht_conf_t *conf = NULL; + +        /* Gets in use only for replace-brick, remove-brick */ +        conf = this->private; +        for (i = 0; i < layout->cnt; i++) { +                for (j = 0; j < conf->subvolume_cnt; j++) { +                        if (conf->decommissioned_bricks[j] && +                            conf->decommissioned_bricks[j] == layout->list[i].xlator) { +                                layout->list[i].err = -EINVAL; +                                break; +                        } +                } +        }          for (i = 0; i < layout->cnt; i++) {                  err = layout->list[i].err; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 87a5756546f..d9499a407a6 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -255,6 +255,47 @@ out:  int +dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf, +                                 const char *bricks) +{ +        int         i  = 0; +        int         ret  = -1; +        char       *tmpstr = NULL; +        char       *dup_brick = NULL; +        char       *node = NULL; + +        if (!conf || !bricks) +                goto out; + +        dup_brick = gf_strdup (bricks); +        node = strtok_r (dup_brick, ",", &tmpstr); +        while (node) { +                for (i = 0; i < conf->subvolume_cnt; i++) { +                        if (!strcmp (conf->subvolumes[i]->name, node)) { +                                conf->decommissioned_bricks[i] = +                                        conf->subvolumes[i]; +                                gf_log (this->name, GF_LOG_INFO, +                                        "decommissioning subvolume %s", +                                        conf->subvolumes[i]->name); +                                break; +                        } +                } +                if (i == conf->subvolume_cnt) { +                        /* Wrong node given. */ +                        goto out; +                } +                node = strtok_r (NULL, ",", &tmpstr); +        } + +        ret = 0; +out: +        if (dup_brick) +                GF_FREE (dup_brick); + +        return ret; +} + +int  reconfigure (xlator_t *this, dict_t *options)  {          dht_conf_t      *conf = NULL; @@ -299,6 +340,12 @@ reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,                            options, uint32, out); +        if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { +                ret = dht_parse_decommissioned_bricks (this, conf, temp_str); +                if (ret == -1) +                        goto out; +        } +          ret = 0;  out:          return ret; @@ -360,14 +407,14 @@ init (xlator_t *this)                  goto err;          } -        ret = dht_layouts_init (this, conf); -        if (ret == -1) { -                goto err; +        if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) { +                ret = dht_parse_decommissioned_bricks (this, conf, temp_str); +                if (ret == -1) +                        goto err;          } -        conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t), -                                    gf_dht_mt_dht_du_t); -        if (!conf->du_stats) { +        ret = dht_layouts_init (this, conf); +        if (ret == -1) {                  goto err;          } @@ -501,5 +548,8 @@ struct volume_options options[] = {          { .key  = {"directory-layout-spread"},            .type = GF_OPTION_TYPE_INT,          }, +        { .key  = {"decommissioned-bricks"}, +          .type = GF_OPTION_TYPE_ANY, +        },          { .key  = {NULL} },  }; diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 8b3a03b6f3e..8832c69ed4f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -312,7 +312,8 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)                  strcpy (vol_type, "distribute");  	/* Do not allow remove-brick if the volume is plain stripe */ -	if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->sub_count)) { +	if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && +            (volinfo->brick_count == volinfo->sub_count)) {                  snprintf (err_str, 2048, "Removing brick from a plain stripe is not allowed");                  gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);                  ret = -1; @@ -321,8 +322,8 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)  	/* Do not allow remove-brick if the bricks given is less than the replica count  	   or stripe count */ -        if (((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || (volinfo->type == GF_CLUSTER_TYPE_STRIPE)) -	    && !(volinfo->brick_count <= volinfo->sub_count)) { +        if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && +            !(volinfo->brick_count <= volinfo->sub_count)) {                  if (volinfo->sub_count && (count % volinfo->sub_count != 0)) {                          snprintf (err_str, 2048, "Remove brick incorrect"                                    " brick count of %d for %s %d", @@ -512,16 +513,20 @@ out:  int -glusterd_op_perform_remove_brick (glusterd_volinfo_t  *volinfo, char *brick) +glusterd_op_perform_remove_brick (glusterd_volinfo_t  *volinfo, char *brick, +                                  int force, int *need_migrate)  { -          glusterd_brickinfo_t    *brickinfo = NULL;          char                    *dup_brick = NULL; -        int32_t                 ret = -1; +        int32_t                  ret = -1; +        glusterd_conf_t         *priv = NULL;          GF_ASSERT (volinfo);          GF_ASSERT (brick); +        priv = THIS->private; +        GF_ASSERT (priv); +          dup_brick = gf_strdup (brick);          if (!dup_brick)                  goto out; @@ -534,15 +539,26 @@ glusterd_op_perform_remove_brick (glusterd_volinfo_t  *volinfo, char *brick)          if (ret)                  goto out; -        if (GLUSTERD_STATUS_STARTED == volinfo->status) { -                ret = glusterd_brick_stop (volinfo, brickinfo); -                if (ret) { -                        gf_log ("", GF_LOG_ERROR, "Unable to stop " -                                "glusterfs, ret: %d", ret); -                        goto out; +        if (!uuid_compare (brickinfo->uuid, priv->uuid)) { +                /* Only if the brick is in this glusterd, do the rebalance */ +                if (need_migrate) +                        *need_migrate = 1; +        } + +        if (force) { +                if (GLUSTERD_STATUS_STARTED == volinfo->status) { +                        ret = glusterd_brick_stop (volinfo, brickinfo); +                        if (ret) { +                                gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop " +                                        "glusterfs, ret: %d", ret); +                                goto out; +                        }                  } +                glusterd_delete_brick (volinfo, brickinfo); +                goto out;          } -        glusterd_delete_brick (volinfo, brickinfo); + +        brickinfo->decommissioned = 1;  out:          if (dup_brick)                  GF_FREE (dup_brick); @@ -700,17 +716,18 @@ out:  }  int -glusterd_op_stage_remove_brick (dict_t *dict) +glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)  { -        int                                     ret = -1; -        char                                    *volname = NULL; -        glusterd_volinfo_t                      *volinfo = NULL; -        dict_t                                  *ctx     = NULL; -        char                                    *errstr  = NULL; -        int32_t                                 brick_count = 0; +        int                 ret         = -1; +        char               *volname     = NULL; +        glusterd_volinfo_t *volinfo     = NULL; +        char               *errstr      = NULL; +        int32_t             brick_count = 0; +        char                msg[2048]   = {0,}; +        int32_t             flag        = 0; +        gf1_op_commands     cmd         = GF_OP_CMD_NONE;          ret = dict_get_str (dict, "volname", &volname); -          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to get volume name");                  goto out; @@ -723,25 +740,64 @@ glusterd_op_stage_remove_brick (dict_t *dict)                  goto out;          } -        if (glusterd_is_defrag_on(volinfo)) { -                ctx = glusterd_op_get_ctx (); -                errstr = gf_strdup("Rebalance is in progress. Please retry" -                                    " after completion"); -                if (!errstr) { -                        ret = -1; +        ret = dict_get_int32 (dict, "command", &flag); +        if (ret) { +                gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); +                goto out; +        } +        cmd = flag; + +        ret = -1; +        switch (cmd) { +        case GF_OP_CMD_NONE: +                errstr = gf_strdup ("no remove-brick command issued"); +                goto out; + +        case GF_OP_CMD_STATUS: +                ret = 0; +                goto out; + +        case GF_OP_CMD_START: +        { +                if (GLUSTERD_STATUS_STARTED != volinfo->status) { +                        snprintf (msg, sizeof (msg), "Volume %s needs to be started " +                                  "before remove-brick (you can use 'force' or " +                                  "'commit' to override this behavior)", +                                  volinfo->volname); +                        errstr = gf_strdup (msg); +                        gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);                          goto out;                  } -                gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); -                ret = dict_set_dynstr (ctx, "errstr", errstr); -                if (ret) { -                        GF_FREE (errstr); -                        gf_log ("", GF_LOG_DEBUG, -                                "failed to set errstr ctx"); +                if (glusterd_is_defrag_on(volinfo)) { +                        errstr = gf_strdup("Rebalance is in progress. Please retry" +                                           " after completion"); +                        gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);                          goto out;                  } +                break; +        } -                ret = -1; -                goto out; +        case GF_OP_CMD_PAUSE: +        case GF_OP_CMD_ABORT: +        { +                if (!volinfo->decommission_in_progress) { +                        errstr = gf_strdup("remove-brick is not in progress"); +                        gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); +                        goto out; +                } +                break; +        } + +        case GF_OP_CMD_COMMIT: +                if (volinfo->decommission_in_progress) { +                        errstr = gf_strdup ("use 'force' option as migration " +                                            "is in progress"); +                        goto out; +                } +                break; + +        case GF_OP_CMD_COMMIT_FORCE: +                break;          }          ret = dict_get_int32 (dict, "count", &brick_count); @@ -750,41 +806,96 @@ glusterd_op_stage_remove_brick (dict_t *dict)                  goto out;          } +        ret = 0;          if (volinfo->brick_count == brick_count) { -                ctx = glusterd_op_get_ctx (); -                if (!ctx) { -                        gf_log ("", GF_LOG_ERROR, -                                "Operation Context is not present"); -                        ret = -1; -                        goto out; -                }                  errstr = gf_strdup ("Deleting all the bricks of the "                                      "volume is not allowed"); -                if (!errstr) { -                        gf_log ("", GF_LOG_ERROR, "Out of memory"); -                        ret = -1; -                        goto out; -                } - -                ret = dict_set_dynstr (ctx, "errstr", errstr); -                if (ret) { -                        GF_FREE (errstr); -                        gf_log ("", GF_LOG_DEBUG, -                                "failed to set pump status in ctx"); -                        goto out; -                } -                  ret = -1;                  goto out;          }  out:          gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +        if (ret && errstr) { +                if (op_errstr) +                        *op_errstr = errstr; +        }          return ret;  }  int +glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo, +                                   gf_defrag_status_t status) +{ +        int                   ret = 0; +        glusterd_brickinfo_t *brickinfo = NULL; +        glusterd_brickinfo_t *tmp = NULL; + +        switch (status) { +        case GF_DEFRAG_STATUS_PAUSED: +        case GF_DEFRAG_STATUS_FAILED: +                /* No changes required in the volume file. +                   everything should remain as is */ +                break; +        case GF_DEFRAG_STATUS_STOPPED: +                /* Fall back to the old volume file */ +                list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { +                        if (!brickinfo->decommissioned) +                                continue; +                        brickinfo->decommissioned = 0; +                } +                break; + +        case GF_DEFRAG_STATUS_COMPLETE: +                /* Done with the task, you can remove the brick from the +                   volume file */ +                list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { +                        if (!brickinfo->decommissioned) +                                continue; +                        gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s", +                                brickinfo->path); +                        brickinfo->decommissioned = 0; +                        if (GLUSTERD_STATUS_STARTED == volinfo->status) { +                                ret = glusterd_brick_stop (volinfo, brickinfo); +                                if (ret) { +                                        gf_log (THIS->name, GF_LOG_ERROR, +                                                "Unable to stop glusterfs (%d)", ret); +                                } +                        } +                        glusterd_delete_brick (volinfo, brickinfo); +                } +                break; + +        default: +                GF_ASSERT (!"cbk function called with wrong status"); +                break; +        } + +        ret = glusterd_create_volfiles_and_notify_services (volinfo); +        if (ret) +                gf_log (THIS->name, GF_LOG_ERROR, +                        "Unable to write volume files (%d)", ret); + +        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); +        if (ret) +                gf_log (THIS->name, GF_LOG_ERROR, +                        "Unable to store volume info (%d)", ret); + + +        if (GLUSTERD_STATUS_STARTED == volinfo->status) { +                ret = glusterd_check_generate_start_nfs (); +                if (ret) +                        gf_log (THIS->name, GF_LOG_ERROR, +                                "Unable to start nfs process (%d)", ret); +        } + +        volinfo->decommission_in_progress = 0; +        return 0; +} + + +int  glusterd_op_add_brick (dict_t *dict, char **op_errstr)  {          int                                     ret = 0; @@ -848,15 +959,20 @@ out:  }  int -glusterd_op_remove_brick (dict_t *dict) +glusterd_op_remove_brick (dict_t *dict, char **op_errstr)  { -        int                                     ret = -1; -        char                                    *volname = NULL; -        glusterd_volinfo_t                      *volinfo = NULL; -        char                                    *brick = NULL; -        int32_t                                 count = 0; -        int32_t                                 i = 1; -        char                                    key[256] = {0,}; +        int                 ret            = -1; +        char               *volname        = NULL; +        glusterd_volinfo_t *volinfo        = NULL; +        char               *brick          = NULL; +        int32_t             count          = 0; +        int32_t             i              = 1; +        char                key[256]       = {0,}; +        int32_t             flag           = 0; +        char                err_str[4096]  = {0,}; +        int                 need_rebalance = 0; +        int                 force          = 0; +        gf1_op_commands     cmd            = 0;          ret = dict_get_str (dict, "volname", &volname); @@ -866,12 +982,99 @@ glusterd_op_remove_brick (dict_t *dict)          }          ret = glusterd_volinfo_find (volname, &volinfo); -          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");                  goto out;          } +        ret = dict_get_int32 (dict, "command", &flag); +        if (ret) { +                gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); +                goto out; +        } +        cmd = flag; + +        ret = -1; +        switch (cmd) { +        case GF_OP_CMD_NONE: +                goto out; + +        case GF_OP_CMD_STATUS: +                ret = 0; +                goto out; + +        case GF_OP_CMD_PAUSE: +        { +                if (volinfo->decommission_in_progress) { +                        if (volinfo->defrag == (void *)1) +                                volinfo->defrag = NULL; + +                        if (volinfo->defrag) { +                                LOCK (&volinfo->defrag->lock); + +                                volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED; + +                                UNLOCK (&volinfo->defrag->lock); +                        } +                } + +                /* rebalance '_cbk()' will take care of volume file updates */ +                ret = 0; +                goto out; +        } + +        case GF_OP_CMD_ABORT: +        { +                if (volinfo->decommission_in_progress) { +                        if (volinfo->defrag == (void *)1) +                                volinfo->defrag = NULL; + +                        if (volinfo->defrag) { +                                LOCK (&volinfo->defrag->lock); + +                                volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED; + +                                UNLOCK (&volinfo->defrag->lock); +                        } +                } + +                /* rebalance '_cbk()' will take care of volume file updates */ +                ret = 0; +                goto out; +        } + +        case GF_OP_CMD_START: +                force = 0; +                break; + +        case GF_OP_CMD_COMMIT: +                force = 1; +                break; + +        case GF_OP_CMD_COMMIT_FORCE: + +                if (volinfo->decommission_in_progress) { +                        if (volinfo->defrag == (void *)1) +                                volinfo->defrag = NULL; + +                        if (volinfo->defrag) { +                                LOCK (&volinfo->defrag->lock); +                                /* Fake 'rebalance-complete' so the graph change +                                   happens right away */ +                                volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + +                                UNLOCK (&volinfo->defrag->lock); +                        } +                        ret = 0; +                        /* Graph change happens in rebalance _cbk function, +                           no need to do anything here */ +                        goto out; +                } + +                force = 1; +                break; +        } +          ret = dict_get_int32 (dict, "count", &count);          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to get count"); @@ -887,26 +1090,46 @@ glusterd_op_remove_brick (dict_t *dict)                          goto out;                  } -                ret = glusterd_op_perform_remove_brick (volinfo, brick); +                ret = glusterd_op_perform_remove_brick (volinfo, brick, force, +                                                        (i == 1) ? &need_rebalance : NULL);                  if (ret)                          goto out;                  i++;          }          ret = glusterd_create_volfiles_and_notify_services (volinfo); -        if (ret) +        if (ret) { +                gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");                  goto out; - -        volinfo->defrag_status = 0; +        }          ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - -        if (ret) +        if (ret) { +                gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");                  goto out; +        } -        if (GLUSTERD_STATUS_STARTED == volinfo->status) -                ret = glusterd_check_generate_start_nfs (); +        volinfo->defrag_status = 0; +        if (!force && need_rebalance) { +                /* perform the rebalance operations */ +                ret = glusterd_handle_defrag_start (volinfo, err_str, 4096, +                                                    GF_DEFRAG_CMD_START_FORCE, +                                                    glusterd_remove_brick_migrate_cbk); +                if (!ret) +                        volinfo->decommission_in_progress = 1; + +                if (ret) { +                        gf_log (THIS->name, GF_LOG_ERROR, +                                "failed to start the rebalance"); +                } +        } else { +                if (GLUSTERD_STATUS_STARTED == volinfo->status) +                        ret = glusterd_check_generate_start_nfs (); +        }  out: +        if (ret && err_str[0] && op_errstr) +                *op_errstr = gf_strdup (err_str); +          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 3b30fb0806e..53556984a33 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -2297,7 +2297,7 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,                          break;                  case GD_OP_REMOVE_BRICK: -                        ret = glusterd_op_stage_remove_brick (dict); +                        ret = glusterd_op_stage_remove_brick (dict, op_errstr);                          break;                  case GD_OP_LOG_FILENAME: @@ -2387,7 +2387,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,                          break;                  case GD_OP_REMOVE_BRICK: -                        ret = glusterd_op_remove_brick (dict); +                        ret = glusterd_op_remove_brick (dict, op_errstr);                          break;                  case GD_OP_LOG_FILENAME: @@ -2565,6 +2565,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)          int32_t                                 i = 1;          char                                    key[256] = {0,};          glusterd_pending_node_t                 *pending_node = NULL; +        int32_t                                 force = 0;          ret = dict_get_str (dict, "volname", &volname); @@ -2586,6 +2587,12 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)                  goto out;          } +        ret = dict_get_int32 (dict, "force", &force); +        if (ret) { +                gf_log (THIS->name, GF_LOG_INFO, "force flag is not set"); +                ret = 0; +                goto out; +        }          while ( i <= count) {                  snprintf (key, 256, "brick%d", i); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index a013d0adf6f..defaf947f4d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -46,6 +46,7 @@  #include "cli1-xdr.h"  #include "xdr-generic.h" +/* return values - 0: success, +ve: stopped, -ve: failure */  int  gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)  { @@ -66,7 +67,8 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)          if (!fd)                  goto out; -        if (defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) { +        if ((defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) || +            (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)) {                  strcpy (force_string, "force");          } else {                  strcpy (force_string, "not-force"); @@ -105,9 +107,11 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)                  }                  UNLOCK (&defrag->lock); -                if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { +                if (volinfo->defrag_status != +                    GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) { +                        /* It can be one of 'stopped|paused|commit' etc */                          closedir (fd); -                        ret = -1; +                        ret = 1;                          goto out;                  }          } @@ -144,6 +148,7 @@ out:          return ret;  } +/* return values - 0: success, +ve: stopped, -ve: failure */  int  gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)  { @@ -187,9 +192,11 @@ gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)                                  break;                  } -                if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { +                if (volinfo->defrag_status != +                    GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED) { +                        /* It can be one of 'stopped|paused|commit' etc */                          closedir (fd); -                        ret = -1; +                        ret = 1;                          goto out;                  }          } @@ -210,6 +217,7 @@ glusterd_defrag_start (void *data)          int                     ret     = -1;          struct stat             stbuf   = {0,}; +        THIS = volinfo->xl;          defrag = volinfo->defrag;          if (!defrag)                  goto out; @@ -240,8 +248,10 @@ glusterd_defrag_start (void *data)                  /* Step 1: Fix layout of all the directories */                  ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount); +                if (ret < 0) +                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; +                /* in both 'stopped' or 'failure' cases goto out */                  if (ret) { -                        volinfo->defrag_status   = GF_DEFRAG_STATUS_FAILED;                          goto out;                  } @@ -257,8 +267,10 @@ glusterd_defrag_start (void *data)                  /* Step 2: Iterate over directories to move data */                  ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount); +                if (ret < 0) +                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; +                /* in both 'stopped' or 'failure' cases goto out */                  if (ret) { -                        volinfo->defrag_status   = GF_DEFRAG_STATUS_FAILED;                          goto out;                  } @@ -267,7 +279,8 @@ glusterd_defrag_start (void *data)          }          /* Completed whole process */ -        if (defrag->cmd == GF_DEFRAG_CMD_START) +        if ((defrag->cmd == GF_DEFRAG_CMD_START) || +            (defrag->cmd == GF_DEFRAG_CMD_START_FORCE))                  volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;          volinfo->rebalance_files = defrag->total_files; @@ -281,9 +294,13 @@ out:                  ret = runcmd ("umount", "-l", defrag->mount, NULL);                  LOCK_DESTROY (&defrag->lock); + +                if (defrag->cbk_fn) { +                        defrag->cbk_fn (volinfo, volinfo->defrag_status); +                } +                  GF_FREE (defrag);          } -          return NULL;  } @@ -332,7 +349,7 @@ glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files,          LOCK (&volinfo->defrag->lock);          { -                volinfo->defrag_status = GF_DEFRAG_STATUS_STOPED; +                volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;                  *files = volinfo->defrag->total_files;                  *size = volinfo->defrag->total_data;          } @@ -497,7 +514,7 @@ out:  int  glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, -                              size_t len, int cmd) +                              size_t len, int cmd, defrag_cbk_fn_t cbk)  {          int                    ret = -1;          glusterd_defrag_info_t *defrag =  NULL; @@ -552,6 +569,9 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,          volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED; +        if (cbk) +                defrag->cbk_fn = cbk; +          ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start,                                volinfo);          if (ret) { @@ -635,7 +655,7 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)          case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:          {                  ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), -                                                    cli_req.cmd); +                                                    cli_req.cmd, NULL);                  rsp.op_ret = ret;                  break;          } @@ -845,7 +865,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)          case GF_DEFRAG_CMD_START_MIGRATE_DATA:          case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:                  ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), -                                                    cmd); +                                                    cmd, NULL);                   break;           case GF_DEFRAG_CMD_STOP:                   ret = glusterd_defrag_stop (volinfo, &files, &size, diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 060d40bed9e..4a428991061 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -1486,7 +1486,7 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t  *volinfo,          volinfo->brick_count++; -        ret = glusterd_op_perform_remove_brick (volinfo, old_brick); +        ret = glusterd_op_perform_remove_brick (volinfo, old_brick, 1, NULL);          if (ret)                  goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 53fdcf484fa..dab075db074 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -321,6 +321,10 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)          ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,                                           value); +        snprintf (value, sizeof(value), "%d", brickinfo->decommissioned); +        ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, +                                         value); +  out:          gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);          return ret; @@ -1637,6 +1641,9 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)                                  pmap = pmap_registry_get (THIS);                                  if (pmap->last_alloc <= brickinfo->rdma_port)                                          pmap->last_alloc = brickinfo->rdma_port + 1; +                        } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, +                                             strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { +                                gf_string2int (value, &brickinfo->decommissioned);                          } else {                                  gf_log ("", GF_LOG_ERROR, "Unknown key: %s",                                          key); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 3ca232a9ab5..61bda195dfc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -63,6 +63,7 @@ typedef enum glusterd_store_ver_ac_{  #define GLUSTERD_STORE_KEY_BRICK_PATH     "path"  #define GLUSTERD_STORE_KEY_BRICK_PORT     "listen-port"  #define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" +#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"  #define GLUSTERD_STORE_KEY_PEER_UUID      "uuid"  #define GLUSTERD_STORE_KEY_PEER_HOSTNAME  "hostname" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 8694f753631..5b247b6a901 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -548,6 +548,8 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo)                  goto out;          } +        new_volinfo->xl = THIS; +          *volinfo = new_volinfo;          ret = 0; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index cb198dfb9fe..d0533b1fccc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1811,10 +1811,15 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          char                   **cluster_args       = NULL;          int                      i                  = 0;          int                      j                  = 0; -        int                      ret                = 0; +        int                      ret                = -1;          xlator_t                *xl                 = NULL;          xlator_t                *txl                = NULL;          xlator_t                *trav               = NULL; +        int                      removed_bricks     = 0; +        int                      index_of_removed_brick = 0; +        char                    *removed_bricklist  = NULL; +        char                     volume_name[1024]  = {0,}; +        int                      idx                = 0;          volname = volinfo->volname;          dict    = volinfo->dict; @@ -1824,7 +1829,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                  gf_log ("", GF_LOG_ERROR,                          "volume inconsistency: brick count is 0"); -                return -1; +                goto out;          }          if (volinfo->sub_count && volinfo->sub_count < volinfo->brick_count &&              volinfo->brick_count % volinfo->sub_count != 0) { @@ -1834,7 +1839,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          "number of bricks per cluster (%d) in a multi-cluster "                          "setup",                          volinfo->brick_count, volinfo->sub_count); -                return -1; +                goto out;          }          get_transport_type (volinfo, set_dict, transt, _gf_false); @@ -1844,19 +1849,32 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          i = 0;          list_for_each_entry (brick, &volinfo->bricks, brick_list) { +                ret = -1;                  xl = volgen_graph_add_nolink (graph, "protocol/client",                                                "%s-client-%d", volname, i);                  if (!xl) -                        return -1; +                        goto out;                  ret = xlator_set_option (xl, "remote-host", brick->hostname);                  if (ret) -                        return -1; +                        goto out;                  ret = xlator_set_option (xl, "remote-subvolume", brick->path);                  if (ret) -                        return -1; +                        goto out;                  ret = xlator_set_option (xl, "transport-type", transt);                  if (ret) -                        return -1; +                        goto out; +                if (brick->decommissioned) { +                        if (!removed_bricklist) { +                                removed_bricklist = GF_CALLOC (16 * GF_UNIT_KB, +                                                               1, gf_common_mt_char); +                                index_of_removed_brick = i; +                        } +                        if (removed_bricks) +                                strcat (removed_bricklist, ","); +                        snprintf (volume_name, 1024, "%s-client-%d", volname, i); +                        strcat (removed_bricklist, volume_name); +                        removed_bricks++; +                }                  i++;          } @@ -1866,7 +1884,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          "differs from brick count (%d)", i,                          volinfo->brick_count); -                return -1; +                goto out;          }          sub_count = volinfo->sub_count; @@ -1880,15 +1898,18 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          break;                  case GF_CLUSTER_TYPE_STRIPE_REPLICATE:                          /* Replicate after the clients, then stripe */ -                        if (volinfo->replica_count == 0) -                                return -1; +                        if (volinfo->replica_count == 0) { +                                ret = -1; +                                goto out; +                        }                          sub_count = volinfo->replica_count;                          cluster_args = replicate_args;                          break;                  default:                          gf_log ("", GF_LOG_ERROR, "volume inconsistency: "                                  "unrecognized clustering type"); -                        return -1; +                        ret = -1; +                        goto out;                  }                  i = 0; @@ -1901,14 +1922,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                                                                cluster_args[0],                                                                cluster_args[1],                                                                volname, j); -                                if (!xl) -                                        return -1; +                                if (!xl) { +                                        ret = -1; +                                        goto out; +                                }                                  j++;                          }                          ret = volgen_xlator_link (xl, trav);                          if (ret) -                                return -1; +                                goto out;                          if (trav == txl)                                  break; @@ -1928,14 +1951,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                                                                        cluster_args[0],                                                                        cluster_args[1],                                                                        volname, j); -                                        if (!xl) -                                                return -1; +                                        if (!xl) { +                                                ret = -1; +                                                goto out; +                                        }                                          j++;                                  }                                  ret = volgen_xlator_link (xl, trav);                                  if (ret) -                                        return -1; +                                        goto out;                                  if (trav == txl)                                          break; @@ -1953,8 +1978,10 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          if (dist_count > 1) {                  xl = volgen_graph_add_nolink (graph, "cluster/distribute",                                                "%s-dht", volname); -                if (!xl) -                        return -1; +                if (!xl) { +                        ret = -1; +                        goto out; +                }                  trav = xl;                  for (i = 0; i < dist_count; i++) @@ -1962,28 +1989,50 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                  for (; trav != xl; trav = trav->prev) {                          ret = volgen_xlator_link (xl, trav);                          if (ret) -                                return -1; +                                goto out; +                } + +                if (removed_bricks) { +                        if (volinfo->sub_count) { +                                idx = index_of_removed_brick / volinfo->sub_count; +                                if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { +                                        snprintf (volume_name, 1024, "%s-replicate-%d", +                                                  volname, idx); +                                        strcpy (removed_bricklist, volume_name); +                                } else if (volinfo->type != GF_CLUSTER_TYPE_NONE) { +                                        snprintf (volume_name, 1024, "%s-stripe-%d  ", +                                                  volname, idx); +                                        strcpy (removed_bricklist, volume_name); +                                } +                        } +                        ret = xlator_set_option (xl, "decommissioned-bricks", +                                                 removed_bricklist); +                        if (ret) +                                goto out;                  }          }          ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);          if (ret == -1) -                return -1; +                goto out; +          if (ret) {                  xl = volgen_graph_add (graph, "features/quota", volname); -                if (!xl) -                        return -1; +                if (!xl) { +                        ret = -1; +                        goto out; +                }          }          ret = volgen_graph_set_options_generic (graph, set_dict, volname,                                                  &perfxl_option_handler);          if (ret) -                return -1; +                goto out;          xl = volgen_graph_add_as (graph, "debug/io-stats", volname);          if (!xl) -                return -1; +                goto out;          ret = volgen_graph_set_options_generic (graph, set_dict, "client",                                                  &loglevel_option_handler); @@ -1991,6 +2040,11 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          if (!ret)                  ret = volgen_graph_set_options_generic (graph, set_dict, "client",                                                          &sys_loglevel_option_handler); + +out: +        if (removed_bricklist) +                GF_FREE (removed_bricklist); +          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 86eeaeb1cae..c8fa8281903 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -45,7 +45,7 @@  #include "glusterd1-xdr.h"  #include "protocol-common.h"  #include "glusterd-pmap.h" - +#include "cli1-xdr.h"  #define GLUSTERD_MAX_VOLUME_NAME        1000  #define DEFAULT_LOG_FILE_DIRECTORY      DATADIR "/log/glusterfs" @@ -132,6 +132,7 @@ struct glusterd_brickinfo {          gf_brick_status_t status;          struct rpc_clnt *rpc;          gf_timer_t *timer; +        int decommissioned;  };  typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -142,16 +143,11 @@ struct gf_defrag_brickinfo_ {          int   size;  }; -typedef enum gf_defrag_status_ { -        GF_DEFRAG_STATUS_NOT_STARTED, -        GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED, -        GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED, -        GF_DEFRAG_STATUS_STOPED, -        GF_DEFRAG_STATUS_COMPLETE, -        GF_DEFRAG_STATUS_FAILED, -        GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE, -        GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE, -} gf_defrag_status_t; +struct glusterd_volinfo_; +typedef struct glusterd_volinfo_ glusterd_volinfo_t; + +typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo, +                                gf_defrag_status_t status);  struct glusterd_defrag_info_ {          uint64_t                     total_files; @@ -163,6 +159,8 @@ struct glusterd_defrag_info_ {          char                         mount[1024];          char                         databuf[131072];          struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */ + +        defrag_cbk_fn_t              cbk_fn;  }; @@ -219,9 +217,10 @@ struct glusterd_volinfo_ {          char                    *logdir;          dict_t                  *gsync_slaves; -}; -typedef struct glusterd_volinfo_ glusterd_volinfo_t; +        int                      decommission_in_progress; +        xlator_t                *xl; +};  typedef struct glusterd_pending_node_ {          void   *node; @@ -540,6 +539,8 @@ int glusterd_handle_cli_start_volume (rpcsvc_request_t *req);  int glusterd_handle_cli_stop_volume (rpcsvc_request_t *req);  int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req); +int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, +                                  size_t len, int cmd, defrag_cbk_fn_t cbk);  /* op-sm functions */  int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr); @@ -565,9 +566,9 @@ int glusterd_op_stop_volume (dict_t *dict);  int glusterd_op_delete_volume (dict_t *dict);  int glusterd_op_add_brick (dict_t *dict, char **op_errstr); -int glusterd_op_remove_brick (dict_t *dict); +int glusterd_op_remove_brick (dict_t *dict, char **op_errstr);  int glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr); -int glusterd_op_stage_remove_brick (dict_t *dict); +int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr);  int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr);  int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict); @@ -575,7 +576,8 @@ int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict);  /* misc */  void glusterd_do_replace_brick (void *data); -int glusterd_op_perform_remove_brick (glusterd_volinfo_t  *volinfo, char *brick); +int glusterd_op_perform_remove_brick (glusterd_volinfo_t  *volinfo, char *brick, +                                      int force, int *need_migrate);  int glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags); diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 09818ce8902..a8b7b67a46d 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -2545,7 +2545,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,                  }                  goto done;          } -        if (loc->inode && IA_ISREG (loc->inode->ia_type) && name && +        if (loc->inode && name &&              (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {                  snprintf (host_buf, 1024, "<POSIX:%s:%s>", priv->hostname,                            real_path);  | 
