From 642343cc55e6ea2b0d463d77334c34790c30080f Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Fri, 16 Mar 2012 16:09:42 +0530 Subject: Self-heald: Handle errors gracefully and show errors to users Change-Id: I5424ebfadb5b2773ee6f7370cc2867a555aa48dd BUG: 800352 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.com/2962 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- cli/src/cli-cmd-volume.c | 4 +- cli/src/cli-rpc-ops.c | 22 +- xlators/cluster/afr/src/afr-common.c | 2 +- xlators/cluster/afr/src/afr-mem-types.h | 1 + xlators/cluster/afr/src/afr-self-heald.c | 294 ++++++++++++++++-------- xlators/cluster/afr/src/afr-self-heald.h | 4 +- xlators/cluster/afr/src/afr.c | 1 + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 15 +- xlators/mgmt/glusterd/src/glusterd-op-sm.h | 1 + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 30 +-- xlators/mgmt/glusterd/src/glusterd-utils.c | 34 ++- xlators/mgmt/glusterd/src/glusterd-utils.h | 7 + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 10 + 13 files changed, 274 insertions(+), 151 deletions(-) diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 9546831ab7c..4e56f9c656c 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1561,8 +1561,10 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word, int sent = 0; int parse_error = 0; dict_t *options = NULL; + xlator_t *this = NULL; - frame = create_frame (THIS, THIS->ctx->pool); + this = THIS; + frame = create_frame (this, this->ctx->pool); if (!frame) goto out; diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index a259addb160..b88b523b7c7 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -5561,6 +5561,7 @@ cmd_heal_volume_brick_out (dict_t *dict, int brick) char key[256] = {0}; char *hostname = NULL; char *path = NULL; + char *status = NULL; uint64_t i = 0; snprintf (key, sizeof (key), "%d-hostname", brick); @@ -5571,9 +5572,14 @@ cmd_heal_volume_brick_out (dict_t *dict, int brick) ret = dict_get_str (dict, key, &path); if (ret) goto out; + cli_out ("\nBrick %s:%s", hostname, path); snprintf (key, sizeof (key), "%d-count", brick); ret = dict_get_uint64 (dict, key, &num_entries); - cli_out ("\nEntries on %s:%s %"PRIu64, hostname, path, num_entries); + cli_out ("Number of entries: %"PRIu64, num_entries); + snprintf (key, sizeof (key), "%d-status", brick); + ret = dict_get_str (dict, key, &status); + if (status && strlen (status)) + cli_out ("Status: %s", status); for (i = 0; i < num_entries; i++) { snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i); ret = dict_get_str (dict, key, &path); @@ -5645,21 +5651,15 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, if (rsp.op_ret && strcmp (rsp.op_errstr, "")) cli_out ("%s", rsp.op_errstr); else - cli_out ("Starting heal on volume %s has been %s", volname, + cli_out ("Heal operation on volume %s has been %s", volname, (rsp.op_ret) ? "unsuccessful": "successful"); - if (rsp.op_ret) { - ret = rsp.op_ret; - goto out; - } - + ret = rsp.op_ret; if ((heal_op == GF_AFR_OP_HEAL_FULL) || - (heal_op == GF_AFR_OP_HEAL_INDEX)) { - ret = 0; + (heal_op == GF_AFR_OP_HEAL_INDEX)) goto out; - } - dict = dict_new (); + dict = dict_new (); if (!dict) { ret = -1; goto out; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index e220a61722d..b2077c384cf 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3597,7 +3597,7 @@ afr_notify (xlator_t *this, int32_t event, if (propagate) ret = default_notify (this, event, data); if (call_psh && priv->shd.iamshd) - afr_do_poll_self_heal ((void*) (long) up_child); + afr_proactive_self_heal ((void*) (long) up_child); out: return ret; diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 343260a7968..f5292b3cca7 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -49,6 +49,7 @@ enum gf_afr_mem_types_ { gf_afr_mt_shd_timer_t, gf_afr_mt_shd_event_t, gf_afr_mt_time_t, + gf_afr_mt_pos_data_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index ecd8c472699..3068d5c46d0 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -34,6 +34,11 @@ typedef enum { STOP_CRAWL_ON_SINGLE_SUBVOL = 1 } afr_crawl_flags_t; +typedef enum { + HEAL = 1, + INFO +} shd_crawl_op; + typedef struct shd_dump { dict_t *dict; time_t sh_time; @@ -46,6 +51,12 @@ typedef struct shd_event_ { char *path; } shd_event_t; +typedef struct shd_pos_ { + int child; + xlator_t *this; + afr_child_pos_t pos; +} shd_pos_t; + typedef int (*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data); @@ -58,6 +69,9 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, static int _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data); +int +afr_syncop_find_child_position (void *data); + void shd_cleanup_event (void *event) { @@ -360,44 +374,119 @@ _do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl) afr_crawl_done); } -void -_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl) +gf_boolean_t +_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason) { - int i = 0; - afr_private_t *priv = NULL; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + gf_boolean_t proceed = _gf_false; + char *msg = NULL; priv = this->private; - for (i = 0; i < priv->child_count; i++) - _do_self_heal_on_subvol (this, i, INDEX); + shd = &priv->shd; + if (!shd->enabled) { + msg = "Self-heal daemon is not enabled"; + gf_log (this->name, GF_LOG_ERROR, msg); + goto out; + } + if (!priv->child_up[child]) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s , " + "subvol went down", priv->children[child]->name); + msg = "Brick is Not connected"; + goto out; + } + + if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) { + if (afr_up_children_count (priv->child_up, + priv->child_count) < 2) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " + "< 2 children are up"); + msg = "< 2 bricks in replica are running"; + goto out; + } + } + proceed = _gf_true; +out: + if (reason) + *reason = msg; + return proceed; } -void -_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl) +int +_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, + shd_crawl_op op, dict_t *output) { - int local_child = -1; - afr_private_t *priv = NULL; + afr_private_t *priv = NULL; + char *status = NULL; + char *subkey = NULL; + char key[256] = {0}; + shd_pos_t pos_data = {0}; + int op_ret = -1; + int xl_id = -1; + int i = 0; + int ret = 0; + int crawl_flags = 0; priv = this->private; - local_child = afr_get_local_child (&priv->shd, - priv->child_count); - if (local_child < -1) { - gf_log (this->name, GF_LOG_INFO, - "No local bricks found"); + if (op == HEAL) + crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL; + + ret = dict_get_int32 (output, this->name, &xl_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Invalid input, " + "translator-id is not available"); + goto out; + } + pos_data.this = this; + subkey = "status"; + for (i = 0; i < priv->child_count; i++) { + if (_crawl_proceed (this, i, crawl_flags, &status)) { + pos_data.child = i; + ret = synctask_new (this->ctx->env, + afr_syncop_find_child_position, + NULL, NULL, &pos_data); + if (ret) { + status = "Not able to find brick location"; + } else if (pos_data.pos == AFR_POS_REMOTE) { + status = "brick is remote"; + } else { + op_ret = 0; + if (op == HEAL) { + status = "Started self-heal"; + _do_self_heal_on_subvol (this, i, + crawl); + } else { + status = ""; + afr_start_crawl (this, i, INDEX, + _add_summary_to_dict, + output, _gf_false, 0, + NULL); + } + } + snprintf (key, sizeof (key), "%d-%d-%s", xl_id, + i, subkey); + ret = dict_set_str (output, key, status); + if (!op_ret && (crawl == FULL)) + break; + } + snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, subkey); + ret = dict_set_str (output, key, status); } - _do_self_heal_on_subvol (this, local_child, FULL); +out: + return op_ret; } int -_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) +_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, + dict_t *output) { - int i = 0; - afr_private_t *priv = NULL; + return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output); +} - priv = this->private; - for (i = 0; i < priv->child_count; i++) - afr_start_crawl (this, i, INDEX, _add_summary_to_dict, - output, _gf_false, 0, NULL); - return 0; +int +_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) +{ + return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output); } int @@ -441,17 +530,13 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) goto out; switch (op) { case GF_AFR_OP_HEAL_INDEX: - _do_self_heal_on_local_subvols (this, INDEX); - ret = 0; + ret = _do_self_heal_on_local_subvols (this, INDEX, output); break; case GF_AFR_OP_HEAL_FULL: - _do_self_heal_on_local_subvol (this, FULL); - ret = 0; + ret = _do_self_heal_on_local_subvols (this, FULL, output); break; case GF_AFR_OP_INDEX_SUMMARY: ret = _get_index_summary_on_local_subvols (this, output); - if (ret) - goto out; break; case GF_AFR_OP_HEALED_FILES: ret = _add_all_subvols_eh_to_dict (this, shd->healed, output); @@ -474,34 +559,85 @@ out: } void -afr_do_poll_self_heal (void *data) +afr_poll_self_heal (void *data) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; struct timeval timeout = {0}; xlator_t *this = NULL; long child = (long)data; + gf_timer_t *old_timer = NULL; + gf_timer_t *new_timer = NULL; this = THIS; priv = this->private; shd = &priv->shd; - if (shd->enabled) - _do_self_heal_on_subvol (this, child, INDEX); - if (shd->pos[child] == AFR_POS_REMOTE) - goto out; + _do_self_heal_on_subvol (this, child, INDEX); timeout.tv_sec = AFR_POLL_TIMEOUT; timeout.tv_usec = 0; - if (shd->timer[child]) - gf_timer_call_cancel (this->ctx, shd->timer[child]); - shd->timer[child] = gf_timer_call_after (this->ctx, timeout, - afr_do_poll_self_heal, data); + //notify and previous timer should be synchronized. + LOCK (&priv->lock); + { + old_timer = shd->timer[child]; + shd->timer[child] = gf_timer_call_after (this->ctx, timeout, + afr_poll_self_heal, + data); + new_timer = shd->timer[child]; + } + UNLOCK (&priv->lock); - if (shd->timer[child] == NULL) { + if (old_timer) + gf_timer_call_cancel (this->ctx, old_timer); + if (!new_timer) { gf_log (this->name, GF_LOG_WARNING, - "Cannot create pending self-heal polling timer for %s", + "Could not create self-heal polling timer for %s", priv->children[child]->name); } + return; +} + +static int +afr_local_child_poll_self_heal (int ret, call_frame_t *sync_frame, void *data) +{ + afr_self_heald_t *shd = NULL; + shd_pos_t *pos_data = data; + afr_private_t *priv = NULL; + + if (ret) + goto out; + + priv = pos_data->this->private; + shd = &priv->shd; + shd->pos[pos_data->child] = pos_data->pos; + if (pos_data->pos == AFR_POS_LOCAL) + afr_poll_self_heal ((void*)(long)pos_data->child); +out: + GF_FREE (data); + return 0; +} + +void +afr_proactive_self_heal (void *data) +{ + xlator_t *this = NULL; + long child = (long)data; + shd_pos_t *pos_data = NULL; + int ret = 0; + + this = THIS; + + //Position of brick could have changed and it could be local now. + //Compute the position again + pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t); + if (!pos_data) + goto out; + pos_data->this = this; + pos_data->child = child; + ret = synctask_new (this->ctx->env, afr_syncop_find_child_position, + afr_local_child_poll_self_heal, NULL, pos_data); + if (ret) + goto out; out: return; } @@ -680,31 +816,6 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, return ret; } -gf_boolean_t -_crawl_proceed (xlator_t *this, int child, int crawl_flags) -{ - afr_private_t *priv = this->private; - gf_boolean_t proceed = _gf_false; - - if (!priv->child_up[child]) { - gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s " - ", subvol went down", priv->children[child]->name); - goto out; - } - - if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) { - if (afr_up_children_count (priv->child_up, - priv->child_count) < 2) { - gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " - "< 2 children are up"); - goto out; - } - } - proceed = _gf_true; -out: - return proceed; -} - static int _process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, off_t *offset, afr_crawl_data_t *crawl_data) @@ -719,7 +830,7 @@ _process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, list_for_each_entry_safe (entry, tmp, &entries->list, list) { if (!_crawl_proceed (this, crawl_data->child, - crawl_data->crawl_flags)) { + crawl_data->crawl_flags, NULL)) { ret = -1; goto out; } @@ -813,7 +924,7 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data) free_entries = _gf_true; if (!_crawl_proceed (this, crawl_data->child, - crawl_data->crawl_flags)) { + crawl_data->crawl_flags, NULL)) { ret = -1; goto out; } @@ -847,7 +958,7 @@ position_str_get (afr_child_pos_t pos) } int -afr_find_child_position (xlator_t *this, int child) +afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos) { afr_private_t *priv = NULL; dict_t *xattr_rsp = NULL; @@ -855,28 +966,16 @@ afr_find_child_position (xlator_t *this, int child) int ret = 0; gf_boolean_t local = _gf_false; char *pathinfo = NULL; - afr_child_pos_t *pos = NULL; priv = this->private; - pos = &priv->shd.pos[child]; - - if (!priv->root_inode) { - LOCK (&priv->lock); - { - if (!priv->root_inode) - priv->root_inode = inode_ref - (this->itable->root); - } - UNLOCK (&priv->lock); - } afr_build_root_loc (this, &loc); ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, GF_XATTR_PATHINFO_KEY); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child " - "%d", child); + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s", + priv->children[child]->name); goto out; } @@ -904,18 +1003,21 @@ out: return ret; } -static inline int -afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) +int +afr_syncop_find_child_position (void *data) { - return (shd->pos[child] == AFR_POS_LOCAL); + shd_pos_t *pos_data = data; + int ret = 0; + + ret = afr_find_child_position (pos_data->this, pos_data->child, + &pos_data->pos); + return ret; } static int afr_dir_crawl (void *data) { xlator_t *this = NULL; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; int ret = -1; xlator_t *readdir_xl = NULL; fd_t *fd = NULL; @@ -923,17 +1025,9 @@ afr_dir_crawl (void *data) afr_crawl_data_t *crawl_data = data; this = THIS; - priv = this->private; - shd = &priv->shd; - - if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags)) - goto out; - - ret = afr_find_child_position (this, crawl_data->child); - if (ret) - goto out; - if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) + if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags, + NULL)) goto out; readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); @@ -1026,16 +1120,12 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, afr_crawl_done_cbk_t crawl_done) { afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; call_frame_t *frame = NULL; afr_crawl_data_t *crawl_data = NULL; int ret = 0; int (*crawler) (void*) = NULL; priv = this->private; - shd = &priv->shd; - if (!shd->enabled) - goto out; frame = create_frame (this, this->ctx->pool); if (!frame) diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 57b29ce2dc4..84873862123 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -42,14 +42,12 @@ typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data gf_dirent_t *entry, loc_t *child, loc_t *parent, struct iatt *iattr); -void afr_proactive_self_heal (xlator_t *this, int idx); - void afr_build_root_loc (xlator_t *this, loc_t *loc); int afr_set_root_gfid (dict_t *dict); void -afr_do_poll_self_heal (void *data); +afr_proactive_self_heal (void *data); int afr_xl_op (xlator_t *this, dict_t *input, dict_t *output); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 5d4516190ff..89a8e87fbc3 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -415,6 +415,7 @@ init (xlator_t *this) this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); if (!this->itable) goto out; + priv->root_inode = inode_ref (this->itable->root); ret = 0; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index d96d6717eeb..ce9581644b1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -2780,9 +2780,9 @@ _heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data) int rxl_child_id = 0; int brick_id = 0; int int_len = 0; - int brick_count = 0; int ret = 0; glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; rsp_ctx = data; rxl_end = strchr (key, '-'); @@ -2810,13 +2810,19 @@ _heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data) volinfo = rsp_ctx->volinfo; brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + if (!strcmp (rxl_child_end, "status")) { + brickinfo = glusterd_get_brickinfo_by_position (volinfo, + brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, + brickinfo)) + goto out; + } new_value = data_copy (value); snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end); dict_set (rsp_ctx->dict, new_key, new_value); - ret = dict_get_int32 (rsp_ctx->dict, "count", &brick_count); - if (brick_id >= brick_count) - ret = dict_set_int32 (rsp_ctx->dict, "count", brick_id + 1); out: return; } @@ -2847,6 +2853,7 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, rsp_ctx.dict = op_ctx; rsp_ctx.volinfo = volinfo; + rsp_ctx.this = THIS; dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); out: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index bfc41b29252..1f32681a5a1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -157,6 +157,7 @@ typedef struct glusterd_pr_brick_rsp_conv_t { typedef struct glusterd_heal_rsp_conv_ { dict_t *dict; glusterd_volinfo_t *volinfo; + xlator_t *this; } glusterd_heal_rsp_conv_t; typedef struct glusterd_status_rsp_conv_ { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index c7931dbfafc..649156f4b33 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -111,11 +111,6 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, } break; } - case GD_OP_HEAL_VOLUME: - { - glusterd_add_bricks_hname_path_to_dict (ctx); - break; - } case GD_OP_PROFILE_VOLUME: { if (ctx && dict_get_int32 (ctx, "count", &count)) { @@ -153,6 +148,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_SET_VOLUME: case GD_OP_LIST_VOLUME: case GD_OP_CLEARLOCKS_VOLUME: + case GD_OP_HEAL_VOLUME: { /*nothing specific to be done*/ break; @@ -1198,28 +1194,6 @@ out: return ret; } -void -_heal_volume_add_peer_rsp (dict_t *peer_dict, char *key, data_t *value, - void *data) -{ - int max_brick = 0; - int peer_max_brick = 0; - int ret = 0; - dict_t *ctx_dict = data; - - - - ret = dict_get_int32 (ctx_dict, "count", &max_brick); - ret = dict_get_int32 (peer_dict, "count", &peer_max_brick); - if (peer_max_brick > max_brick) - ret = dict_set_int32 (ctx_dict, "count", peer_max_brick); - else - ret = dict_set_int32 (ctx_dict, "count", max_brick); - dict_del (peer_dict, "count"); - dict_copy (peer_dict, ctx_dict); - return; -} - int glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict) { @@ -1236,7 +1210,7 @@ glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict) if (!ctx_dict) goto out; - dict_foreach (rsp_dict, _heal_volume_add_peer_rsp, ctx_dict); + dict_copy (rsp_dict, ctx_dict); out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 9cc436496ab..4b84039dfe6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -3303,9 +3303,23 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, return ret; } +glusterd_brickinfo_t* +glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos) +{ + glusterd_brickinfo_t *tmpbrkinfo = NULL; + + list_for_each_entry (tmpbrkinfo, &volinfo->bricks, + brick_list) { + if (pos == 0) + return tmpbrkinfo; + pos--; + } + return NULL; +} + void glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, - gf_brick_status_t status) + gf_brick_status_t status) { GF_ASSERT (brickinfo); brickinfo->status = status; @@ -5139,3 +5153,21 @@ glusterd_uuid_to_hostname (uuid_t uuid) return hostname; } +gf_boolean_t +glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + gf_boolean_t local = _gf_false; + int ret = 0; + glusterd_conf_t *conf = NULL; + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) + goto out; + } + conf = this->private; + local = !uuid_compare (brickinfo->uuid, conf->uuid); +out: + return local; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 7b5a387c275..fa9f7737005 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -419,4 +419,11 @@ glusterd_add_node_to_dict (char *server, dict_t *dict, int count); char * glusterd_uuid_to_hostname (uuid_t uuid); + +glusterd_brickinfo_t* +glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos); + +gf_boolean_t +glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index d6e58c1b844..93c00983a21 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -448,6 +448,7 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) dict_t *dict = NULL; glusterd_op_t cli_op = GD_OP_HEAL_VOLUME; char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; GF_ASSERT (req); @@ -483,6 +484,15 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) gf_log ("glusterd", GF_LOG_INFO, "Received heal vol req" "for volume %s", volname); + ret = glusterd_add_bricks_hname_path_to_dict (dict); + if (ret) + goto out; + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + ret = dict_set_int32 (dict, "count", volinfo->brick_count); + if (ret) + goto out; ret = glusterd_op_begin (req, GD_OP_HEAL_VOLUME, dict); gf_cmd_log ("volume heal","on volname: %s %s", volname, -- cgit