From 81ab6622d403558cd6f31efeb535fe886d3beeaa Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Thu, 16 Feb 2012 21:30:47 +0530 Subject: cluster/afr: Add commands to see self-heald ops Change-Id: Id92d3276e65a6c0fe61ab328b58b3954ae116c74 BUG: 763820 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.com/2775 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/Makefile.am | 2 +- xlators/cluster/afr/src/afr-common.c | 32 +- xlators/cluster/afr/src/afr-mem-types.h | 10 +- xlators/cluster/afr/src/afr-self-heald.c | 776 +++++++++++++++++------- xlators/cluster/afr/src/afr-self-heald.h | 24 +- xlators/cluster/afr/src/afr.c | 79 ++- xlators/cluster/afr/src/afr.h | 23 +- xlators/cluster/afr/src/pump.c | 2 +- xlators/mgmt/glusterd/src/glusterd-handler.c | 1 + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 263 ++++++-- xlators/mgmt/glusterd/src/glusterd-op-sm.h | 5 + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 57 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 30 + xlators/mgmt/glusterd/src/glusterd-utils.h | 3 + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 14 +- 15 files changed, 1011 insertions(+), 310 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index 16ed25af1..ed0901813 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -15,7 +15,7 @@ noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr- AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \ - -I$(top_srcdir)/xlators/lib/src + -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src CLEANFILES = diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 1895150cd..9a78f6d3d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3399,7 +3399,7 @@ find_child_index (xlator_t *this, xlator_t *child) int32_t afr_notify (xlator_t *this, int32_t event, - void *data, ...) + void *data, void *data2) { afr_private_t *priv = NULL; int i = -1; @@ -3412,6 +3412,8 @@ afr_notify (xlator_t *this, int32_t event, int ret = -1; int call_psh = 0; int up_child = AFR_ALL_CHILDREN; + dict_t *input = NULL; + dict_t *output = NULL; priv = this->private; @@ -3499,10 +3501,11 @@ afr_notify (xlator_t *this, int32_t event, break; - case GF_EVENT_TRIGGER_HEAL: - gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered" - " manually. Start crawling"); - call_psh = 1; + case GF_EVENT_TRANSLATOR_OP: + input = data; + output = data2; + ret = afr_xl_op (this, input, output); + goto out; break; default: @@ -3552,7 +3555,7 @@ afr_notify (xlator_t *this, int32_t event, ret = 0; if (propagate) ret = default_notify (this, event, data); - if (call_psh) { + if (call_psh && priv->shd.enabled) { gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child); afr_do_poll_self_heal ((void*) (long) up_child); } @@ -3925,6 +3928,23 @@ afr_priv_destroy (afr_private_t *priv) goto out; inode_unref (priv->root_inode); GF_FREE (priv->shd.pos); + GF_FREE (priv->shd.pending); + GF_FREE (priv->shd.inprogress); + GF_FREE (priv->shd.sh_times); +// for (i = 0; i < priv->child_count; i++) +// if (priv->shd.timer && priv->shd.timer[i]) +// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]); + GF_FREE (priv->shd.timer); + + if (priv->shd.healed) + eh_destroy (priv->shd.healed); + + if (priv->shd.heal_failed) + eh_destroy (priv->shd.heal_failed); + + if (priv->shd.split_brain) + eh_destroy (priv->shd.split_brain); + GF_FREE (priv->last_event); if (priv->pending_key) { for (i = 0; i < priv->child_count; i++) diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 228139408..a138c9676 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -44,10 +44,12 @@ enum gf_afr_mem_types_ { gf_afr_mt_locked_fd, gf_afr_mt_inode_ctx_t, gf_afr_fd_paused_call_t, - gf_afr_mt_afr_crawl_data_t, - gf_afr_mt_afr_brick_pos_t, - gf_afr_mt_afr_shd_bool_t, - gf_afr_mt_afr_shd_timer_t, + gf_afr_mt_crawl_data_t, + gf_afr_mt_brick_pos_t, + gf_afr_mt_shd_bool_t, + gf_afr_mt_shd_timer_t, + gf_afr_mt_shd_event_t, + gf_afr_mt_time_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 1f071b871..fa7e61e49 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -25,11 +25,453 @@ #include "syncop.h" #include "afr-self-heald.h" #include "afr-self-heal-common.h" +#include "protocol-common.h" +#include "event-history.h" #define AFR_POLL_TIMEOUT 600 +typedef enum { + STOP_CRAWL_ON_SINGLE_SUBVOL = 1 +} afr_crawl_flags_t; + +typedef struct shd_dump { + dict_t *dict; + time_t sh_time; + xlator_t *this; + int child; +} shd_dump_t; + +typedef struct shd_event_ { + int child; + char *path; +} shd_event_t; + +typedef int +(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data); + +void +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, + process_entry_cbk_t process_entry, void *op_data, + gf_boolean_t exclusive, int crawl_flags, + afr_crawl_done_cbk_t crawl_done); + +static int +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data); + +void +shd_cleanup_event (void *event) +{ + shd_event_t *shd_event = event; + + if (!shd_event) + goto out; + if (shd_event->path) + GF_FREE (shd_event->path); + GF_FREE (shd_event); +out: + return; +} + +int +afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count) +{ + int i = 0; + int ret = -1; + for (i = 0; i < child_count; i++) { + if (shd->pos[i] == AFR_POS_LOCAL) { + ret = i; + break; + } + } + return ret; +} + +static int +_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) +{ + int ret = 0; + + uuid_copy (loc->pargfid, parent->inode->gfid); + loc->path = ""; + loc->name = name; + loc->parent = inode_ref (parent->inode); + if (!loc->parent) { + loc->path = NULL; + loc_wipe (loc); + ret = -1; + } + return ret; +} + +int +_add_str_to_dict (xlator_t *this, dict_t *output, int child, char *str, + gf_boolean_t dyn) +{ + //subkey not used for now + int ret = -1; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; + + ret = dict_get_int32 (output, this->name, &xl_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); + goto out; + } + + snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); + ret = dict_get_uint64 (output, key, &count); + + snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count); + if (dyn) + ret = dict_set_dynstr (output, key, str); + else + ret = dict_set_str (output, key, str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output", + str); + goto out; + } + + snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); + ret = dict_set_uint64 (output, key, count + 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not increment count"); + goto out; + } + ret = 0; +out: + return ret; +} + +int +_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child, + char **fpath) +{ + dict_t *xattr = NULL; + char *path = NULL; + int ret = -1; + + ret = syncop_getxattr (readdir_xl, child, &xattr, + GFID_TO_PATH_KEY); + if (ret) + goto out; + ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to get path for " + "gfid %s", uuid_utoa (child->gfid)); + goto out; + } + path = gf_strdup (path); + if (!path) { + ret = -1; + goto out; + } + ret = 0; +out: + if (!ret) + *fpath = path; + if (xattr) + dict_unref (xattr); + return ret; +} + +int +_add_event_to_dict (circular_buffer_t *cb, void *data) +{ + int ret = 0; + shd_dump_t *dump_data = NULL; + shd_event_t *shd_event = NULL; + + dump_data = data; + shd_event = cb->data; + if (shd_event->child != dump_data->child) + goto out; + if (cb->tv.tv_sec >= dump_data->sh_time) + ret = _add_str_to_dict (dump_data->this, dump_data->dict, + dump_data->child, shd_event->path, + _gf_false); +out: + return ret; +} + +int +_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, time_t sh_time, + int child) +{ + shd_dump_t dump_data = {0}; + + dump_data.this = this; + dump_data.dict = dict; + dump_data.sh_time = sh_time; + dump_data.child = child; + eh_dump (eh, &dump_data, _add_event_to_dict); + return 0; +} + +int +_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data, + gf_dirent_t *entry, + loc_t *childloc, loc_t *parentloc, struct iatt *iattr) +{ + dict_t *output = NULL; + xlator_t *readdir_xl = NULL; + int ret = -1; + char *path = NULL; + + if (uuid_is_null (childloc->gfid)) + goto out; + + output = crawl_data->op_data; + readdir_xl = crawl_data->readdir_xl; + + ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path); + if (ret) + goto out; + + ret = _add_str_to_dict (this, output, crawl_data->child, path, + _gf_true); +out: + if (ret && path) + GF_FREE (path); + return ret; +} + +void +_remove_stale_index (xlator_t *this, xlator_t *readdir_xl, + loc_t *parent, char *fname) +{ + int ret = 0; + loc_t index_loc = {0}; + + ret = _build_index_loc (this, &index_loc, fname, parent); + if (ret) + goto out; + gf_log (this->name, GF_LOG_INFO, "Removing stale index " + "for %s on %s", index_loc.name, readdir_xl->name); + ret = syncop_unlink (readdir_xl, &index_loc); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" + " index on %s - %s", index_loc.name, + readdir_xl->name, strerror (errno)); + } + index_loc.path = NULL; + loc_wipe (&index_loc); +out: + return; +} + +void +_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, + int32_t op_ret, int32_t op_errno, + afr_crawl_data_t *crawl_data) +{ + int ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + eh_t *eh = NULL; + char *path = NULL; + shd_event_t *event = NULL; + + priv = this->private; + shd = &priv->shd; + if (crawl_data->crawl == INDEX) { + if ((op_ret < 0) && (op_errno == ENOENT)) { + _remove_stale_index (this, crawl_data->readdir_xl, + parent, uuid_utoa (child->gfid)); + goto out; + } + ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl, + child, &path); + if (ret) + goto out; + } else { + path = gf_strdup (child->path); + if (!path) { + ret = -1; + goto out; + } + } + + if (op_ret < 0 && op_errno == EIO) + eh = shd->split_brain; + else if (op_ret < 0) + eh = shd->heal_failed; + else + eh = shd->healed; + ret = -1; + event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t); + if (!event) + goto out; + event->child = crawl_data->child; + event->path = path; + ret = eh_save_history (eh, event); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to " + "eh, (%d, %s)", path, op_ret, strerror (op_errno)); + goto out; + } + ret = 0; +out: + if (ret && path) + GF_FREE (path); + return; +} + +int +_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry, + loc_t *child, loc_t *parent, struct iatt *iattr) +{ + struct iatt parentbuf = {0}; + int ret = 0; + + if (uuid_is_null (child->gfid)) + gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path); + else + gf_log (this->name, GF_LOG_DEBUG, "lookup %s", + uuid_utoa (child->gfid)); + + ret = syncop_lookup (this, child, NULL, + iattr, NULL, &parentbuf); + _crawl_post_sh_action (this, parent, child, ret, errno, crawl_data); + return ret; +} + +static int +afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) +{ + GF_FREE (data); + STACK_DESTROY (sync_frame->root); + return 0; +} + void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl); +_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + + priv = this->private; + shd = &priv->shd; + + time (&shd->sh_times[child]); + afr_start_crawl (this, child, crawl, _self_heal_entry, + NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL, + afr_crawl_done); +} + +void +_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) + _do_self_heal_on_subvol (this, i, INDEX); +} + +void +_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl) +{ + int local_child = -1; + afr_private_t *priv = NULL; + + priv = this->private; + local_child = afr_get_local_child (&priv->shd, + priv->child_count); + if (local_child < -1) { + gf_log (this->name, GF_LOG_INFO, + "No local bricks found"); + } + _do_self_heal_on_subvol (this, local_child, FULL); +} + +int +_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) + afr_start_crawl (this, i, INDEX, _add_summary_to_dict, + output, _gf_false, 0, NULL); + return 0; +} + +int +_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int i = 0; + + priv = this->private; + shd = &priv->shd; + + for (i = 0; i < priv->child_count; i++) { + if (shd->pos[i] != AFR_POS_LOCAL) + continue; + _add_eh_to_dict (this, eh, dict, shd->sh_times[i], i); + } + return 0; +} + +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) +{ + gf_xl_afr_op_t op = GF_AFR_OP_INVALID; + int ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int xl_id = 0; + + priv = this->private; + shd = &priv->shd; + + ret = dict_get_int32 (input, "xl-op", (int32_t*)&op); + if (ret) + goto out; + ret = dict_get_int32 (input, this->name, &xl_id); + if (ret) + goto out; + ret = dict_set_int32 (output, this->name, xl_id); + if (ret) + goto out; + switch (op) { + case GF_AFR_OP_HEAL_INDEX: + _do_self_heal_on_local_subvols (this, INDEX); + ret = 0; + break; + case GF_AFR_OP_HEAL_FULL: + _do_self_heal_on_local_subvol (this, FULL); + ret = 0; + break; + case GF_AFR_OP_INDEX_SUMMARY: + ret = _get_index_summary_on_local_subvols (this, output); + if (ret) + goto out; + break; + case GF_AFR_OP_HEALED_FILES: + ret = _add_all_subvols_eh_to_dict (this, shd->healed, output); + break; + case GF_AFR_OP_HEAL_FAILED_FILES: + ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed, + output); + break; + case GF_AFR_OP_SPLIT_BRAIN_FILES: + ret = _add_all_subvols_eh_to_dict (this, shd->split_brain, + output); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); + break; + } +out: + dict_del (output, this->name); + return ret; +} void afr_do_poll_self_heal (void *data) @@ -39,21 +481,14 @@ afr_do_poll_self_heal (void *data) struct timeval timeout = {0}; xlator_t *this = NULL; long child = (long)data; - int i = 0; this = THIS; priv = this->private; shd = &priv->shd; - if (child == AFR_ALL_CHILDREN) { //done by command - for (i = 0; i < priv->child_count; i++) - afr_start_crawl (this, i, INDEX); + _do_self_heal_on_subvol (this, child, INDEX); + if (shd->pos[child] == AFR_POS_REMOTE) goto out; - } else { - afr_start_crawl (this, child, INDEX); - if (shd->pos[child] == AFR_POS_REMOTE) - goto out; - } timeout.tv_sec = AFR_POLL_TIMEOUT; timeout.tv_usec = 0; if (shd->timer[child]) @@ -70,9 +505,6 @@ out: return; } -static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, - xlator_t *readdir_xl); static int get_pathinfo_host (char *pathinfo, char *hostname, size_t size) { @@ -132,15 +564,16 @@ out: int afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, - loc_t *dirloc, xlator_t *readdir_xl) + loc_t *dirloc) { afr_private_t *priv = NULL; dict_t *xattr = NULL; void *index_gfid = NULL; loc_t rootloc = {0}; - struct iatt iatt = {0}; + struct iatt iattr = {0}; struct iatt parent = {0}; int ret = 0; + xlator_t *readdir_xl = crawl_data->readdir_xl; priv = this->private; if (crawl_data->crawl == FULL) { @@ -167,13 +600,13 @@ afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, dirloc->path = ""; dirloc->inode = inode_new (priv->root_inode->table); ret = syncop_lookup (readdir_xl, dirloc, NULL, - &iatt, NULL, &parent); + &iattr, NULL, &parent); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "lookup failed on " "index dir on %s", readdir_xl->name); goto out; } - inode_link (dirloc->inode, NULL, NULL, &iatt); + inode_link (dirloc->inode, NULL, NULL, &iattr); } ret = 0; out: @@ -185,7 +618,7 @@ out: int afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, - loc_t *dirloc, xlator_t *readdir_xl) + loc_t *dirloc) { fd_t *fd = NULL; int ret = 0; @@ -199,7 +632,7 @@ afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, goto out; } - ret = syncop_opendir (readdir_xl, dirloc, fd); + ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "opendir failed on %s", dirloc->path); @@ -247,7 +680,7 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, } gf_boolean_t -_crawl_proceed (xlator_t *this, int child) +_crawl_proceed (xlator_t *this, int child, int crawl_flags) { afr_private_t *priv = this->private; gf_boolean_t proceed = _gf_false; @@ -258,77 +691,33 @@ _crawl_proceed (xlator_t *this, int child) goto out; } - if (afr_up_children_count (priv->child_up, - priv->child_count) < 2) { - gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " - "< 2 children are up"); - goto out; - } - proceed = _gf_true; -out: - return proceed; -} - -static int -_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) -{ - int ret = 0; - - uuid_copy (loc->pargfid, parent->inode->gfid); - loc->path = ""; - loc->name = name; - loc->parent = inode_ref (parent->inode); - if (!loc->parent) { - loc->path = NULL; - loc_wipe (loc); - ret = -1; - } - return ret; -} - -void -_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc, - gf_dirent_t *entry, int op_ret, int op_errno, - xlator_t *readdir_xl) -{ - loc_t index_loc = {0}; - int ret = 0; - - if (op_ret && (op_errno == ENOENT)) { - ret = _build_index_loc (this, &index_loc, entry->d_name, - parentloc); - if (ret) + if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) { + if (afr_up_children_count (priv->child_up, + priv->child_count) < 2) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " + "< 2 children are up"); goto out; - gf_log (this->name, GF_LOG_INFO, "Removing stale index " - "for %s on %s", index_loc.name, readdir_xl->name); - ret = syncop_unlink (readdir_xl, &index_loc); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" - " index on %s - %s", index_loc.name, - readdir_xl->name, strerror (errno)); } - index_loc.path = NULL; - loc_wipe (&index_loc); } + proceed = _gf_true; out: - return; + return proceed; } static int -_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, - off_t *offset, afr_crawl_data_t *crawl_data, - xlator_t *readdir_xl) +_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, + off_t *offset, afr_crawl_data_t *crawl_data) { gf_dirent_t *entry = NULL; gf_dirent_t *tmp = NULL; - struct iatt iatt = {0}; - struct iatt parent = {0}; int ret = 0; loc_t entry_loc = {0}; fd_t *fd = NULL; + struct iatt iattr = {0}; list_for_each_entry_safe (entry, tmp, &entries->list, list) { - if (!_crawl_proceed (this, crawl_data->child)) { + if (!_crawl_proceed (this, crawl_data->child, + crawl_data->crawl_flags)) { ret = -1; goto out; } @@ -344,62 +733,51 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, continue; } + if (crawl_data->crawl == INDEX) + entry_loc.path = NULL;//HACK loc_wipe (&entry_loc); ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc, entry, crawl_data); if (ret) goto out; - if (uuid_is_null (entry_loc.gfid)) { - gf_log (this->name, GF_LOG_WARNING, "failed to build " - "location for %s", entry->d_name); - continue; - } - if (entry_loc.path) - gf_log (this->name, GF_LOG_DEBUG, "lookup %s", - entry_loc.path); - else - gf_log (this->name, GF_LOG_DEBUG, "lookup %s", - uuid_utoa (entry_loc.gfid)); - - ret = syncop_lookup (this, &entry_loc, NULL, - &iatt, NULL, &parent); - if (crawl_data->crawl == INDEX) { - _index_crawl_post_lookup_fop (this, parentloc, entry, - ret, errno, readdir_xl); - entry_loc.path = NULL; - loc_wipe (&entry_loc); + ret = crawl_data->process_entry (this, crawl_data, entry, + &entry_loc, parentloc, &iattr); + + if (crawl_data->crawl == INDEX) continue; - } - //Don't fail the crawl if lookup fails as it - //could be because of split-brain - if (ret || (!IA_ISDIR (iatt.ia_type))) + if (ret || !IA_ISDIR (iattr.ia_type)) continue; - inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt); - ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc, - readdir_xl); + + inode_link (entry_loc.inode, parentloc->inode, NULL, &iattr); + + fd = NULL; + ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc); if (ret) continue; - ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl); - fd_unref (fd); + ret = _crawl_directory (fd, &entry_loc, crawl_data); + if (fd) + fd_unref (fd); } ret = 0; out: + if (crawl_data->crawl == INDEX) + entry_loc.path = NULL; if (entry_loc.path) loc_wipe (&entry_loc); return ret; } static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, - xlator_t *readdir_xl) +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data) { xlator_t *this = NULL; off_t offset = 0; gf_dirent_t entries; int ret = 0; gf_boolean_t free_entries = _gf_false; + xlator_t *readdir_xl = crawl_data->readdir_xl; INIT_LIST_HEAD (&entries.list); this = THIS; @@ -424,15 +802,16 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, ret = 0; free_entries = _gf_true; - if (!_crawl_proceed (this, crawl_data->child)) { + if (!_crawl_proceed (this, crawl_data->child, + crawl_data->crawl_flags)) { ret = -1; goto out; } if (list_empty (&entries.list)) goto out; - ret = _perform_self_heal (this, loc, &entries, &offset, - crawl_data, readdir_xl); + ret = _process_entries (this, loc, &entries, &offset, + crawl_data); gf_dirent_free (&entries); free_entries = _gf_false; } @@ -515,14 +894,6 @@ out: return ret; } -static int -afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) -{ - GF_FREE (data); - STACK_DESTROY (sync_frame->root); - return 0; -} - static inline int afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) { @@ -530,17 +901,74 @@ afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) } static int -afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) +afr_dir_crawl (void *data) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = -1; + xlator_t *readdir_xl = NULL; + fd_t *fd = NULL; + loc_t dirloc = {0}; + afr_crawl_data_t *crawl_data = data; + + this = THIS; + priv = this->private; + shd = &priv->shd; + + if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags)) + goto out; + + ret = afr_find_child_position (this, crawl_data->child); + if (ret) + goto out; + + if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) + goto out; + + readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); + if (!readdir_xl) + goto out; + crawl_data->readdir_xl = readdir_xl; + + ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc); + if (ret) + goto out; + + ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc); + if (ret) + goto out; + + ret = _crawl_directory (fd, &dirloc, crawl_data); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", + readdir_xl->name); + else + gf_log (this->name, GF_LOG_INFO, "Crawl completed " + "on %s", readdir_xl->name); + if (crawl_data->crawl == INDEX) + dirloc.path = NULL; +out: + if (fd) + fd_unref (fd); + if (crawl_data->crawl == INDEX) + dirloc.path = NULL; + loc_wipe (&dirloc); + return ret; +} + +static int +afr_dir_exclusive_crawl (void *data) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; - loc_t dirloc = {0}; gf_boolean_t crawl = _gf_false; int ret = 0; - xlator_t *readdir_xl = NULL; - fd_t *fd = NULL; int child = -1; + xlator_t *this = NULL; + afr_crawl_data_t *crawl_data = data; + this = THIS; priv = this->private; shd = &priv->shd; child = crawl_data->child; @@ -548,7 +976,8 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) LOCK (&priv->lock); { if (shd->inprogress[child]) { - shd->pending[child] = _gf_true; + if (shd->pending[child] != FULL) + shd->pending[child] = crawl_data->crawl; } else { shd->inprogress[child] = _gf_true; crawl = _gf_true; @@ -556,11 +985,6 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) } UNLOCK (&priv->lock); - if (!priv->root_inode) { - ret = -1; - goto out; - } - if (!crawl) { gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress " "for %s", priv->children[child]->name); @@ -568,87 +992,35 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) } do { - readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); - if (!readdir_xl) - goto done; - ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc, - readdir_xl); - if (ret) - goto done; - ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc, - readdir_xl); - if (ret) - goto done; - ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", - readdir_xl->name); - else - gf_log (this->name, GF_LOG_INFO, "Crawl completed " - "on %s", readdir_xl->name); - fd_unref (fd); - fd = NULL; -done: + afr_dir_crawl (data); LOCK (&priv->lock); { - if (shd->pending[child]) { - shd->pending[child] = _gf_false; + if (shd->pending[child] != NONE) { + crawl_data->crawl = shd->pending[child]; + shd->pending[child] = NONE; } else { shd->inprogress[child] = _gf_false; crawl = _gf_false; } } UNLOCK (&priv->lock); - if (crawl_data->crawl == INDEX) { - dirloc.path = NULL; - loc_wipe (&dirloc); - } } while (crawl); -out: - if (fd) - fd_unref (fd); - if (crawl_data->crawl == INDEX) { - dirloc.path = NULL; - loc_wipe (&dirloc); - } - return ret; -} - -static int -afr_crawl (void *data) -{ - xlator_t *this = NULL; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int ret = -1; - afr_crawl_data_t *crawl_data = data; - - this = THIS; - priv = this->private; - shd = &priv->shd; - - if (!_crawl_proceed (this, crawl_data->child)) - goto out; - ret = afr_find_child_position (this, crawl_data->child); - if (ret) - goto out; - - if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) - goto out; - - ret = afr_crawl_directory (this, crawl_data); out: return ret; } void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl) +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, + process_entry_cbk_t process_entry, void *op_data, + gf_boolean_t exclusive, int crawl_flags, + afr_crawl_done_cbk_t crawl_done) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; call_frame_t *frame = NULL; afr_crawl_data_t *crawl_data = NULL; int ret = 0; + int (*crawler) (void*) = NULL; priv = this->private; shd = &priv->shd; @@ -662,16 +1034,24 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl) afr_set_lk_owner (frame, this); afr_set_low_priority (frame); crawl_data = GF_CALLOC (1, sizeof (*crawl_data), - gf_afr_mt_afr_crawl_data_t); + gf_afr_mt_crawl_data_t); if (!crawl_data) goto out; + crawl_data->process_entry = process_entry; crawl_data->child = idx; crawl_data->pid = frame->root->pid; crawl_data->crawl = crawl; - gf_log (this->name, GF_LOG_INFO, "starting crawl for %s", - priv->children[idx]->name); - ret = synctask_new (this->ctx->env, afr_crawl, - afr_crawl_done, frame, crawl_data); + crawl_data->op_data = op_data; + crawl_data->crawl_flags = crawl_flags; + gf_log (this->name, GF_LOG_INFO, "starting crawl %d for %s", + crawl_data->crawl, priv->children[idx]->name); + + if (exclusive) + crawler = afr_dir_exclusive_crawl; + else + crawler = afr_dir_crawl; + ret = synctask_new (this->ctx->env, crawler, + crawl_done, frame, crawl_data); if (ret) gf_log (this->name, GF_LOG_ERROR, "Could not create the " "task for %d ret %d", idx, ret); @@ -679,16 +1059,6 @@ out: return; } -//void -//afr_full_self_heal (xlator_t *this) -//{ -// int i = 0; -// afr_private_t *priv = this->private; -// -// for (i = 0; i < priv->child_count; i++) -// afr_start_crawl (this, i, FULL); -//} - void afr_build_root_loc (xlator_t *this, loc_t *loc) { diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index eb1021995..44fd9f385 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -26,17 +26,22 @@ #define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) #define AFR_ALL_CHILDREN -1 -typedef enum { - INDEX, - FULL, -} afr_crawl_type_t; typedef struct afr_crawl_data_ { - int child; - pid_t pid; - afr_crawl_type_t crawl; - xlator_t *readdir_xl; + int child; + pid_t pid; + afr_crawl_type_t crawl; + xlator_t *readdir_xl; + void *op_data; + int crawl_flags; + int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data, + gf_dirent_t *entry, loc_t *child, loc_t *parent, + struct iatt *iattr); } afr_crawl_data_t; +typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data, + gf_dirent_t *entry, loc_t *child, loc_t *parent, + struct iatt *iattr); + void afr_proactive_self_heal (xlator_t *this, int idx); void afr_build_root_loc (xlator_t *this, loc_t *loc); @@ -48,4 +53,7 @@ afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent); void afr_do_poll_self_heal (void *data); + +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output); #endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index abc6aa3e5..8e2ef1008 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -30,7 +30,10 @@ #endif #include "afr-common.c" -#define SHD_INODE_LRU_LIMIT 100 +#define SHD_INODE_LRU_LIMIT 100 +#define AFR_EH_HEALED_LIMIT 1024 +#define AFR_EH_HEAL_FAIL_LIMIT 1024 +#define AFR_EH_SPLIT_BRAIN_LIMIT 1024 struct volume_options options[]; @@ -39,8 +42,13 @@ notify (xlator_t *this, int32_t event, void *data, ...) { int ret = -1; + va_list ap; + void *data2 = NULL; - ret = afr_notify (this, event, data); + va_start (ap, data); + data2 = va_arg (ap, dict_t*); + va_end (ap); + ret = afr_notify (this, event, data, data2); return ret; } @@ -342,42 +350,55 @@ init (xlator_t *this) goto out; } - priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, - gf_afr_mt_afr_brick_pos_t); - if (!priv->shd.pos) { - ret = -ENOMEM; + priv->first_lookup = 1; + priv->root_inode = NULL; + + if (!priv->shd.enabled) { + ret = 0; goto out; } + ret = -ENOMEM; + priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, + gf_afr_mt_brick_pos_t); + if (!priv->shd.pos) + goto out; + priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count, - gf_afr_mt_afr_shd_bool_t); - if (!priv->shd.pending) { - ret = -ENOMEM; + gf_afr_mt_int32_t); + if (!priv->shd.pending) goto out; - } priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress), - child_count, - gf_afr_mt_afr_shd_bool_t); - if (!priv->shd.inprogress) { - ret = -ENOMEM; + child_count, gf_afr_mt_shd_bool_t); + if (!priv->shd.inprogress) goto out; - } priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count, - gf_afr_mt_afr_shd_timer_t); - if (!priv->shd.timer) { - ret = -ENOMEM; + gf_afr_mt_shd_timer_t); + if (!priv->shd.timer) + goto out; + + priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false); + if (!priv->shd.healed) + goto out; + + priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false); + if (!priv->shd.heal_failed) + goto out; + + priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false); + if (!priv->shd.split_brain) + goto out; + + priv->shd.sh_times = GF_CALLOC (priv->child_count, + sizeof (*priv->shd.sh_times), + gf_afr_mt_time_t); + if (!priv->shd.sh_times) + goto out; + + this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); + if (!this->itable) goto out; - } - if (priv->shd.enabled) { - this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); - if (!this->itable) { - ret = -ENOMEM; - goto out; - } - } - priv->first_lookup = 1; - priv->root_inode = NULL; ret = 0; out: @@ -393,6 +414,8 @@ fini (xlator_t *this) priv = this->private; this->private = NULL; afr_priv_destroy (priv); + if (this->itable);//I dont see any destroy func + return 0; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f3d372de5..0f4a6d90a 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -88,12 +88,22 @@ typedef struct afr_inode_ctx_ { int32_t *fresh_children;//increasing order of latency } afr_inode_ctx_t; +typedef enum { + NONE, + INDEX, + FULL, +} afr_crawl_type_t; + typedef struct afr_self_heald_ { - gf_boolean_t enabled; - gf_boolean_t *pending; - gf_boolean_t *inprogress; - afr_child_pos_t *pos; - gf_timer_t **timer; + gf_boolean_t enabled; + afr_crawl_type_t *pending; + gf_boolean_t *inprogress; + afr_child_pos_t *pos; + time_t *sh_times; + gf_timer_t **timer; + eh_t *healed; + eh_t *heal_failed; + eh_t *split_brain; } afr_self_heald_t; typedef struct _afr_private { @@ -747,8 +757,7 @@ int pump_command_reply (call_frame_t *frame, xlator_t *this); int32_t -afr_notify (xlator_t *this, int32_t event, - void *data, ...); +afr_notify (xlator_t *this, int32_t event, void *data, void *data2); int afr_attempt_lock_recovery (xlator_t *this, int32_t child_index); diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 281bfd722..eae7899e9 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -2334,7 +2334,7 @@ notify (xlator_t *this, int32_t event, child_xl = (xlator_t *) data; - ret = afr_notify (this, event, data); + ret = afr_notify (this, event, data, NULL); switch (event) { case GF_EVENT_CHILD_DOWN: diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index b06dd28cf..79439535f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -651,6 +651,7 @@ out: glusterd_op_sm (); return ret; } + int glusterd_handle_cli_probe (rpcsvc_request_t *req) { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 77ed83f8b..da1299de0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -152,6 +152,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin gd1_mgmt_brick_op_req *brick_req = NULL; char *volname = NULL; char name[1024] = {0,}; + gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; GF_ASSERT (op < GD_OP_MAX); GF_ASSERT (op > GD_OP_NONE); @@ -190,8 +191,12 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin if (!brick_req) goto out; - brick_req->op = GLUSTERD_BRICK_XLATOR_HEAL; + brick_req->op = GLUSTERD_BRICK_XLATOR_OP; brick_req->name = ""; + ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); + if (ret) + goto out; + ret = dict_set_int32 (dict, "xl-op", heal_op); } break; case GD_OP_STATUS_VOLUME: @@ -2190,6 +2195,7 @@ glusterd_need_brick_op (glusterd_op_t op) case GD_OP_PROFILE_VOLUME: case GD_OP_STATUS_VOLUME: case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_HEAL_VOLUME: ret = _gf_true; break; default: @@ -2578,6 +2584,94 @@ _status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, return; } +//input-key: :-* +//output-key: -* +void +_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int brick_count = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + + rsp_ctx = data; + rxl_end = strchr (key, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key) - strlen (rxl_end); + strncpy (int_str, key, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + + ret = dict_get_int32 (rsp_ctx->dict, "count", &brick_count); + if (brick_id >= brick_count) + ret = dict_set_int32 (rsp_ctx->dict, "count", brick_id + 1); +out: + return; +} + +int +glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx, char **op_errstr) +{ + int ret = 0; + glusterd_heal_rsp_conv_t rsp_ctx = {0}; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + + ret = dict_get_str (req_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + rsp_ctx.dict = op_ctx; + rsp_ctx.volinfo = volinfo; + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + +out: + return ret; +} + int glusterd_status_volume_brick_rsp (glusterd_brickinfo_t *brickinfo, dict_t *rsp_dict, dict_t *op_ctx, @@ -2607,27 +2701,29 @@ glusterd_status_volume_brick_rsp (glusterd_brickinfo_t *brickinfo, rsp_ctx.count = index; rsp_ctx.dict = op_ctx; dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx); - ret = dict_set_int32 (op_ctx, "count", count); out: return ret; } int32_t -glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo, - glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, - char **op_errstr) +glusterd_handle_node_rsp (glusterd_req_ctx_t *req_ctx, void *pending_entry, + glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr) { - int ret = 0; + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; GF_ASSERT (op_errstr); switch (op) { case GD_OP_PROFILE_VOLUME: + brickinfo = pending_entry; ret = glusterd_profile_volume_brick_rsp (brickinfo, rsp_dict, op_ctx, op_errstr); break; case GD_OP_STATUS_VOLUME: + brickinfo = pending_entry; ret = glusterd_status_volume_brick_rsp (brickinfo, rsp_dict, op_ctx, op_errstr); break; @@ -2636,6 +2732,10 @@ glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo, dict_copy (rsp_dict, op_ctx); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_heal_volume_brick_rsp (req_ctx->dict, rsp_dict, + op_ctx, op_errstr); + break; default: break; } @@ -2892,16 +2992,91 @@ _add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count) char key[128] = {0,}; char *xname = NULL; - snprintf (key, sizeof (key), "heal-%d", count); + snprintf (key, sizeof (key), "xl-%d", count); ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index); if (ret == -1) goto out; ret = dict_set_dynstr (dict, key, xname); + if (ret) + goto out; + + ret = dict_set_int32 (dict, xname, index); out: return ret; } +int +_select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + int index = 1; + int rxlator_count = 0; + int replica_count = 0; + gf_boolean_t add = _gf_false; + + priv = this->private; + replica_count = volinfo->replica_count; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + + if (!uuid_compare (priv->uuid, brickinfo->uuid)) + add = _gf_true; + if (index % replica_count == 0) { + if (add) { + _add_rxlator_to_dict (dict, volinfo->volname, + (index-1)/replica_count, + rxlator_count); + rxlator_count++; + } + add = _gf_false; + } + + index++; + } + return rxlator_count; +} + +int +_select_rxlators_for_full_self_heal (xlator_t *this, + glusterd_volinfo_t *volinfo, + dict_t *dict) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + int index = 1; + int rxlator_count = 0; + int replica_count = 0; + uuid_t candidate = {0}; + + priv = this->private; + replica_count = volinfo->replica_count; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + + if (uuid_compare (brickinfo->uuid, candidate) > 0) + uuid_copy (candidate, brickinfo->uuid); + + if (index % replica_count == 0) { + if (!uuid_compare (priv->uuid, candidate)) { + _add_rxlator_to_dict (dict, volinfo->volname, + (index-1)/replica_count, + rxlator_count); + rxlator_count++; + } + uuid_clear (candidate); + } + + index++; + } + return rxlator_count; +} + static int glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr) { @@ -2909,14 +3084,11 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr) char *volname = NULL; glusterd_conf_t *priv = NULL; glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; xlator_t *this = NULL; char msg[2048] = {0,}; - int replica_count = 0; - int index = 1; - int rxlator_count = 0; - uuid_t candidate = {0}; glusterd_pending_node_t *pending_node = NULL; + gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; + int rxlator_count = 0; this = THIS; GF_ASSERT (this); @@ -2939,48 +3111,43 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr) goto out; } - replica_count = volinfo->replica_count; - - index = 1; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (uuid_is_null (brickinfo->uuid)) - (void)glusterd_resolve_brick (brickinfo); - - if (uuid_compare (brickinfo->uuid, candidate) > 0) - uuid_copy (candidate, brickinfo->uuid); - - if (index % replica_count == 0) { - if (!uuid_compare (priv->uuid, candidate)) { - _add_rxlator_to_dict (dict, volname, - (index-1)/replica_count, - rxlator_count); - rxlator_count++; - } - uuid_clear (candidate); - } + ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); + if (ret || (heal_op == GF_AFR_OP_INVALID)) { + gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid"); + goto out; + } - index++; + switch (heal_op) { + case GF_AFR_OP_HEAL_FULL: + rxlator_count = _select_rxlators_for_full_self_heal (this, + volinfo, + dict); + break; + default: + rxlator_count = _select_rxlators_with_local_bricks (this, + volinfo, + dict); + break; } + if (!rxlator_count) + goto out; ret = dict_set_int32 (dict, "count", rxlator_count); if (ret) goto out; - if (rxlator_count) { - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } else { - pending_node->node = priv->shd; - pending_node->type = GD_NODE_SHD; - list_add_tail (&pending_node->list, - &opinfo.pending_bricks); - pending_node = NULL; - } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = priv->shd; + pending_node->type = GD_NODE_SHD; + list_add_tail (&pending_node->list, + &opinfo.pending_bricks); + pending_node = NULL; } - out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); return ret; @@ -3222,8 +3389,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) opinfo.brick_pending_count--; - glusterd_handle_brick_rsp (pending_entry, op, ev_ctx->rsp_dict, - op_ctx, &op_errstr); + glusterd_handle_node_rsp (req_ctx, pending_entry, op, ev_ctx->rsp_dict, + op_ctx, &op_errstr); if (opinfo.brick_pending_count > 0) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 12aa139f5..b4df82017 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -154,6 +154,11 @@ typedef struct glusterd_pr_brick_rsp_conv_t { dict_t *dict; } glusterd_pr_brick_rsp_conv_t; +typedef struct glusterd_heal_rsp_conv_ { + dict_t *dict; + glusterd_volinfo_t *volinfo; +} glusterd_heal_rsp_conv_t; + typedef struct glusterd_status_rsp_conv_ { int count; dict_t *dict; diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 537496f08..39a9c6161 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -111,6 +111,11 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, } break; } + case GD_OP_HEAL_VOLUME: + { + glusterd_add_bricks_hname_path_to_dict (ctx); + break; + } case GD_OP_PROFILE_VOLUME: { if (ctx && dict_get_int32 (ctx, "count", &count)) { @@ -142,7 +147,6 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_ADD_BRICK: case GD_OP_LOG_ROTATE: case GD_OP_SYNC_VOLUME: - case GD_OP_HEAL_VOLUME: case GD_OP_STATEDUMP_VOLUME: case GD_OP_REPLACE_BRICK: case GD_OP_STATUS_VOLUME: @@ -1107,6 +1111,48 @@ out: return ret; } +void +_heal_volume_add_peer_rsp (dict_t *peer_dict, char *key, data_t *value, + void *data) +{ + int max_brick = 0; + int peer_max_brick = 0; + int ret = 0; + dict_t *ctx_dict = data; + + + + ret = dict_get_int32 (ctx_dict, "count", &max_brick); + ret = dict_get_int32 (peer_dict, "count", &peer_max_brick); + if (peer_max_brick > max_brick) + ret = dict_set_int32 (ctx_dict, "count", peer_max_brick); + else + ret = dict_set_int32 (ctx_dict, "count", max_brick); + dict_del (peer_dict, "count"); + dict_copy (peer_dict, ctx_dict); + return; +} + +int +glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict) +{ + int ret = 0; + dict_t *ctx_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + + GF_ASSERT (rsp_dict); + + op = glusterd_op_get_op (); + GF_ASSERT (GD_OP_HEAL_VOLUME == op); + + ctx_dict = glusterd_op_get_ctx (op); + + if (!ctx_dict) + goto out; + dict_foreach (rsp_dict, _heal_volume_add_peer_rsp, ctx_dict); +out: + return ret; +} int32_t glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov, @@ -1229,6 +1275,13 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov, case GD_OP_DEFRAG_BRICK_VOLUME: break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_volume_heal_use_rsp_dict (dict); + if (ret) + goto out; + + break; + default: break; } @@ -1723,7 +1776,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, char *op_errstr = NULL; int pending_bricks = 0; glusterd_pending_node_t *pending_node; - glusterd_req_ctx_t *req_ctx = NULL; + glusterd_req_ctx_t *req_ctx = NULL; struct rpc_clnt *rpc = NULL; if (!this) { diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 4ec8ae5dc..117e5e8f3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1475,6 +1475,36 @@ _add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) return; } +int32_t +glusterd_add_bricks_hname_path_to_dict (dict_t *dict) +{ + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = 0; + char key[256] = {0}; + int index = 0; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + snprintf (key, sizeof (key), "%d-hostname", index); + ret = dict_set_str (dict, key, brickinfo->hostname); + snprintf (key, sizeof (key), "%d-path", index); + ret = dict_set_str (dict, key, brickinfo->path); + index++; + } +out: + return ret; +} + int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index f71ecc404..de6185753 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -410,4 +410,7 @@ glusterd_get_trusted_client_filepath (char *filepath, gf_transport_type type); int glusterd_restart_rebalance (glusterd_conf_t *conf); + +int32_t +glusterd_add_bricks_hname_path_to_dict (dict_t *dict); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 9df9d4219..caafa9fd0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -470,6 +470,8 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) "failed to " "unserialize req-buffer to dictionary"); goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; } } @@ -489,8 +491,6 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) out: if (ret && dict) dict_unref (dict); - if (cli_req.dict.dict_val) - free (cli_req.dict.dict_val); //its malloced by xdr glusterd_friend_sm (); glusterd_op_sm (); @@ -999,6 +999,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) char msg[2048]; glusterd_conf_t *priv = NULL; dict_t *opt_dict = NULL; + gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; priv = THIS->private; if (!priv) { @@ -1068,6 +1069,15 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) goto out; } + ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); + if (ret || (heal_op == GF_AFR_OP_INVALID)) { + ret = -1; + snprintf (msg, sizeof (msg), "Invalid heal-op"); + *op_errstr = gf_strdup (msg); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + goto out; + } + ret = 0; out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); -- cgit