From e06f4590616dfe7b93e8ac147ed812756df4f22d Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananjay@redhat.com>
Date: Thu, 22 Sep 2016 16:48:54 +0530
Subject: cluster/afr: CLI for granular entry heal enablement/disablement

Backport of: http://review.gluster.org/15747

When there are already existing non-granular indices created that are
yet to be healed, if the granular-entry-heal option is toggled from
'off' to 'on', AFR self-heal, whenever it kicks in, will look for
granular indices in 'entry-changes'. Because of the absence of name
indices, the granular entry healing logic will fail to heal these
directories and, worse yet, unset pending extended attributes on the
assumption that there are no entries that need heal.

To get around this, a new CLI is introduced which invokes the glfsheal
program to determine whether, at the time an attempt is made to enable
granular entry heal, there are pending heals on the volume OR one or
more bricks are down. If either is true, the command fails with the
appropriate error.

New CLI: gluster volume heal <VOLNAME> granular-entry-heal {enable,disable}

Change-Id: I342e0390f847fcb015a50ef58aedfcbcb58f4ed3
BUG: 1398501
Signed-off-by: Krutika Dhananjay <kdhananjay@redhat.com>
Reviewed-on: http://review.gluster.org/15942
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 heal/src/glfs-heal.c | 180 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 151 insertions(+), 29 deletions(-)
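Before the diff itself, a quick illustration of the intended behaviour. This
usage sketch is inferred from the commit message and from the error strings
added below in glfsh_end_op_granular_entry_heal; it is not output captured
from a real cluster, and the volume name is a placeholder:

    # gluster volume heal <VOLNAME> granular-entry-heal enable
    One or more entries need heal. Please execute the command again
    after there are no entries to be healed

    # gluster volume heal <VOLNAME> granular-entry-heal enable
    One or more bricks could be down. Please execute the command again
    after bringing all bricks online and finishing any pending heals

Only when neither condition holds does the enable (or disable) request go
through.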
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index 0a880cb752e..a306d8962fa 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -39,7 +39,7 @@ xmlDocPtr glfsh_doc = NULL;
                 ret = 0;                \
         } while (0)                     \
 
-typedef void (*print_status) (dict_t *, char *, uuid_t, uint64_t *,
+typedef int (*print_status) (dict_t *, char *, uuid_t, uint64_t *,
                               gf_boolean_t flag);
 
 int glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol,
@@ -65,12 +65,41 @@ int32_t is_xml;
                   "source-brick <HOSTNAME:BRICKNAME> [<FILE>] | "\
                   "split-brain-info]\n"
 
+typedef enum {
+        GLFSH_MODE_CONTINUE_ON_ERROR = 1,
+        GLFSH_MODE_EXIT_ON_FIRST_FAILURE,
+} glfsh_fail_mode_t;
+
 int
 glfsh_init ()
 {
         return 0;
 }
 
+int
+glfsh_end_op_granular_entry_heal (int op_ret, char *op_errstr)
+{
+        /* If an error string is available, give it higher precedence. */
+
+        if (op_errstr) {
+                printf ("%s\n", op_errstr);
+        } else if (op_ret < 0) {
+                if (op_ret == -EAGAIN)
+                        printf ("One or more entries need heal. Please execute "
+                                "the command again after there are no entries "
+                                "to be healed\n");
+                else if (op_ret == -ENOTCONN)
+                        printf ("One or more bricks could be down. Please "
+                                "execute the command again after bringing all "
+                                "bricks online and finishing any pending "
+                                "heals\n");
+                else
+                        printf ("Command failed - %s. Please check the logs for"
+                                " more details\n", strerror (-op_ret));
+        }
+        return 0;
+}
+
 int
 glfsh_end (int op_ret, char *op_errstr)
 {
@@ -86,6 +115,12 @@ glfsh_print_hr_spb_status (char *path, uuid_t gfid, char *status)
         return;
 }
 
+void
+glfsh_no_print_hr_heal_status (char *path, uuid_t gfid, char *status)
+{
+        return;
+}
+
 void
 glfsh_print_hr_heal_status (char *path, uuid_t gfid, char *status)
 {
@@ -290,6 +325,12 @@ out:
         return ret;
 }
 
+int
+glfsh_no_print_hr_heal_op_status (int ret, uint64_t num_entries, char *fmt_str)
+{
+        return 0;
+}
+
 int
 glfsh_print_hr_heal_op_status (int ret, uint64_t num_entries, char *fmt_str)
 {
@@ -417,7 +458,7 @@ glfsh_index_purge (xlator_t *subvol, inode_t *inode, char *name)
         return ret;
 }
 
-void
+int
 glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
                         uint64_t *num_entries, gf_boolean_t flag)
 {
@@ -429,7 +470,7 @@ glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
 
         ret = dict_get_str (dict, "heal-info", &value);
         if (ret)
-                return;
+                return 0;
 
         if (!strcmp (value, "split-brain")) {
                 split_b = _gf_true;
@@ -451,10 +492,10 @@ glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
                                 gfid, NULL);
                 }
         }
-        return;
+        return 0;
 }
 
-void
+int
 glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid,
                          uint64_t *num_entries, gf_boolean_t ignore_dirty)
 {
@@ -466,7 +507,7 @@ glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid,
 
         ret = dict_get_str (dict, "heal-info", &value);
         if (ret || (!strcmp (value, "no-heal")))
-                return;
+                return 0;
 
         if (!strcmp (value, "heal")) {
                 ret = gf_asprintf (&status, " ");
@@ -509,7 +550,7 @@ out:
                 if (pending) {
                         GF_FREE (status);
                         status = NULL;
-                        return;
+                        return 0;
                 }
         }
         if (ret == -1)
@@ -522,7 +563,21 @@ out:
                 status ? status : "");
 
         GF_FREE (status);
-        return;
+        return 0;
+}
+
+int
+glfsh_heal_status_boolean (dict_t *dict, char *path, uuid_t gfid,
+                           uint64_t *num_entries, gf_boolean_t ignore_dirty)
+{
+        int   ret   = 0;
+        char *value = NULL;
+
+        ret = dict_get_str (dict, "heal-info", &value);
+        if ((!ret) && (!strcmp (value, "no-heal")))
+                return 0;
+        else
+                return -1;
 }
 
 static int
@@ -556,11 +611,12 @@ static int
 glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
                        uint64_t *offset, uint64_t *num_entries,
                        print_status glfsh_print_status,
-                       gf_boolean_t ignore_dirty)
+                       gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode)
 {
         gf_dirent_t      *entry = NULL;
         gf_dirent_t      *tmp = NULL;
         int               ret = 0;
+        int               print_status = 0;
         char             *path = NULL;
         uuid_t            gfid = {0};
         xlator_t         *this = NULL;
@@ -586,8 +642,13 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
                 gf_uuid_copy (loc.gfid, gfid);
                 ret = syncop_getxattr (this, &loc, &dict, GF_HEAL_INFO,
                                        NULL, NULL);
-                if (ret)
-                        continue;
+                if (ret) {
+                        if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) &&
+                            (ret == -ENOTCONN))
+                                goto out;
+                        else
+                                continue;
+                }
 
                 ret = syncop_gfid_to_path (this->itable, xl, gfid, &path);
 
@@ -596,11 +657,19 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
                         ret = 0;
                         continue;
                 }
-                if (dict)
-                        glfsh_print_status (dict, path, gfid,
-                                            num_entries, ignore_dirty);
+                if (dict) {
+                        print_status = glfsh_print_status (dict, path, gfid,
+                                                           num_entries,
+                                                           ignore_dirty);
+                        if ((print_status) &&
+                            (mode != GLFSH_MODE_CONTINUE_ON_ERROR)) {
+                                ret = -EAGAIN;
+                                goto out;
+                        }
+                }
         }
         ret = 0;
+out:
         GF_FREE (path);
         if (dict) {
                 dict_unref (dict);
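The hunk above is the core of the pre-flight check: glfsh_process_entries now
consults the return value of the print_status callback together with the new
fail mode. A minimal standalone C sketch of that decision, with a hypothetical
helper name (handle_entry_status is not a function in this patch):

    #include <errno.h>

    typedef enum {
            GLFSH_MODE_CONTINUE_ON_ERROR = 1,
            GLFSH_MODE_EXIT_ON_FIRST_FAILURE,
    } glfsh_fail_mode_t;

    /* A non-zero per-entry status aborts the index crawl only when the
     * caller asked for exit-on-first-failure semantics, which is what the
     * granular-entry-heal enable check does. */
    static int
    handle_entry_status (int status, glfsh_fail_mode_t mode)
    {
            if (status && (mode != GLFSH_MODE_CONTINUE_ON_ERROR))
                    return -EAGAIN;  /* an entry still needs heal */
            return 0;                /* heal-info mode: keep listing */
    }

In the patch itself, the same -EAGAIN travels up through glfsh_crawl_directory
and glfsh_gather_heal_info to main(), where glfsh_end_op_granular_entry_heal
turns it into the "entries need heal" message shown earlier.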
@@ -615,17 +684,21 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
                        dict_t *xattr_req, uint64_t *num_entries,
                        gf_boolean_t ignore)
 {
-        uint64_t          offset = 0;
+        int               ret          = 0;
+        int               heal_op      = -1;
+        uint64_t          offset       = 0;
         gf_dirent_t       entries;
-        int               ret = 0;
         gf_boolean_t      free_entries = _gf_false;
-        int               heal_op = -1;
+        glfsh_fail_mode_t mode         = GLFSH_MODE_CONTINUE_ON_ERROR;
 
         INIT_LIST_HEAD (&entries.list);
 
         ret = dict_get_int32 (xattr_req, "heal-op", &heal_op);
         if (ret)
                 return ret;
 
+        if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+                mode = GLFSH_MODE_EXIT_ON_FIRST_FAILURE;
+
         while (1) {
                 ret = syncop_readdir (readdir_xl, fd, 131072, offset,
                                       &entries, NULL, NULL);
@@ -642,7 +715,7 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
                                                      &entries, &offset,
                                                      num_entries,
                                                      glfsh_print_heal_status,
-                                                     ignore);
+                                                     ignore, mode);
                         if (ret < 0)
                                 goto out;
                 } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) {
@@ -650,13 +723,20 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
                                                      &entries, &offset,
                                                      num_entries,
                                                      glfsh_print_spb_status,
-                                                     ignore);
+                                                     ignore, mode);
                         if (ret < 0)
                                 goto out;
                 } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
                         ret = glfsh_heal_entries (fs, top_subvol, rootloc,
                                                   &entries, &offset,
                                                   num_entries, xattr_req);
+                } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) {
+                        ret = glfsh_process_entries (readdir_xl, fd, &entries,
+                                                     &offset, num_entries,
+                                                     glfsh_heal_status_boolean,
+                                                     ignore, mode);
+                        if (ret < 0)
+                                goto out;
                 }
                 gf_dirent_free (&entries);
                 free_entries = _gf_false;
@@ -668,6 +748,12 @@ out:
         return ret;
 }
 
+static int
+glfsh_no_print_brick_from_xl (xlator_t *xl, loc_t *rootloc)
+{
+        return 0;
+}
+
 static int
 glfsh_print_brick_from_xl (xlator_t *xl, loc_t *rootloc)
 {
@@ -746,6 +832,13 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
         if (ret)
                 goto out;
 
+        if ((!is_parent_replicate) &&
+            ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) ||
+             (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE))) {
+                ret = 0;
+                goto out;
+        }
+
         ret = glfsh_output->print_brick_from_xl (xl, rootloc);
         if (ret < 0)
                 goto out;
@@ -753,6 +846,10 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
         ret = glfsh_print_pending_heals_type (fs, top_subvol, rootloc, xl,
                                               heal_op, xattr_req,
                                               GF_XATTROP_INDEX_GFID, &count);
+
+        if (ret < 0 && heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+                goto out;
+
         total += count;
         count = 0;
         if (ret == -ENOTCONN)
@@ -833,14 +930,14 @@ out:
         return NULL;
 }
 
-
 int
 glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
                         gf_xl_afr_op_t heal_op)
 {
-        xlator_t  *xl       = NULL;
+        int        ret      = 0;
+        xlator_t  *xl       = NULL;
         xlator_t  *heal_xl  = NULL;
-        xlator_t  *old_THIS = NULL;
+        xlator_t  *old_THIS = NULL;
 
         xl = top_subvol;
         while (xl->next)
@@ -851,20 +948,28 @@ glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
                 if (heal_xl) {
                         old_THIS = THIS;
                         THIS = heal_xl;
-                        glfsh_print_pending_heals (fs, top_subvol,
-                                                   rootloc, xl,
-                                                   heal_op,
-                                                   !strcmp
-                                                   (heal_xl->type,
-                                                   "cluster/replicate"));
+                        ret = glfsh_print_pending_heals (fs, top_subvol,
+                                                         rootloc, xl,
+                                                         heal_op,
+                                                         !strcmp
+                                                         (heal_xl->type,
+                                                         "cluster/replicate"));
                         THIS = old_THIS;
+
+                        if ((ret < 0) &&
+                            (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE))
+                                goto out;
                         }
                 }
 
                 xl = xl->prev;
         }
 
-        return 0;
+out:
+        if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+                ret = 0;
+
+        return ret;
 }
 
 int
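Note the asymmetry introduced in glfsh_gather_heal_info above: only the
granular-entry-heal enable operation propagates a per-subvolume failure, while
every other heal-op keeps the historical always-succeed result. A sketch of
that rule, with hypothetical stub values standing in for the real GF_SHD_OP_*
identifiers from the glusterfs headers:

    /* Hypothetical stand-ins for the real heal-op identifiers. */
    typedef enum {
            OP_INDEX_SUMMARY_SKETCH,
            OP_GRANULAR_ENTRY_HEAL_ENABLE_SKETCH,
    } heal_op_sketch_t;

    /* Mirrors the logic at the out: label above. */
    static int
    finalize_ret (int ret, heal_op_sketch_t heal_op)
    {
            if (heal_op != OP_GRANULAR_ENTRY_HEAL_ENABLE_SKETCH)
                    return 0;   /* heal-info and friends always report 0 */
            return ret;         /* -EAGAIN/-ENOTCONN reach the caller */
    }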
@@ -1102,6 +1207,15 @@ glfsh_info_t glfsh_human_readable = {
         .end = glfsh_end
 };
 
+glfsh_info_t glfsh_no_print = {
+        .init = glfsh_init,
+        .print_brick_from_xl = glfsh_no_print_brick_from_xl,
+        .print_heal_op_status = glfsh_no_print_hr_heal_op_status,
+        .print_heal_status = glfsh_no_print_hr_heal_status,
+        .print_spb_status = glfsh_no_print_hr_heal_status,
+        .end = glfsh_end_op_granular_entry_heal
+};
+
 #if (HAVE_LIB_XML)
 glfsh_info_t glfsh_xml_output = {
         .init = glfsh_xml_init,
@@ -1145,6 +1259,8 @@ main (int argc, char **argv)
                 } else if (!strcmp (argv[2], "xml")) {
                         heal_op = GF_SHD_OP_INDEX_SUMMARY;
                         is_xml = 1;
+                } else if (!strcmp (argv[2], "granular-entry-heal-op")) {
+                        heal_op = GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE;
                 } else {
                         printf (USAGE_STR, argv[0]);
                         ret = -1;
@@ -1201,6 +1317,9 @@ main (int argc, char **argv)
 
         }
 
+        if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+                glfsh_output = &glfsh_no_print;
+
         ret = glfsh_output->init ();
         if (ret)
                 exit (EXIT_FAILURE);
@@ -1277,6 +1396,7 @@ main (int argc, char **argv)
         switch (heal_op) {
         case GF_SHD_OP_INDEX_SUMMARY:
         case GF_SHD_OP_SPLIT_BRAIN_FILES:
+        case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
                 ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc,
                                               heal_op);
                 break;
@@ -1295,6 +1415,8 @@ main (int argc, char **argv)
         }
 
         glfsh_output->end (ret, NULL);
+        if (ret < 0)
+                ret = -ret;
         loc_wipe (&rootloc);
         glfs_subvol_done (fs, top_subvol);
         cleanup (fs);
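A final note on the exit path: glfsheal's internal convention is negative
errno values, but a process exit status must be non-negative, so main() now
negates ret before returning, after the output hook has already printed the
user-facing message. A self-contained sketch of the effect (an assumed
illustration, not code from the patch):

    #include <errno.h>
    #include <stdio.h>

    int
    main (void)
    {
            int ret = -EAGAIN;      /* e.g. the crawl found a pending entry */

            if (ret < 0)
                    ret = -ret;     /* exit status becomes the positive errno */

            /* The calling gluster CLI can map this status back to the right
             * user-facing error instead of reporting a generic failure. */
            printf ("exit status: %d\n", ret);
            return ret;
    }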