diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2012-02-16 21:30:47 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vijay@gluster.com> | 2012-02-20 21:23:37 -0800 | 
| commit | 81ab6622d403558cd6f31efeb535fe886d3beeaa (patch) | |
| tree | 7e30ec6d7ee51e957a50c98741f0a3a7118b5dfa /xlators/cluster/afr | |
| parent | 5f117a4a1fca3ec2d163fe77615cf6859c0450e4 (diff) | |
cluster/afr: Add commands to see self-heald ops
Change-Id: Id92d3276e65a6c0fe61ab328b58b3954ae116c74
BUG: 763820
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/2775
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/afr')
| -rw-r--r-- | xlators/cluster/afr/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 32 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-mem-types.h | 10 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 776 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 24 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 79 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 23 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/pump.c | 2 | 
8 files changed, 690 insertions, 258 deletions
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index 16ed25af1..ed0901813 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -15,7 +15,7 @@ noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-  AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \  	    -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \ -	    -I$(top_srcdir)/xlators/lib/src +	    -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src  CLEANFILES = diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 1895150cd..9a78f6d3d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3399,7 +3399,7 @@ find_child_index (xlator_t *this, xlator_t *child)  int32_t  afr_notify (xlator_t *this, int32_t event, -            void *data, ...) +            void *data, void *data2)  {          afr_private_t   *priv               = NULL;          int             i                   = -1; @@ -3412,6 +3412,8 @@ afr_notify (xlator_t *this, int32_t event,          int             ret                 = -1;          int             call_psh            = 0;          int             up_child            = AFR_ALL_CHILDREN; +        dict_t          *input              = NULL; +        dict_t          *output             = NULL;          priv = this->private; @@ -3499,10 +3501,11 @@ afr_notify (xlator_t *this, int32_t event,                  break; -        case GF_EVENT_TRIGGER_HEAL: -                gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered" -                        " manually. Start crawling"); -                call_psh = 1; +        case GF_EVENT_TRANSLATOR_OP: +                input = data; +                output = data2; +                ret = afr_xl_op (this, input, output); +                goto out;                  break;          default: @@ -3552,7 +3555,7 @@ afr_notify (xlator_t *this, int32_t event,          ret = 0;          if (propagate)                  ret = default_notify (this, event, data); -        if (call_psh) { +        if (call_psh && priv->shd.enabled) {                  gf_log (this->name, GF_LOG_DEBUG, "start crawl: %d", up_child);                  afr_do_poll_self_heal ((void*) (long) up_child);          } @@ -3925,6 +3928,23 @@ afr_priv_destroy (afr_private_t *priv)                  goto out;          inode_unref (priv->root_inode);          GF_FREE (priv->shd.pos); +        GF_FREE (priv->shd.pending); +        GF_FREE (priv->shd.inprogress); +        GF_FREE (priv->shd.sh_times); +//        for (i = 0; i < priv->child_count; i++) +//                if (priv->shd.timer && priv->shd.timer[i]) +//                        gf_timer_call_cancel (this->ctx, priv->shd.timer[i]); +        GF_FREE (priv->shd.timer); + +        if (priv->shd.healed) +                eh_destroy (priv->shd.healed); + +        if (priv->shd.heal_failed) +                eh_destroy (priv->shd.heal_failed); + +        if (priv->shd.split_brain) +                eh_destroy (priv->shd.split_brain); +          GF_FREE (priv->last_event);          if (priv->pending_key) {                  for (i = 0; i < priv->child_count; i++) diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 228139408..a138c9676 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -44,10 +44,12 @@ enum gf_afr_mem_types_ {          gf_afr_mt_locked_fd,          gf_afr_mt_inode_ctx_t,          gf_afr_fd_paused_call_t, -        gf_afr_mt_afr_crawl_data_t, -        gf_afr_mt_afr_brick_pos_t, -        gf_afr_mt_afr_shd_bool_t, -        gf_afr_mt_afr_shd_timer_t, +        gf_afr_mt_crawl_data_t, +        gf_afr_mt_brick_pos_t, +        gf_afr_mt_shd_bool_t, +        gf_afr_mt_shd_timer_t, +        gf_afr_mt_shd_event_t, +        gf_afr_mt_time_t,          gf_afr_mt_end  };  #endif diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 1f071b871..fa7e61e49 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -25,11 +25,453 @@  #include "syncop.h"  #include "afr-self-heald.h"  #include "afr-self-heal-common.h" +#include "protocol-common.h" +#include "event-history.h"  #define AFR_POLL_TIMEOUT 600 +typedef enum { +        STOP_CRAWL_ON_SINGLE_SUBVOL = 1 +} afr_crawl_flags_t; + +typedef struct shd_dump { +        dict_t   *dict; +        time_t   sh_time; +        xlator_t *this; +        int      child; +} shd_dump_t; + +typedef struct shd_event_ { +        int     child; +        char    *path; +} shd_event_t; + +typedef int +(*afr_crawl_done_cbk_t)  (int ret, call_frame_t *sync_frame, void *crawl_data); + +void +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, +                 process_entry_cbk_t process_entry, void *op_data, +                 gf_boolean_t exclusive, int crawl_flags, +                 afr_crawl_done_cbk_t crawl_done); + +static int +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data); + +void +shd_cleanup_event (void *event) +{ +        shd_event_t *shd_event = event; + +        if (!shd_event) +                goto out; +        if (shd_event->path) +                GF_FREE (shd_event->path); +        GF_FREE (shd_event); +out: +        return; +} + +int +afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count) +{ +        int i = 0; +        int ret = -1; +        for (i = 0; i < child_count; i++) { +                if (shd->pos[i] == AFR_POS_LOCAL) { +                        ret = i; +                        break; +                } +        } +        return ret; +} + +static int +_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) +{ +        int             ret = 0; + +        uuid_copy (loc->pargfid, parent->inode->gfid); +        loc->path = ""; +        loc->name = name; +        loc->parent = inode_ref (parent->inode); +        if (!loc->parent) { +                loc->path = NULL; +                loc_wipe (loc); +                ret = -1; +        } +        return ret; +} + +int +_add_str_to_dict (xlator_t *this, dict_t *output, int child, char *str, +                  gf_boolean_t dyn) +{ +        //subkey not used for now +        int             ret = -1; +        uint64_t        count = 0; +        char            key[256] = {0}; +        int             xl_id = 0; + +        ret = dict_get_int32 (output, this->name, &xl_id); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); +                goto out; +        } + +        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); +        ret = dict_get_uint64 (output, key, &count); + +        snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count); +        if (dyn) +                ret = dict_set_dynstr (output, key, str); +        else +                ret = dict_set_str (output, key, str); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output", +                        str); +                goto out; +        } + +        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); +        ret = dict_set_uint64 (output, key, count + 1); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Could not increment count"); +                goto out; +        } +        ret = 0; +out: +        return ret; +} + +int +_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child, +                         char **fpath) +{ +        dict_t          *xattr = NULL; +        char            *path = NULL; +        int             ret = -1; + +        ret = syncop_getxattr (readdir_xl, child, &xattr, +                               GFID_TO_PATH_KEY); +        if (ret) +                goto out; +        ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path); +        if (ret) { +                gf_log (this->name, GF_LOG_DEBUG, "Failed to get path for " +                        "gfid %s", uuid_utoa (child->gfid)); +                goto out; +        } +        path = gf_strdup (path); +        if (!path) { +                ret = -1; +                goto out; +        } +        ret = 0; +out: +        if (!ret) +                *fpath = path; +        if (xattr) +                dict_unref (xattr); +        return ret; +} + +int +_add_event_to_dict (circular_buffer_t *cb, void *data) +{ +        int               ret = 0; +        shd_dump_t        *dump_data = NULL; +        shd_event_t       *shd_event = NULL; + +        dump_data = data; +        shd_event = cb->data; +        if (shd_event->child != dump_data->child) +                goto out; +        if (cb->tv.tv_sec >= dump_data->sh_time) +                ret = _add_str_to_dict (dump_data->this, dump_data->dict, +                                        dump_data->child, shd_event->path, +                                        _gf_false); +out: +        return ret; +} + +int +_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, time_t sh_time, +                 int child) +{ +        shd_dump_t dump_data = {0}; + +        dump_data.this = this; +        dump_data.dict = dict; +        dump_data.sh_time = sh_time; +        dump_data.child = child; +        eh_dump (eh, &dump_data, _add_event_to_dict); +        return 0; +} + +int +_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data, +                      gf_dirent_t *entry, +                      loc_t *childloc, loc_t *parentloc, struct iatt *iattr) +{ +        dict_t          *output = NULL; +        xlator_t        *readdir_xl = NULL; +        int             ret = -1; +        char            *path = NULL; + +        if (uuid_is_null (childloc->gfid)) +                goto out; + +        output = crawl_data->op_data; +        readdir_xl = crawl_data->readdir_xl; + +        ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path); +        if (ret) +                goto out; + +        ret = _add_str_to_dict (this, output, crawl_data->child, path, +                                _gf_true); +out: +        if (ret && path) +                GF_FREE (path); +        return ret; +} + +void +_remove_stale_index (xlator_t *this, xlator_t *readdir_xl, +                     loc_t *parent, char *fname) +{ +        int              ret = 0; +        loc_t            index_loc = {0}; + +        ret = _build_index_loc (this, &index_loc, fname, parent); +        if (ret) +                goto out; +        gf_log (this->name, GF_LOG_INFO, "Removing stale index " +                "for %s on %s", index_loc.name, readdir_xl->name); +        ret = syncop_unlink (readdir_xl, &index_loc); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" +                        " index on %s - %s", index_loc.name, +                        readdir_xl->name, strerror (errno)); +        } +        index_loc.path = NULL; +        loc_wipe (&index_loc); +out: +        return; +} + +void +_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, +                       int32_t op_ret, int32_t op_errno, +                       afr_crawl_data_t *crawl_data) +{ +        int              ret = 0; +        afr_private_t    *priv = NULL; +        afr_self_heald_t *shd = NULL; +        eh_t             *eh = NULL; +        char             *path = NULL; +        shd_event_t      *event = NULL; + +        priv = this->private; +        shd  = &priv->shd; +        if (crawl_data->crawl == INDEX) { +                if ((op_ret < 0) && (op_errno == ENOENT)) { +                        _remove_stale_index (this, crawl_data->readdir_xl, +                                             parent, uuid_utoa (child->gfid)); +                        goto out; +                } +                ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl, +                                               child, &path); +                if (ret) +                        goto out; +        } else { +                path = gf_strdup (child->path); +                if (!path) { +                        ret = -1; +                        goto out; +                } +        } + +        if (op_ret < 0 && op_errno == EIO) +                eh = shd->split_brain; +        else if (op_ret < 0) +                eh = shd->heal_failed; +        else +                eh = shd->healed; +        ret = -1; +        event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t); +        if (!event) +                goto out; +        event->child = crawl_data->child; +        event->path = path; +        ret = eh_save_history (eh, event); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to " +                        "eh, (%d, %s)", path, op_ret, strerror (op_errno)); +                goto out; +        } +        ret = 0; +out: +        if (ret && path) +                GF_FREE (path); +        return; +} + +int +_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry, +                  loc_t *child, loc_t *parent, struct iatt *iattr) +{ +        struct iatt      parentbuf = {0}; +        int              ret = 0; + +        if (uuid_is_null (child->gfid)) +                gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path); +        else +                gf_log (this->name, GF_LOG_DEBUG, "lookup %s", +                        uuid_utoa (child->gfid)); + +        ret = syncop_lookup (this, child, NULL, +                             iattr, NULL, &parentbuf); +        _crawl_post_sh_action (this, parent, child, ret, errno, crawl_data); +        return ret; +} + +static int +afr_crawl_done  (int ret, call_frame_t *sync_frame, void *data) +{ +        GF_FREE (data); +        STACK_DESTROY (sync_frame->root); +        return 0; +} +  void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl); +_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl) +{ +        afr_private_t   *priv = NULL; +        afr_self_heald_t *shd = NULL; + +        priv = this->private; +        shd = &priv->shd; + +        time (&shd->sh_times[child]); +        afr_start_crawl (this, child, crawl, _self_heal_entry, +                         NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL, +                         afr_crawl_done); +} + +void +_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl) +{ +        int             i = 0; +        afr_private_t   *priv = NULL; + +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) +                _do_self_heal_on_subvol (this, i, INDEX); +} + +void +_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl) +{ +        int             local_child = -1; +        afr_private_t   *priv = NULL; + +        priv = this->private; +        local_child = afr_get_local_child (&priv->shd, +                                           priv->child_count); +        if (local_child < -1) { +               gf_log (this->name, GF_LOG_INFO, +                       "No local bricks found"); +        } +        _do_self_heal_on_subvol (this, local_child, FULL); +} + +int +_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) +{ +        int             i = 0; +        afr_private_t   *priv = NULL; + +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) +                afr_start_crawl (this, i, INDEX, _add_summary_to_dict, +                                 output, _gf_false, 0, NULL); +        return 0; +} + +int +_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) +{ +        afr_private_t           *priv = NULL; +        afr_self_heald_t        *shd = NULL; +        int                     i = 0; + +        priv = this->private; +        shd = &priv->shd; + +        for (i = 0; i < priv->child_count; i++) { +                if (shd->pos[i] != AFR_POS_LOCAL) +                        continue; +                _add_eh_to_dict (this, eh, dict, shd->sh_times[i], i); +        } +        return 0; +} + +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) +{ +        gf_xl_afr_op_t   op = GF_AFR_OP_INVALID; +        int              ret = 0; +        afr_private_t    *priv = NULL; +        afr_self_heald_t *shd = NULL; +        int              xl_id = 0; + +        priv = this->private; +        shd = &priv->shd; + +        ret = dict_get_int32 (input, "xl-op", (int32_t*)&op); +        if (ret) +                goto out; +        ret = dict_get_int32 (input, this->name, &xl_id); +        if (ret) +                goto out; +        ret = dict_set_int32 (output, this->name, xl_id); +        if (ret) +                goto out; +        switch (op) { +        case GF_AFR_OP_HEAL_INDEX: +                _do_self_heal_on_local_subvols (this, INDEX); +                ret = 0; +                break; +        case GF_AFR_OP_HEAL_FULL: +                _do_self_heal_on_local_subvol (this, FULL); +                ret = 0; +                break; +        case GF_AFR_OP_INDEX_SUMMARY: +                ret = _get_index_summary_on_local_subvols (this, output); +                if (ret) +                        goto out; +                break; +        case GF_AFR_OP_HEALED_FILES: +                ret = _add_all_subvols_eh_to_dict (this, shd->healed, output); +                break; +        case GF_AFR_OP_HEAL_FAILED_FILES: +                ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed, +                                                   output); +                break; +        case GF_AFR_OP_SPLIT_BRAIN_FILES: +                ret = _add_all_subvols_eh_to_dict (this, shd->split_brain, +                                                   output); +                break; +        default: +                gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); +                break; +        } +out: +        dict_del (output, this->name); +        return ret; +}  void  afr_do_poll_self_heal (void *data) @@ -39,21 +481,14 @@ afr_do_poll_self_heal (void *data)          struct timeval   timeout = {0};          xlator_t         *this = NULL;          long             child = (long)data; -        int              i = 0;          this = THIS;          priv = this->private;          shd = &priv->shd; -        if (child == AFR_ALL_CHILDREN) { //done by command -                for (i = 0; i < priv->child_count; i++) -                        afr_start_crawl (this, i, INDEX); +        _do_self_heal_on_subvol (this, child, INDEX); +        if (shd->pos[child] == AFR_POS_REMOTE)                  goto out; -        } else { -                afr_start_crawl (this, child, INDEX); -                if (shd->pos[child] == AFR_POS_REMOTE) -                        goto out; -        }          timeout.tv_sec = AFR_POLL_TIMEOUT;          timeout.tv_usec = 0;          if (shd->timer[child]) @@ -71,9 +506,6 @@ out:  }  static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, -                  xlator_t *readdir_xl); -static int  get_pathinfo_host (char *pathinfo, char *hostname, size_t size)  {          char    *start = NULL; @@ -132,15 +564,16 @@ out:  int  afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, -                           loc_t *dirloc, xlator_t *readdir_xl) +                           loc_t *dirloc)  {          afr_private_t *priv = NULL;          dict_t        *xattr = NULL;          void          *index_gfid = NULL;          loc_t         rootloc = {0}; -        struct iatt   iatt = {0}; +        struct iatt   iattr = {0};          struct iatt   parent = {0};          int           ret = 0; +        xlator_t      *readdir_xl = crawl_data->readdir_xl;          priv = this->private;          if (crawl_data->crawl == FULL) { @@ -167,13 +600,13 @@ afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,                  dirloc->path = "";                  dirloc->inode = inode_new (priv->root_inode->table);                  ret = syncop_lookup (readdir_xl, dirloc, NULL, -                                     &iatt, NULL, &parent); +                                     &iattr, NULL, &parent);                  if (ret < 0) {                          gf_log (this->name, GF_LOG_ERROR, "lookup failed on "                                  "index dir on %s", readdir_xl->name);                          goto out;                  } -                inode_link (dirloc->inode, NULL, NULL, &iatt); +                inode_link (dirloc->inode, NULL, NULL, &iattr);          }          ret = 0;  out: @@ -185,7 +618,7 @@ out:  int  afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, -                   loc_t *dirloc, xlator_t *readdir_xl) +                   loc_t *dirloc)  {          fd_t          *fd   = NULL;          int           ret = 0; @@ -199,7 +632,7 @@ afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,                          goto out;                  } -                ret = syncop_opendir (readdir_xl, dirloc, fd); +                ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);                  if (ret < 0) {                          gf_log (this->name, GF_LOG_ERROR,                                  "opendir failed on %s", dirloc->path); @@ -247,7 +680,7 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,  }  gf_boolean_t -_crawl_proceed (xlator_t *this, int child) +_crawl_proceed (xlator_t *this, int child, int crawl_flags)  {          afr_private_t *priv = this->private;          gf_boolean_t proceed = _gf_false; @@ -258,77 +691,33 @@ _crawl_proceed (xlator_t *this, int child)                  goto out;          } -        if (afr_up_children_count (priv->child_up, -                                   priv->child_count) < 2) { -                gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " -                        "< 2 children are up"); -                goto out; -        } -        proceed = _gf_true; -out: -        return proceed; -} - -static int -_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) -{ -        int             ret = 0; - -        uuid_copy (loc->pargfid, parent->inode->gfid); -        loc->path = ""; -        loc->name = name; -        loc->parent = inode_ref (parent->inode); -        if (!loc->parent) { -                loc->path = NULL; -                loc_wipe (loc); -                ret = -1; -        } -        return ret; -} - -void -_index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc, -                              gf_dirent_t *entry, int op_ret, int op_errno, -                              xlator_t *readdir_xl) -{ -        loc_t            index_loc = {0}; -        int              ret = 0; - -        if (op_ret && (op_errno == ENOENT)) { -                ret = _build_index_loc (this, &index_loc, entry->d_name, -                                        parentloc); -                if (ret) +        if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) { +                if (afr_up_children_count (priv->child_up, +                                           priv->child_count) < 2) { +                        gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " +                                "< 2 children are up");                          goto out; -                gf_log (this->name, GF_LOG_INFO, "Removing stale index " -                        "for %s on %s", index_loc.name, readdir_xl->name); -                ret = syncop_unlink (readdir_xl, &index_loc); -                if (ret) { -                        gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" -                                " index on %s - %s", index_loc.name, -                                readdir_xl->name, strerror (errno));                  } -                index_loc.path = NULL; -                loc_wipe (&index_loc);          } +        proceed = _gf_true;  out: -        return; +        return proceed;  }  static int -_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, -                    off_t *offset, afr_crawl_data_t *crawl_data, -                    xlator_t *readdir_xl) +_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, +                  off_t *offset, afr_crawl_data_t *crawl_data)  {          gf_dirent_t      *entry = NULL;          gf_dirent_t      *tmp = NULL; -        struct iatt      iatt = {0}; -        struct iatt      parent = {0};          int              ret = 0;          loc_t            entry_loc = {0};          fd_t             *fd = NULL; +        struct iatt      iattr = {0};          list_for_each_entry_safe (entry, tmp, &entries->list, list) { -                if (!_crawl_proceed (this, crawl_data->child)) { +                if (!_crawl_proceed (this, crawl_data->child, +                                     crawl_data->crawl_flags)) {                          ret = -1;                          goto out;                  } @@ -344,62 +733,51 @@ _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,                          continue;                  } +                if (crawl_data->crawl == INDEX) +                        entry_loc.path = NULL;//HACK                  loc_wipe (&entry_loc);                  ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,                                                   entry, crawl_data);                  if (ret)                          goto out; -                if (uuid_is_null (entry_loc.gfid)) { -                        gf_log (this->name, GF_LOG_WARNING, "failed to build " -                                "location for %s", entry->d_name); -                        continue; -                } -                if (entry_loc.path) -                        gf_log (this->name, GF_LOG_DEBUG, "lookup %s", -                                entry_loc.path); -                else -                        gf_log (this->name, GF_LOG_DEBUG, "lookup %s", -                                uuid_utoa (entry_loc.gfid)); - -                ret = syncop_lookup (this, &entry_loc, NULL, -                                     &iatt, NULL, &parent); -                if (crawl_data->crawl == INDEX) { -                        _index_crawl_post_lookup_fop (this, parentloc, entry, -                                                      ret, errno, readdir_xl); -                        entry_loc.path = NULL; -                        loc_wipe (&entry_loc); +                ret = crawl_data->process_entry (this, crawl_data, entry, +                                                 &entry_loc, parentloc, &iattr); + +                if (crawl_data->crawl == INDEX)                          continue; -                } -                //Don't fail the crawl if lookup fails as it -                //could be because of split-brain -                if (ret || (!IA_ISDIR (iatt.ia_type))) +                if (ret || !IA_ISDIR (iattr.ia_type))                          continue; -                inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt); -                ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc, -                                         readdir_xl); + +                inode_link (entry_loc.inode, parentloc->inode, NULL, &iattr); + +                fd = NULL; +                ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);                  if (ret)                          continue; -                ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl); -                fd_unref (fd); +                ret = _crawl_directory (fd, &entry_loc, crawl_data); +                if (fd) +                        fd_unref (fd);          }          ret = 0;  out: +        if (crawl_data->crawl == INDEX) +                entry_loc.path = NULL;          if (entry_loc.path)                  loc_wipe (&entry_loc);          return ret;  }  static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, -                  xlator_t *readdir_xl) +_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)  {          xlator_t        *this = NULL;          off_t           offset   = 0;          gf_dirent_t     entries;          int             ret = 0;          gf_boolean_t    free_entries = _gf_false; +        xlator_t        *readdir_xl = crawl_data->readdir_xl;          INIT_LIST_HEAD (&entries.list);          this = THIS; @@ -424,15 +802,16 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data,                  ret = 0;                  free_entries = _gf_true; -                if (!_crawl_proceed (this, crawl_data->child)) { +                if (!_crawl_proceed (this, crawl_data->child, +                                     crawl_data->crawl_flags)) {                          ret = -1;                          goto out;                  }                  if (list_empty (&entries.list))                          goto out; -                ret = _perform_self_heal (this, loc, &entries, &offset, -                                          crawl_data, readdir_xl); +                ret = _process_entries (this, loc, &entries, &offset, +                                        crawl_data);                  gf_dirent_free (&entries);                  free_entries = _gf_false;          } @@ -515,14 +894,6 @@ out:          return ret;  } -static int -afr_crawl_done  (int ret, call_frame_t *sync_frame, void *data) -{ -        GF_FREE (data); -        STACK_DESTROY (sync_frame->root); -        return 0; -} -  static inline int  afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)  { @@ -530,17 +901,74 @@ afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)  }  static int -afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) +afr_dir_crawl (void *data) +{ +        xlator_t            *this = NULL; +        afr_private_t       *priv = NULL; +        afr_self_heald_t    *shd = NULL; +        int                 ret = -1; +        xlator_t            *readdir_xl = NULL; +        fd_t                *fd = NULL; +        loc_t               dirloc = {0}; +        afr_crawl_data_t    *crawl_data = data; + +        this = THIS; +        priv = this->private; +        shd = &priv->shd; + +        if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags)) +                goto out; + +        ret = afr_find_child_position (this, crawl_data->child); +        if (ret) +                goto out; + +        if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) +                goto out; + +        readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); +        if (!readdir_xl) +                goto out; +        crawl_data->readdir_xl = readdir_xl; + +        ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc); +        if (ret) +                goto out; + +        ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc); +        if (ret) +                goto out; + +        ret = _crawl_directory (fd, &dirloc, crawl_data); +        if (ret) +                gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", +                        readdir_xl->name); +        else +                gf_log (this->name, GF_LOG_INFO, "Crawl completed " +                        "on %s", readdir_xl->name); +        if (crawl_data->crawl == INDEX) +                dirloc.path = NULL; +out: +        if (fd) +                fd_unref (fd); +        if (crawl_data->crawl == INDEX) +                dirloc.path = NULL; +        loc_wipe (&dirloc); +        return ret; +} + +static int +afr_dir_exclusive_crawl (void *data)  {          afr_private_t    *priv = NULL;          afr_self_heald_t *shd = NULL; -        loc_t            dirloc = {0};          gf_boolean_t     crawl = _gf_false;          int              ret = 0; -        xlator_t         *readdir_xl = NULL; -        fd_t             *fd = NULL;          int              child = -1; +        xlator_t         *this = NULL; +        afr_crawl_data_t *crawl_data = data; +        this = THIS;          priv = this->private;          shd = &priv->shd;          child = crawl_data->child; @@ -548,7 +976,8 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)          LOCK (&priv->lock);          {                  if (shd->inprogress[child]) { -                        shd->pending[child] = _gf_true; +                        if (shd->pending[child] != FULL) +                                shd->pending[child] = crawl_data->crawl;                  } else {                          shd->inprogress[child] = _gf_true;                          crawl = _gf_true; @@ -556,11 +985,6 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)          }          UNLOCK (&priv->lock); -        if (!priv->root_inode) { -                ret = -1; -                goto out; -        } -          if (!crawl) {                  gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "                          "for %s", priv->children[child]->name); @@ -568,87 +992,35 @@ afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data)          }          do { -                readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); -                if (!readdir_xl) -                        goto done; -                ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc, -                                                 readdir_xl); -                if (ret) -                        goto done; -                ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc, -                                         readdir_xl); -                if (ret) -                        goto done; -                ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl); -                if (ret) -                        gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", -                                readdir_xl->name); -                else -                        gf_log (this->name, GF_LOG_INFO, "Crawl completed " -                                "on %s", readdir_xl->name); -                fd_unref (fd); -                fd = NULL; -done: +                afr_dir_crawl (data);                  LOCK (&priv->lock);                  { -                        if (shd->pending[child]) { -                                shd->pending[child] = _gf_false; +                        if (shd->pending[child] != NONE) { +                                crawl_data->crawl = shd->pending[child]; +                                shd->pending[child] = NONE;                          } else {                                  shd->inprogress[child] = _gf_false;                                  crawl = _gf_false;                          }                  }                  UNLOCK (&priv->lock); -                if (crawl_data->crawl == INDEX) { -                        dirloc.path = NULL; -                        loc_wipe (&dirloc); -                }          } while (crawl);  out: -        if (fd) -                fd_unref (fd); -        if (crawl_data->crawl == INDEX) { -                dirloc.path = NULL; -                loc_wipe (&dirloc); -        } -        return ret; -} - -static int -afr_crawl (void *data) -{ -        xlator_t         *this = NULL; -        afr_private_t    *priv = NULL; -        afr_self_heald_t *shd = NULL; -        int              ret = -1; -        afr_crawl_data_t *crawl_data = data; - -        this = THIS; -        priv = this->private; -        shd = &priv->shd; - -        if (!_crawl_proceed (this, crawl_data->child)) -                goto out; -        ret = afr_find_child_position (this, crawl_data->child); -        if (ret) -                goto out; - -        if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) -                goto out; - -        ret = afr_crawl_directory (this, crawl_data); -out:          return ret;  }  void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl) +afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, +                 process_entry_cbk_t process_entry, void *op_data, +                 gf_boolean_t exclusive, int crawl_flags, +                 afr_crawl_done_cbk_t crawl_done)  {          afr_private_t              *priv = NULL;          afr_self_heald_t           *shd = NULL;          call_frame_t               *frame = NULL;          afr_crawl_data_t           *crawl_data = NULL;          int                        ret = 0; +        int (*crawler) (void*) = NULL;          priv = this->private;          shd = &priv->shd; @@ -662,16 +1034,24 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl)          afr_set_lk_owner (frame, this);          afr_set_low_priority (frame);          crawl_data = GF_CALLOC (1, sizeof (*crawl_data), -                                gf_afr_mt_afr_crawl_data_t); +                                gf_afr_mt_crawl_data_t);          if (!crawl_data)                  goto out; +        crawl_data->process_entry = process_entry;          crawl_data->child = idx;          crawl_data->pid = frame->root->pid;          crawl_data->crawl = crawl; -        gf_log (this->name, GF_LOG_INFO, "starting crawl for %s", -                priv->children[idx]->name); -        ret = synctask_new (this->ctx->env, afr_crawl, -                            afr_crawl_done, frame, crawl_data); +        crawl_data->op_data = op_data; +        crawl_data->crawl_flags = crawl_flags; +        gf_log (this->name, GF_LOG_INFO, "starting crawl %d for %s", +                crawl_data->crawl, priv->children[idx]->name); + +        if (exclusive) +                crawler = afr_dir_exclusive_crawl; +        else +                crawler = afr_dir_crawl; +        ret = synctask_new (this->ctx->env, crawler, +                            crawl_done, frame, crawl_data);          if (ret)                  gf_log (this->name, GF_LOG_ERROR, "Could not create the "                          "task for %d ret %d", idx, ret); @@ -679,16 +1059,6 @@ out:          return;  } -//void -//afr_full_self_heal (xlator_t *this) -//{ -//        int     i = 0; -//        afr_private_t *priv = this->private; -// -//        for (i = 0; i < priv->child_count; i++) -//                afr_start_crawl (this, i, FULL); -//} -  void  afr_build_root_loc (xlator_t *this, loc_t *loc)  { diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index eb1021995..44fd9f385 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -26,17 +26,22 @@  #define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))  #define AFR_ALL_CHILDREN -1 -typedef enum { -        INDEX, -        FULL, -} afr_crawl_type_t;  typedef struct afr_crawl_data_ { -        int              child; -        pid_t            pid; -        afr_crawl_type_t crawl; -        xlator_t         *readdir_xl; +        int                 child; +        pid_t               pid; +        afr_crawl_type_t    crawl; +        xlator_t            *readdir_xl; +        void                *op_data; +        int                 crawl_flags; +        int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data, +                              gf_dirent_t *entry, loc_t *child, loc_t *parent, +                              struct iatt *iattr);  } afr_crawl_data_t; +typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data, +                              gf_dirent_t *entry, loc_t *child, loc_t *parent, +                              struct iatt *iattr); +  void afr_proactive_self_heal (xlator_t *this, int idx);  void afr_build_root_loc (xlator_t *this, loc_t *loc); @@ -48,4 +53,7 @@ afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);  void  afr_do_poll_self_heal (void *data); + +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);  #endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index abc6aa3e5..8e2ef1008 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -30,7 +30,10 @@  #endif  #include "afr-common.c" -#define SHD_INODE_LRU_LIMIT     100 +#define SHD_INODE_LRU_LIMIT          100 +#define AFR_EH_HEALED_LIMIT          1024 +#define AFR_EH_HEAL_FAIL_LIMIT       1024 +#define AFR_EH_SPLIT_BRAIN_LIMIT     1024  struct volume_options options[]; @@ -39,8 +42,13 @@ notify (xlator_t *this, int32_t event,          void *data, ...)  {          int ret = -1; +        va_list         ap; +        void *data2 = NULL; -        ret = afr_notify (this, event, data); +        va_start (ap, data); +        data2 = va_arg (ap, dict_t*); +        va_end (ap); +        ret = afr_notify (this, event, data, data2);          return ret;  } @@ -342,42 +350,55 @@ init (xlator_t *this)                  goto out;          } -        priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, -                                   gf_afr_mt_afr_brick_pos_t); -        if (!priv->shd.pos) { -                ret = -ENOMEM; +        priv->first_lookup = 1; +        priv->root_inode = NULL; + +        if (!priv->shd.enabled) { +                ret = 0;                  goto out;          } +        ret = -ENOMEM; +        priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, +                                   gf_afr_mt_brick_pos_t); +        if (!priv->shd.pos) +                goto out; +          priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count, -                                       gf_afr_mt_afr_shd_bool_t); -        if (!priv->shd.pending) { -                ret = -ENOMEM; +                                       gf_afr_mt_int32_t); +        if (!priv->shd.pending)                  goto out; -        }          priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress), -                                          child_count, -                                          gf_afr_mt_afr_shd_bool_t); -        if (!priv->shd.inprogress) { -                ret = -ENOMEM; +                                          child_count, gf_afr_mt_shd_bool_t); +        if (!priv->shd.inprogress)                  goto out; -        }          priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count, -                                     gf_afr_mt_afr_shd_timer_t); -        if (!priv->shd.timer) { -                ret = -ENOMEM; +                                     gf_afr_mt_shd_timer_t); +        if (!priv->shd.timer) +                goto out; + +        priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false); +        if (!priv->shd.healed) +                goto out; + +        priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false); +        if (!priv->shd.heal_failed) +                goto out; + +        priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false); +        if (!priv->shd.split_brain) +                goto out; + +        priv->shd.sh_times = GF_CALLOC (priv->child_count, +                                        sizeof (*priv->shd.sh_times), +                                        gf_afr_mt_time_t); +        if (!priv->shd.sh_times) +                goto out; + +        this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); +        if (!this->itable)                  goto out; -        } -        if (priv->shd.enabled) { -                this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); -                if (!this->itable) { -                        ret = -ENOMEM; -                        goto out; -                } -        } -        priv->first_lookup = 1; -        priv->root_inode = NULL;          ret = 0;  out: @@ -393,6 +414,8 @@ fini (xlator_t *this)          priv = this->private;          this->private = NULL;          afr_priv_destroy (priv); +        if (this->itable);//I dont see any destroy func +          return 0;  } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f3d372de5..0f4a6d90a 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -88,12 +88,22 @@ typedef struct afr_inode_ctx_ {          int32_t  *fresh_children;//increasing order of latency  } afr_inode_ctx_t; +typedef enum { +        NONE, +        INDEX, +        FULL, +} afr_crawl_type_t; +  typedef struct afr_self_heald_ { -        gf_boolean_t    enabled; -        gf_boolean_t    *pending; -        gf_boolean_t    *inprogress; -        afr_child_pos_t *pos; -        gf_timer_t      **timer; +        gf_boolean_t     enabled; +        afr_crawl_type_t *pending; +        gf_boolean_t     *inprogress; +        afr_child_pos_t  *pos; +        time_t           *sh_times; +        gf_timer_t       **timer; +        eh_t             *healed; +        eh_t             *heal_failed; +        eh_t             *split_brain;  } afr_self_heald_t;  typedef struct _afr_private { @@ -747,8 +757,7 @@ int  pump_command_reply (call_frame_t *frame, xlator_t *this);  int32_t -afr_notify (xlator_t *this, int32_t event, -            void *data, ...); +afr_notify (xlator_t *this, int32_t event, void *data, void *data2);  int  afr_attempt_lock_recovery (xlator_t *this, int32_t child_index); diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 281bfd722..eae7899e9 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -2334,7 +2334,7 @@ notify (xlator_t *this, int32_t event,          child_xl = (xlator_t *) data; -        ret = afr_notify (this, event, data); +        ret = afr_notify (this, event, data, NULL);  	switch (event) {  	case GF_EVENT_CHILD_DOWN:  | 
