diff options
| author | Pranith K <pranithk@gluster.com> | 2011-07-14 06:31:08 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-07-17 07:45:29 -0700 | 
| commit | 5dff9a2938c199285662bb5b33d7e3aeda0e3fb6 (patch) | |
| tree | 20af21a206a8ed7937101005cbb4b9128b5c48fd /xlators/cluster/afr/src | |
| parent | 84c3d7a83a8c84ca11514202a1bc365026fd1c87 (diff) | |
cluster/afr: Fix conflict files and gfid self-heal
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2745 (failure to detect split brain)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2745
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 1172 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 24 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 30 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 104 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 74 | 
7 files changed, 811 insertions, 600 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 894442936..d6b358e2b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1357,6 +1357,11 @@ afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)                  priv->child_count * sizeof (*sh->parentbufs));          afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count); +        if (local->cont.lookup.xattr) { +                dict_unref (local->cont.lookup.xattr); +                local->cont.lookup.xattr = NULL; +        } +          for (i = 0; i < priv->child_count; i++) {                  if (sh->xattr[i])                          local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]); diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 6da666804..58e979791 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -54,6 +54,8 @@ afr_build_parent_loc (loc_t *parent, loc_t *child)          char *tmp = NULL;          if (!child->parent) { +                //this should never be called with root as the child +                GF_ASSERT (0);                  loc_copy (parent, child);                  return;          } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b28f9114f..d76e6c8de 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -27,6 +27,21 @@  #include "afr-self-heal.h"  #include "pump.h" +//Intersection[child]=1 if child is part of intersection +void +afr_children_intersection_get (int32_t *set1, int32_t *set2, +                               int *intersection, unsigned int child_count) +{ +        int                      i = 0; + +        memset (intersection, 0, sizeof (*intersection) * child_count); +        for (i = 0; i < child_count; i++) { +                intersection[i] = afr_is_child_present (set1, child_count, i) +                                     && afr_is_child_present (set2, child_count, +                                                              i); +        } +} +  /**   * select_source - select a source and return it   */ @@ -71,6 +86,14 @@ afr_sh_source_count (int sources[], int child_count)  }  void +afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno) +{ +        sh->op_ret = -1; +        if (afr_error_more_important (sh->op_errno, op_errno)) +                sh->op_errno = op_errno; +} + +void  afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)  {          afr_private_t *  priv = this->private; @@ -186,31 +209,6 @@ out:          return ret;  } - -/** - * mark_sources: Mark all 'source' nodes and return number of source - * nodes found - * - * A node (a row in the pending matrix) belongs to one of - * three categories: - * - * M is the pending matrix. - * - * 'innocent' - M[i] is all zeroes - * 'fool'     - M[i] has i'th element = 1 (self-reference) - * 'wise'     - M[i] has i'th element = 0, others are 1 or 0. - * - * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is - * needed. - * - * A 'wise' node can be a source. If two 'wise' nodes conflict, it is - * a split-brain. If one wise node refers to the other but the other doesn't - * refer back, the referrer is a source. - * - * All fools are sinks, unless there are no 'wise' nodes. In that case, - * one of the fools is made a source. - */ -  typedef enum {          AFR_NODE_INNOCENT,          AFR_NODE_FOOL, @@ -585,6 +583,60 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,  }  int +afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, +                   int32_t **pending_matrix, int32_t *sources, +                   int32_t *success_children, afr_transaction_type type) +{ +        afr_private_t           *priv = NULL; +        afr_self_heal_type      sh_type    = AFR_SELF_HEAL_INVALID; +        int                     nsources   = -1; + +        priv = this->private; + +        if (afr_get_children_count (success_children, priv->child_count) == 0) +                goto out; + +        afr_build_pending_matrix (priv->pending_key, pending_matrix, +                                  xattr, type, priv->child_count); + +        sh_type = afr_self_heal_type_for_transaction (type); +        if (AFR_SELF_HEAL_INVALID == sh_type) +                goto out; + +        afr_sh_print_pending_matrix (pending_matrix, this); + +        nsources = afr_mark_sources (sources, pending_matrix, bufs, +                                     priv->child_count, sh_type, +                                     success_children, this->name); +out: +        return nsources; +} + +/** + * mark_sources: Mark all 'source' nodes and return number of source + * nodes found + * + * A node (a row in the pending matrix) belongs to one of + * three categories: + * + * M is the pending matrix. + * + * 'innocent' - M[i] is all zeroes + * 'fool'     - M[i] has i'th element = 1 (self-reference) + * 'wise'     - M[i] has i'th element = 0, others are 1 or 0. + * + * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is + * needed. + * + * A 'wise' node can be a source. If two 'wise' nodes conflict, it is + * a split-brain. If one wise node refers to the other but the other doesn't + * refer back, the referrer is a source. + * + * All fools are sinks, unless there are no 'wise' nodes. In that case, + * one of the fools is made a source. + */ + +int  afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,                    int32_t child_count, afr_self_heal_type type,                    int32_t *valid_children, const char *xlator_name) @@ -886,7 +938,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)                  sh->xattr[i] = NULL;          } -        if (local->govinda_gOvinda) { +        if (local->govinda_gOvinda || sh->op_failed) {                  gf_log (this->name, GF_LOG_INFO,                          "split brain found, aborting selfheal of %s",                          local->loc.path); @@ -904,7 +956,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)  static int -sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) +afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -918,524 +970,751 @@ sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)          return 0;  } - -static int -sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                int32_t op_ret, int op_errno, -                struct iatt *preop, struct iatt *postop) +static void +afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie, +                                   xlator_t *this, +                                   int32_t op_ret, int32_t op_errno, +                                   inode_t *inode, struct iatt *buf, +                                   dict_t *xattr, struct iatt *postparent)  { -        afr_local_t *local      = NULL; -        loc_t       *parent_loc = cookie; -        int          call_count = 0; +        int              child_index = 0; +        afr_local_t     *local = NULL; +        afr_private_t   *priv = NULL; +        afr_self_heal_t *sh = NULL;          local = frame->local; +        priv = this->private; +        sh   = &local->self_heal; +        child_index = (long) cookie; -        if (op_ret == -1) { -                gf_log (this->name, GF_LOG_INFO, -                        "setattr on %s failed: %s", -                        local->loc.path, strerror (op_errno)); +        LOCK (&frame->lock); +        { +                if (op_ret == 0) { +                        sh->buf[child_index] = *buf; +                        sh->parentbuf        = *postparent; +                        sh->parentbufs[child_index] = *postparent; +                        sh->success_children[sh->success_count] = child_index; +                        sh->success_count++; +                        sh->xattr[child_index] = dict_ref (xattr); +                } else { +                        gf_log (this->name, GF_LOG_ERROR, +                                "path %s on subvolume %s => -1 (%s)", +                                local->loc.path, +                                priv->children[child_index]->name, +                                strerror (op_errno)); +                        local->self_heal.child_errno[child_index] = op_errno; +                }          } +        UNLOCK (&frame->lock); +        return; +} -        if (parent_loc) { -                loc_wipe (parent_loc); -                GF_FREE (parent_loc); +gf_boolean_t +afr_valid_ia_type (ia_type_t ia_type) +{ +        switch (ia_type) { +        case IA_IFSOCK: +        case IA_IFREG: +        case IA_IFBLK: +        case IA_IFCHR: +        case IA_IFIFO: +        case IA_IFLNK: +        case IA_IFDIR: +                return _gf_true; +        default: +                return _gf_false;          } +        return _gf_false; +} -        call_count = afr_frame_return (frame); +void +afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this, +                                    int child_index, struct iatt *buf, +                                    struct iatt *postparent, +                                    afr_impunge_done_cbk_t impunge_done) +{ +        call_frame_t    *impunge_frame = NULL; +        afr_local_t     *local = NULL; +        afr_local_t     *impunge_local = NULL; +        afr_self_heal_t *sh = NULL; +        afr_self_heal_t *impunge_sh = NULL; +        int32_t         op_errno = 0; -        if (call_count == 0) { -                STACK_DESTROY (frame->root); +        impunge_frame = copy_frame (frame); +        if (!impunge_frame) { +                op_errno = ENOMEM; +                goto out;          } -        return 0; -} +        ALLOC_OR_GOTO (impunge_local, afr_local_t, out); +        local = frame->local; +        sh = &local->self_heal; +        impunge_frame->local = impunge_local; +        impunge_sh = &impunge_local->self_heal; +        impunge_sh->sh_frame = frame; +        impunge_sh->active_source = sh->source; +        impunge_sh->impunging_entry_mode = st_mode_from_ia (buf->ia_prot, +                                                            buf->ia_type); +        impunge_sh->impunge_ret_child = child_index; +        loc_copy (&impunge_local->loc, &local->loc); +        sh->impunge_done = impunge_done; +        impunge_local->call_count = 1; +        afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf, +                                     postparent); +        return; +out: +        gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s", +                local->loc.path, strerror (op_errno)); +        impunge_done (frame, this, child_index, -1, op_errno); +} -static int -sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie, -                                 xlator_t *this, -                                 int32_t op_ret, int32_t op_errno, -                                 inode_t *inode, struct iatt *buf, -                                 struct iatt *preparent, -                                 struct iatt *postparent) -{ -        afr_local_t     *local         = NULL; -        afr_self_heal_t *sh            = NULL; -        afr_private_t   *priv          = NULL; -        call_frame_t    *setattr_frame = NULL; -        int              call_count    = 0; -        int              child_index   = 0; -        loc_t           *parent_loc    = NULL; -        struct iatt      stbuf         = {0,}; -        int32_t          valid         = 0; +int +afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child, +                         int32_t op_ret, int32_t op_errno) +{ +        int             call_count = 0; +        afr_local_t     *local = NULL;          local = frame->local; -        sh    = &local->self_heal; -        priv  = this->private; -        child_index = (long) cookie; - -        stbuf.ia_atime = sh->buf[sh->source].ia_atime; -        stbuf.ia_atime_nsec = sh->buf[sh->source].ia_atime_nsec; -        stbuf.ia_mtime = sh->buf[sh->source].ia_mtime; -        stbuf.ia_mtime_nsec = sh->buf[sh->source].ia_mtime_nsec; +        if (op_ret == -1) +                gf_log (this->name, GF_LOG_ERROR, +                        "create entry %s failed, on child %d reason, %s", +                        local->loc.path, child, strerror (op_errno)); +        call_count = afr_frame_return (frame); +        if (call_count == 0) +                afr_sh_missing_entries_finish (frame, this); +        return 0; +} -        stbuf.ia_uid = sh->buf[sh->source].ia_uid; -        stbuf.ia_gid = sh->buf[sh->source].ia_gid; +static int +sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        int              type = 0; +        afr_private_t   *priv = NULL; +        int             enoent_count = 0; +        int             i = 0; +        struct iatt     *buf = NULL; +        struct iatt     *postparent = NULL; -        valid = GF_SET_ATTR_UID   | GF_SET_ATTR_GID | -                GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; +        local = frame->local; +        sh = &local->self_heal; +        priv = this->private; -        if (op_ret == -1) { +        enoent_count = afr_errno_count (NULL, sh->child_errno, +                                        priv->child_count, ENOENT); +        if (enoent_count == 0) {                  gf_log (this->name, GF_LOG_INFO, -                        "%s: failed to mknod on %s (%s)", -                        local->loc.path, priv->children[child_index]->name, -                        strerror (op_errno)); +                        "no missing files - %s. proceeding to metadata check", +                        local->loc.path); +                /* proceed to next step - metadata self-heal */ +                afr_sh_missing_entries_finish (frame, this); +                return 0;          } -        if (op_ret == 0) { -                setattr_frame = copy_frame (frame); +        buf = &sh->buf[sh->source]; +        postparent = &sh->parentbufs[sh->source]; -                setattr_frame->local = GF_CALLOC (1, sizeof (afr_local_t), -                                                  gf_afr_mt_afr_local_t); - -                ((afr_local_t *)setattr_frame->local)->call_count = 2; - -                gf_log (this->name, GF_LOG_TRACE, -                        "setattr (%s) on subvolume %s", -                        local->loc.path, priv->children[child_index]->name); - -                STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk, -                                   (void *) (long) 0, -                                   priv->children[child_index], -                                   priv->children[child_index]->fops->setattr, -                                   &local->loc, &stbuf, valid); - -                valid      = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; -                parent_loc = GF_CALLOC (1, sizeof (*parent_loc), -                                        gf_afr_mt_loc_t); -                afr_build_parent_loc (parent_loc, &local->loc); - -                STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk, -                                   (void *) (long) parent_loc, -                                   priv->children[child_index], -                                   priv->children[child_index]->fops->setattr, -                                   parent_loc, &sh->parentbuf, valid); +        type = buf->ia_type; +        if (!afr_valid_ia_type (type)) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: unknown file type: 0%o", local->loc.path, type); +                local->govinda_gOvinda = 1; +                afr_sh_missing_entries_finish (frame, this); +                goto out;          } -        call_count = afr_frame_return (frame); - -        if (call_count == 0) { -                sh_missing_entries_finish (frame, this); +        local->call_count = enoent_count; +        for (i = 0; i < priv->child_count; i++) { +                //If !child_up errno will be zero +                if (sh->child_errno[i] != ENOENT) +                        continue; +                afr_sh_call_entry_impunge_recreate (frame, this, i, +                                                    buf, postparent, +                                                    afr_sh_create_entry_cbk); +                enoent_count--;          } - +        GF_ASSERT (enoent_count == 0); +out:          return 0;  } - -static int -sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this) +void +afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL;          afr_private_t   *priv = NULL; -        int              i = 0; -        int              ret = 0; -        int              enoent_count = 0; -        int              call_count = 0; -        mode_t           st_mode = 0; -        dev_t            ia_rdev = 0; -        dict_t          *dict = NULL; -        dev_t            st_rdev = 0; +        int32_t         op_errno = 0; +        ia_type_t       ia_type = IA_INVAL; +        int32_t         nsources = 0;          local = frame->local;          sh = &local->self_heal;          priv = this->private; -        for (i = 0; i < priv->child_count; i++) -                if (sh->child_errno[i] == ENOENT) -                        enoent_count++; +        if (afr_get_children_count (sh->success_children, +                                    priv->child_count) == 0) { +                op_errno = afr_resultant_errno_get (NULL, sh->child_errno, +                                                    priv->child_count); +                goto out; +        } -        call_count = enoent_count; -        local->call_count = call_count; +        if (afr_gfid_missing_count (this->name, sh->success_children, +                                    sh->buf, priv->child_count, +                                    local->loc.path) || +            afr_conflicting_iattrs (sh->buf, sh->success_children, +                                    priv->child_count, local->loc.path, +                                    this->name)) { +                //this can happen if finding the fresh parent dir failed +                local->govinda_gOvinda = 1; +                sh->op_failed = 1; +                op_errno = EIO; +                goto out; +        } -        st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot, -                                   sh->buf[sh->source].ia_type); -        ia_rdev  = sh->buf[sh->source].ia_rdev; -        st_rdev = makedev (ia_major (ia_rdev), ia_minor (ia_rdev)); +        //now No chance for the ia_type to conflict +        ia_type = sh->buf[sh->success_children[0]].ia_type; +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->success_children, +                                      afr_transaction_type_get (ia_type)); +        if (nsources < 0) { +                gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," +                        " in missing entry self-heal, continuing with the rest" +                        " of the self-heals", local->loc.path); +                op_errno = EIO; +                goto out; +        } -        gf_log (this->name, GF_LOG_TRACE, -                "mknod %s mode 0%o device type %"PRId64" on %d subvolumes", -                local->loc.path, st_mode, (uint64_t)st_rdev, enoent_count); +        afr_get_fresh_children (sh->success_children, sh->sources, +                                sh->fresh_children, priv->child_count); +        sh->source = sh->fresh_children[0]; +        if (sh->source == -1) { +                gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); +                op_errno = EIO; +                goto out; +        } -        dict = dict_new (); -        if (!dict) -                gf_log (this->name, GF_LOG_ERROR, "out of memory"); +        if (sh->gfid_sh_success_cbk) +                sh->gfid_sh_success_cbk (frame, this); +        sh_missing_entries_create (frame, this); +        return; +out: +        afr_sh_set_error (sh, op_errno); +        afr_sh_missing_entries_finish (frame, this); +        return; +} -        ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid); -        if (ret) -                gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed", -                        local->loc.path); +static int +afr_sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, +                                   xlator_t *this, int32_t op_ret, +                                   int32_t op_errno, inode_t *inode, +                                   struct iatt *buf, dict_t *xattr, +                                   struct iatt *postparent) +{ +        int                     call_count = 0; +        afr_local_t             *local = NULL; +        afr_self_heal_t         *sh = NULL; +        afr_private_t           *priv = NULL; -        for (i = 0; i < priv->child_count; i++) { -                if (sh->child_errno[i] == ENOENT) { -                        STACK_WIND_COOKIE (frame, -                                           sh_missing_entries_newentry_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->mknod, -                                           &local->loc, st_mode, st_rdev, dict); -                        if (!--call_count) -                                break; -                } -        } +        local = frame->local; +        sh = &local->self_heal; +        priv = this->private; + +        afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, +                                           op_errno, inode, buf, xattr, +                                           postparent); +        call_count = afr_frame_return (frame); -        if (dict) -                dict_unref (dict); +        if (call_count == 0) +                afr_sh_missing_entries_lookup_done (frame, this);          return 0;  } - -static int -sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this) +int +afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child, +                         int32_t op_ret, int32_t op_errno)  { +        int             call_count = 0;          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        afr_private_t   *priv = NULL; -        dict_t          *dict = NULL; -        int              i = 0; -        int              ret = 0; -        int              enoent_count = 0; -        int              call_count = 0; -        mode_t           st_mode = 0;          local = frame->local;          sh = &local->self_heal; -        priv = this->private; -        for (i = 0; i < priv->child_count; i++) -                if (sh->child_errno[i] == ENOENT) -                        enoent_count++; - -        call_count = enoent_count; -        local->call_count = call_count; - -        st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot, -                                   sh->buf[sh->source].ia_type); - -        dict = dict_new (); -        if (!dict) { +        GF_ASSERT (sh->post_remove_call); +        if ((op_ret == -1) && (op_errno != ENOENT)) {                  gf_log (this->name, GF_LOG_ERROR, -                        "Out of memory"); -                sh_missing_entries_finish (frame, this); -                return 0; +                        "purge entry %s failed, on child %d reason, %s", +                        local->loc.path, child, strerror (op_errno)); +                LOCK (&frame->lock); +                { +                        afr_sh_set_error (sh, EIO); +                        sh->op_failed = 1; +                } +                UNLOCK (&frame->lock);          } +        call_count = afr_frame_return (frame); +        if (call_count == 0) +                sh->post_remove_call (frame, this); +        return 0; +} -        ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid); -        if (ret) -                gf_log (this->name, GF_LOG_INFO, -                        "%s: inode gfid set failed", local->loc.path); +void +afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this, +                                  int child_index, struct iatt *buf, +                                  afr_expunge_done_cbk_t expunge_done) +{ +        call_frame_t    *expunge_frame = NULL; +        afr_local_t     *local = NULL; +        afr_local_t     *expunge_local = NULL; +        afr_self_heal_t *sh = NULL; +        afr_self_heal_t *expunge_sh = NULL; +        int32_t         op_errno = 0; +        expunge_frame = copy_frame (frame); +        if (!expunge_frame) { +                goto out; +        } -        gf_log (this->name, GF_LOG_TRACE, -                "mkdir %s mode 0%o on %d subvolumes", -                local->loc.path, st_mode, enoent_count); +        ALLOC_OR_GOTO (expunge_local, afr_local_t, out); -        for (i = 0; i < priv->child_count; i++) { -                if (sh->child_errno[i] == ENOENT) { -                        if (!strcmp (local->loc.path, "/")) { -                                /* We shouldn't try to create "/" */ +        local = frame->local; +        sh = &local->self_heal; +        expunge_frame->local = expunge_local; +        expunge_sh = &expunge_local->self_heal; +        expunge_sh->sh_frame = frame; +        loc_copy (&expunge_local->loc, &local->loc); +        sh->expunge_done = expunge_done; +        afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf); +        return; +out: +        gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s", +                local->loc.path, strerror (op_errno)); +        expunge_done (frame, this, child_index, -1, op_errno); +} -                                sh_missing_entries_finish (frame, this); +void +afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children, +                                 int32_t *fresh_children, +                                 unsigned int child_count) +{ +        int     i = 0; -                                return 0; -                        } else { -                                STACK_WIND_COOKIE (frame, -                                                   sh_missing_entries_newentry_cbk, -                                                   (void *) (long) i, -                                                   priv->children[i], -                                                   priv->children[i]->fops->mkdir, -                                                   &local->loc, st_mode, dict); -                                if (!--call_count) -                                        break; -                        } +        for (i = 0; i < child_count; i++) { +                if (afr_is_child_present (success_children, child_count, i) && +                    !afr_is_child_present (fresh_children, child_count, i)) { +                        sh->child_errno[i] = ENOENT; +                        GF_ASSERT (sh->xattr[i]); +                        dict_unref (sh->xattr[i]); +                        sh->xattr[i] = NULL;                  }          } +} -        if (dict) -                dict_unref (dict); +int +afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t         *local    = NULL; +        afr_self_heal_t     *sh       = NULL; +        afr_private_t       *priv     = NULL; +        local    = frame->local; +        sh       = &local->self_heal; +        priv     = this->private; + +        if (sh->op_failed) { +                afr_sh_missing_entries_finish (frame, this); +        } else { +                if (afr_gfid_missing_count (this->name, sh->fresh_children, +                                            sh->buf, priv->child_count, +                                            local->loc.path)) { +                        afr_sh_common_lookup (frame, this, &local->loc, +                                              afr_sh_missing_entries_lookup_cbk, +                                              _gf_true); +                } else { +                        //No need to set gfid so goto missing entries lookup done +                        //Behave as if you have done the lookup +                        afr_sh_remove_stale_lookup_info (sh, +                                                         sh->success_children, +                                                         sh->fresh_children, +                                                         priv->child_count); +                        afr_children_copy (sh->success_children, +                                           sh->fresh_children, +                                           priv->child_count); +                        afr_sh_missing_entries_lookup_done (frame, this); +                } +        }          return 0;  } - -static int -sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this, -                            const char *link, struct iatt *buf) +gf_boolean_t +afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv, +                              int child)  { -        afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        afr_private_t   *priv = NULL; -        dict_t          *dict = NULL; -        int              i = 0; -        int              ret = 0; -        int              enoent_count = 0; -        int              call_count = 0; - -        local = frame->local;          sh = &local->self_heal; -        priv = this->private; -        for (i = 0; i < priv->child_count; i++) -                if (sh->child_errno[i] == ENOENT) -                        enoent_count++; - -        call_count = enoent_count; -        local->call_count = call_count; +        if (local->child_up[child] && +            (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count, +                                    child)) +            && (sh->child_errno[child] != ENOENT)) +                return _gf_true; -        dict = dict_new (); -        if (!dict) { -                gf_log (this->name, GF_LOG_ERROR, -                        "Out of memory"); -                sh_missing_entries_finish (frame, this); -                return 0; -        } +        return _gf_false; +} -        ret = afr_set_dict_gfid (dict, buf->ia_gfid); -        if (ret) -                gf_log (this->name, GF_LOG_DEBUG, -                        "%s: dict gfid set failed", local->loc.path); +gf_boolean_t +afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv, +                                    int child) +{ +        afr_self_heal_t *sh = NULL; -        gf_log (this->name, GF_LOG_TRACE, -                "symlink %s -> %s on %d subvolumes", -                local->loc.path, link, enoent_count); +        sh = &local->self_heal; -        for (i = 0; i < priv->child_count; i++) { -                if (sh->child_errno[i] == ENOENT) { -                        STACK_WIND_COOKIE (frame, -                                           sh_missing_entries_newentry_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->symlink, -                                           link, &local->loc, dict); -                        if (!--call_count) -                                break; -                } -        } +        if (local->child_up[child] && +            (!afr_is_child_present (sh->fresh_children, priv->child_count, +                                    child)) +             && (sh->child_errno[child] != ENOENT)) +                return _gf_true; -        return 0; +        return _gf_false;  } - -static int -sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie, -                                 xlator_t *this, -                                 int32_t op_ret, int32_t op_errno, -                                 const char *link, struct iatt *sbuf) +void +afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this, +                           gf_boolean_t purge_condition (afr_local_t *local, +                                                         afr_private_t *priv, +                                                         int child))  {          afr_local_t     *local = NULL; -        afr_self_heal_t *sh = NULL;          afr_private_t   *priv = NULL; +        afr_self_heal_t *sh = NULL; +        int             i = 0; +        int             call_count = 0;          local = frame->local;          sh = &local->self_heal;          priv = this->private; -        if (op_ret > 0) -                sh_missing_entries_symlink (frame, this, link, sbuf); -        else { -                gf_log (this->name, GF_LOG_INFO, -                        "%s: failed to do readlink on %s (%s)", -                        local->loc.path, priv->children[sh->source]->name, -                        strerror (op_errno)); -                sh_missing_entries_finish (frame, this); +        for (i = 0; i < priv->child_count; i++) { +                if (purge_condition (local, priv, i)) +                        call_count++;          } -        return 0; -} +        if (call_count == 0) { +                sh->post_remove_call (frame, this); +                goto out; +        } +        local->call_count = call_count; +        for (i = 0; i < priv->child_count; i++) { +                if (!purge_condition (local, priv, i)) +                        continue; +                afr_sh_call_entry_expunge_remove (frame, this, +                                                  (long) i, &sh->buf[i], +                                                  afr_sh_remove_entry_cbk); +        } +out: +        return; +} -static int -sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this) +void +afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        afr_private_t   *priv = NULL;          local = frame->local;          sh = &local->self_heal; -        priv = this->private; - -        STACK_WIND (frame, sh_missing_entries_readlink_cbk, -                    priv->children[sh->source], -                    priv->children[sh->source]->fops->readlink, -                    &local->loc, 4096); +        sh->post_remove_call = afr_sh_missing_entries_finish; -        return 0; +        afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);  } - -static int -sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +void +afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        int              type = 0; -        int              i = 0;          afr_private_t   *priv = NULL; -        int              enoent_count = 0; -        int              govinda_gOvinda = 0; +        int             i = 0;          local = frame->local;          sh = &local->self_heal;          priv = this->private; +        sh->post_remove_call = afr_sh_purge_stale_entries_done; +          for (i = 0; i < priv->child_count; i++) { -                if (!local->child_up[i]) +                if (afr_is_child_present (sh->fresh_children, +                                          priv->child_count, i))                          continue; -                if (sh->child_errno[i]) { -                        if (sh->child_errno[i] == ENOENT) -                                enoent_count++; -                } else { -                        if (type) { -                                if (type != sh->buf[i].ia_type) { -                                        gf_log (this->name, GF_LOG_DEBUG, -                                                "file %s is not recoverable " -                                                "automatically!", -                                                local->loc.path); - -                                        govinda_gOvinda = 1; -                                } -                        } else { -                                sh->source = i; -                                type = sh->buf[i].ia_type; -                        } -                } -        } +                if ((!local->child_up[i]) || sh->child_errno[i] != 0) +                        continue; -        if (govinda_gOvinda) { -                gf_log (this->name, GF_LOG_ERROR, -                        "conflicting filetypes exist for path %s. returning.", -                        local->loc.path); +                GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) || +                           uuid_is_null (sh->buf[i].ia_gfid)); -                local->govinda_gOvinda = 1; -                sh_missing_entries_finish (frame, this); -                return 0; -        } +                if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) || +                    (uuid_compare (sh->buf[i].ia_gfid, +                                   sh->entrybuf.ia_gfid))) +                        continue; + +                afr_fresh_children_add_child (sh->fresh_children, +                                              i, priv->child_count); -        if (!type) { -                gf_log (this->name, GF_LOG_ERROR, -                        "no source found for %s. all nodes down?. returning.", -                        local->loc.path); -                /* subvolumes down and/or file does not exist */ -                sh_missing_entries_finish (frame, this); -                return 0;          } +        afr_sh_purge_entry_common (frame, this, +                                   afr_sh_purge_stale_entry_condition); +} -        if (enoent_count == 0) { -                gf_log (this->name, GF_LOG_INFO, -                        "no missing files - %s. proceeding to metadata check", -                        local->loc.path); -                /* proceed to next step - metadata self-heal */ -                sh_missing_entries_finish (frame, this); -                return 0; +void +afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs, +                                     struct iatt *save, +                                     unsigned int child_count) +{ +        int             i = 0; +        int             child = 0; +        gf_boolean_t    saved = _gf_false; + +        GF_ASSERT (save); +        //if iatt buf with gfid exists sets it +        for (i = 0; i < child_count; i++) { +                child = children[i]; +                if (child == -1) +                        break; +                *save = bufs[child]; +                saved = _gf_true; +                if (!uuid_is_null (save->ia_gfid)) +                        break;          } +        GF_ASSERT (saved); +} -        switch (type) { -        case IA_IFSOCK: -        case IA_IFREG: -        case IA_IFBLK: -        case IA_IFCHR: -        case IA_IFIFO: -                sh_missing_entries_mknod (frame, this); -                break; -        case IA_IFLNK: -                sh_missing_entries_readlink (frame, this); -                break; -        case IA_IFDIR: -                sh_missing_entries_mkdir (frame, this); -                break; -        default: -                gf_log (this->name, GF_LOG_ERROR, -                        "%s: unknown file type: 0%o", local->loc.path, type); +void +afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t      *local = NULL; +        afr_self_heal_t  *sh = NULL; +        afr_private_t    *priv = NULL; +        int32_t          fresh_child_enoents = 0; +        int32_t          fresh_parent_count = 0; +        int32_t          op_errno = 0; + +        local = frame->local; +        sh = &local->self_heal; +        priv = this->private; + +        if (afr_get_children_count (sh->success_children, +                                    priv->child_count) == 0) { +                op_errno = afr_resultant_errno_get (NULL, sh->child_errno, +                                                    priv->child_count); +                goto fail; +        } + +        //make intersection of (success_children & fresh_parent_dirs) fresh_children +        //the other success_children will be added to it if they are not stale +        afr_children_intersection_get (sh->success_children, +                                       sh->fresh_parent_dirs, +                                       sh->sources, priv->child_count); +        afr_get_fresh_children (sh->success_children, sh->sources, +                                sh->fresh_children, priv->child_count); +        memset (sh->sources, 0, sizeof (*sh->sources) * priv->child_count); + +        fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs, +                                                     priv->child_count); +        //we need the enoent count of the subvols present in fresh_parent_dirs +        fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs, +                                               sh->child_errno, +                                               priv->child_count, ENOENT); +        if (fresh_child_enoents == fresh_parent_count) { +                afr_sh_set_error (sh, ENOENT); +                sh->op_failed = 1; +                afr_sh_purge_entry (frame, this); +        } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children, +                                            priv->child_count, local->loc.path, +                                            this->name)) { +                afr_sh_save_child_iatts_from_policy (sh->fresh_children, +                                                     sh->buf, &sh->entrybuf, +                                                     priv->child_count); +                afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf, +                                            sh->fresh_children, +                                            priv->child_count); +                afr_sh_purge_stale_entry (frame, this); +        } else { +                op_errno = EIO;                  local->govinda_gOvinda = 1; -                sh_missing_entries_finish (frame, this); +                goto fail;          } -        return 0; -} +        return; +fail: +        afr_sh_set_error (sh, op_errno); +        afr_sh_missing_entries_finish (frame, this); +        return; +}  static int -sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, -                               xlator_t *this, -                               int32_t op_ret, int32_t op_errno, -                               inode_t *inode, struct iatt *buf, dict_t *xattr, -                               struct iatt *postparent) +afr_sh_children_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                            int32_t op_ret, int32_t op_errno, inode_t *inode, +                            struct iatt *buf, dict_t *xattr, +                            struct iatt *postparent)  { -        int              child_index = 0; -        afr_local_t     *local = NULL;          int              call_count = 0; + +        afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, +                                           op_errno, inode, buf, xattr, +                                           postparent); +        call_count = afr_frame_return (frame); + +        if (call_count == 0) +                afr_sh_children_lookup_done (frame, this); + +        return 0; +} + +static int +afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this) +{ +        afr_self_heal_t *sh  = NULL;          afr_private_t   *priv = NULL; -        mode_t           st_mode = 0; +        afr_local_t     *local = NULL; +        int             enoent_count = 0; +        int             nsources = 0; +        int             source  = -1;          local = frame->local; +        sh = &local->self_heal;          priv = this->private; -        child_index = (long) cookie; - -        if (buf) -                st_mode = st_mode_from_ia (buf->ia_prot, buf->ia_type); +        /* If We can't find a fresh parent directory here, +         * we wont know which subvol is correct without finding a parent dir +         * upwards which has correct xattrs, for that we may have to +         * do lookups till root, we dont wanna do that, +         * instead make sure that if there are conflicting gfid +         * parent dirs, self-heal thus lookup is failed with EIO. +         * if there are missing entries we dont know whether to delete or +         * create so fail with EIO, +         * If there are conflicting xattr fail with EIO. +         */ +        if (afr_get_children_count (sh->success_children, +                                    priv->child_count) == 0) { +                gf_log (this->name, GF_LOG_ERROR, "Parent dir lookup failed " +                        "for %s, in missing entry self-heal, continuing with " +                        "the rest of the self-heals", local->loc.path); +                goto out; +        } -        LOCK (&frame->lock); -        { -                if (op_ret == 0) { -                        gf_log (this->name, GF_LOG_TRACE, -                                "path %s on subvolume %s is of mode 0%o", -                                local->loc.path, -                                priv->children[child_index]->name, -                                st_mode); +        enoent_count = afr_errno_count (NULL, sh->child_errno, +                                        priv->child_count, ENOENT); +        if (enoent_count > 0) { +                gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s," +                        " in missing entry self-heal, continuing with the rest" +                        " of the self-heals", local->loc.path); +                goto out; +        } -                        local->self_heal.buf[child_index] = *buf; -                        local->self_heal.parentbuf        = *postparent; -                } else { -                        gf_log (this->name, GF_LOG_INFO, -                                "path %s on subvolume %s => -1 (%s)", -                                local->loc.path, -                                priv->children[child_index]->name, -                                strerror (op_errno)); +        if (afr_conflicting_iattrs (sh->buf, sh->success_children, +                                    priv->child_count, sh->parent_loc.path, +                                    this->name)) { +                gf_log (this->name, GF_LOG_INFO, "conflicting stat info for " +                        "parent dirs of %s", local->loc.path); +                goto out; +        } -                        local->self_heal.child_errno[child_index] = op_errno; -                } +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->success_children, +                                      AFR_ENTRY_TRANSACTION); +        if (nsources < 0) { +                gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," +                        " in missing entry self-heal, continuing with the rest" +                        " of the self-heals", local->loc.path); +                goto out; +        } +        source = afr_sh_select_source (sh->sources, priv->child_count); +        if (source == -1) { +                GF_ASSERT (0); +                gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); +                goto out;          } -        UNLOCK (&frame->lock); +        afr_get_fresh_children (sh->success_children, sh->sources, +                                sh->fresh_parent_dirs, priv->child_count); +        afr_sh_common_lookup (frame, this, &local->loc, +                              afr_sh_children_lookup_cbk, _gf_false); +        return 0; +out: +        afr_sh_set_error (sh, EIO); +        sh->op_failed = 1; +        afr_sh_missing_entries_finish (frame, this); +        return 0; +} + +int +afr_sh_conflicting_entry_lookup_cbk (call_frame_t *frame, void *cookie, +                                     xlator_t *this, +                                     int32_t op_ret, int32_t op_errno, +                                     inode_t *inode, struct iatt *buf, +                                     dict_t *xattr, struct iatt *postparent) +{ +        int              call_count = 0; + +        afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, +                                           op_errno, inode, buf, xattr, +                                           postparent);          call_count = afr_frame_return (frame); -        if (call_count == 0) { -                sh_missing_entries_create (frame, this); -        } +        if (call_count == 0) +                afr_sh_find_fresh_parents (frame, this);          return 0;  } +void +afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count) +{ +        int             i = 0; -static int -sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) +        for (i = 0; i < child_count; i++) { +                memset (&sh->buf[i], 0, sizeof (sh->buf[i])); +                memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i])); +                sh->child_errno[i] = 0; +        } +        memset (&sh->parentbuf, 0, sizeof (sh->parentbuf)); +        sh->success_count = 0; +        afr_reset_children (sh->success_children, child_count); +        afr_reset_children (sh->fresh_children, child_count); +        afr_reset_xattr (sh->xattr, child_count); +} + +/* afr self-heal state will be lost if this call is made + * please check the afr_sh_common_reset that is called in this function + */ +int +afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, +                      afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid)  {          afr_local_t    *local = NULL;          int             i = 0;          int             call_count = 0;          afr_private_t  *priv = NULL;          dict_t         *xattr_req = NULL; -        int             ret = -1; +        afr_self_heal_t *sh = NULL;          local = frame->local;          priv  = this->private; +        sh    = &local->self_heal;          call_count = afr_up_children_count (priv->child_count,                                              local->child_up); @@ -1445,29 +1724,29 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)          xattr_req = dict_new();          if (xattr_req) { -                for (i = 0; i < priv->child_count; i++) { -                        ret = dict_set_uint64 (xattr_req, -                                               priv->pending_key[i], -                                               3 * sizeof(int32_t)); -                        if (ret < 0) -                                gf_log (this->name, GF_LOG_WARNING, -                                        "%s: failed to set value for %s", -                                        local->loc.path, priv->pending_key[i]); +                afr_xattr_req_prepare (this, xattr_req, loc->path); +                if (set_gfid) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "looking up %s with gfid: %s", +                                local->loc.path, uuid_utoa (sh->sh_gfid_req)); +                        GF_ASSERT (!uuid_is_null (sh->sh_gfid_req)); +                        afr_set_dict_gfid (xattr_req, sh->sh_gfid_req);                  }          } +        afr_sh_common_reset (sh, priv->child_count);          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) { -                        gf_log (this->name, GF_LOG_TRACE, +                        gf_log (this->name, GF_LOG_DEBUG,                                  "looking up %s on subvolume %s",                                  local->loc.path, priv->children[i]->name);                          STACK_WIND_COOKIE (frame, -                                           sh_missing_entries_lookup_cbk, +                                           lookup_cbk,                                             (void *) (long) i,                                             priv->children[i],                                             priv->children[i]->fops->lookup, -                                           &local->loc, xattr_req); +                                           loc, xattr_req);                          if (!--call_count)                                  break; @@ -1483,13 +1762,15 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)  int -afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) +afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; +        afr_self_heal_t     *sh       = NULL;          local    = frame->local;          int_lock = &local->internal_lock; +        sh       = &local->self_heal;          if (int_lock->lock_op_ret < 0) {                  gf_log (this->name, GF_LOG_INFO, @@ -1499,14 +1780,41 @@ afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)                  gf_log (this->name, GF_LOG_DEBUG,                          "Non blocking entrylks done. Proceeding to FOP"); -                sh_missing_entries_lookup (frame, this); +                afr_sh_common_lookup (frame, this, &sh->parent_loc, +                                      afr_sh_conflicting_entry_lookup_cbk, +                                      _gf_false);          }          return 0;  } -static int -afr_sh_entrylk (call_frame_t *frame, xlator_t *this) +int +afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this) +{ +        afr_internal_lock_t *int_lock = NULL; +        afr_local_t         *local    = NULL; + +        local    = frame->local; +        int_lock = &local->internal_lock; + +        if (int_lock->lock_op_ret < 0) { +                gf_log (this->name, GF_LOG_INFO, +                        "Non blocking entrylks failed."); +                afr_sh_missing_entries_done (frame, this); +        } else { +                gf_log (this->name, GF_LOG_DEBUG, +                        "Non blocking entrylks done. Proceeding to FOP"); +                afr_sh_common_lookup (frame, this, &local->loc, +                                      afr_sh_missing_entries_lookup_cbk, +                                      _gf_true); +        } + +        return 0; +} + +int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, +                char *base_name, afr_lock_cbk_t lock_cbk)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -1521,9 +1829,9 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this)          afr_set_lock_number (frame, this); -        int_lock->lk_basename = local->loc.name; -        int_lock->lk_loc      = &sh->parent_loc; -        int_lock->lock_cbk    = afr_sh_post_nonblocking_entrylk_cbk; +        int_lock->lk_basename = base_name; +        int_lock->lk_loc      = loc; +        int_lock->lock_cbk    = lock_cbk;          afr_nonblocking_entrylk (frame, this); @@ -1531,7 +1839,8 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this)  }  static int -afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) +afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this, +                              afr_lock_cbk_t lock_cbk)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -1547,9 +1856,27 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)                  "attempting to recreate missing entries for path=%s",                  local->loc.path); +        GF_ASSERT (local->loc.parent);          afr_build_parent_loc (&sh->parent_loc, &local->loc); -        afr_sh_entrylk (frame, this); +        afr_sh_entrylk (frame, this, &sh->parent_loc, NULL, +                        lock_cbk); +        return 0; +} + +static int +afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this) +{ +        afr_self_heal_parent_entrylk (frame, this, +                                      afr_sh_post_nb_entrylk_conflicting_sh_cbk); +        return 0; +} + +static int +afr_self_heal_gfids (call_frame_t *frame, xlator_t *this) +{ +        afr_self_heal_parent_entrylk (frame, this, +                                      afr_sh_post_nb_entrylk_gfid_sh_cbk);          return 0;  } @@ -1572,6 +1899,9 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)          shc = &lc->self_heal;          shc->unwind = sh->unwind; +        shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk; +        shc->need_missing_entry_self_heal = sh->need_missing_entry_self_heal; +        shc->need_gfid_self_heal = sh->need_gfid_self_heal;          shc->need_data_self_heal = sh->need_data_self_heal;          shc->need_metadata_self_heal = sh->need_metadata_self_heal;          shc->need_entry_self_heal = sh->need_entry_self_heal; @@ -1585,6 +1915,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)          shc->background = sh->background;          shc->type = sh->type; +        uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);          if (l->loc.path)                  loc_copy (&lc->loc, &l->loc); @@ -1648,6 +1979,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          if (local->govinda_gOvinda)                  split_brain = _gf_true; +          afr_set_split_brain (this, sh->inode, split_brain);          afr_self_heal_type_str_get (sh, sh_type_str, @@ -1764,11 +2096,15 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)          }          sh->success_children = afr_fresh_children_create (priv->child_count);          sh->fresh_children = afr_fresh_children_create (priv->child_count); +        sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count);          FRAME_SU_DO (sh_frame, afr_local_t); -        if (local->success_count && local->enoent_count) { -                afr_self_heal_missing_entries (sh_frame, this); +        if (sh->need_missing_entry_self_heal) { +                afr_self_heal_conflicting_entries (sh_frame, this); +        } else if (sh->need_gfid_self_heal) { +                GF_ASSERT (!uuid_is_null (sh->sh_gfid_req)); +                afr_self_heal_gfids (sh_frame, this);          } else {                  gf_log (this->name, GF_LOG_TRACE,                          "proceeding to metadata check on %s", @@ -1784,18 +2120,28 @@ void  afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,                              size_t size)  { -        GF_ASSERT (str && (size > strlen (" meta-data data entry"))); +        GF_ASSERT (str && (size > strlen (" missing-entry gfid " +                                          "meta-data data entry")));          if (self_heal_p->need_metadata_self_heal) { -                snprintf(str, size, " meta-data"); +                snprintf (str, size, " meta-data");          }          if (self_heal_p->need_data_self_heal) { -                snprintf(str + strlen(str), size - strlen(str), " data"); +                snprintf (str + strlen(str), size - strlen(str), " data");          }          if (self_heal_p->need_entry_self_heal) { -                snprintf(str + strlen(str), size - strlen(str), " entry"); +                snprintf (str + strlen(str), size - strlen(str), " entry"); +        } + +        if (self_heal_p->need_missing_entry_self_heal) { +                snprintf (str + strlen(str), size - strlen(str), +                         " missing-entry"); +        } + +        if (self_heal_p->need_gfid_self_heal) { +                snprintf (str + strlen(str), size - strlen(str), " gfid");          }  } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 7f6247455..c600db825 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -29,6 +29,11 @@ typedef enum {          AFR_SELF_HEAL_INVALID = -1,  } afr_self_heal_type; +typedef int +(*afr_lookup_cbk_t) (call_frame_t *frame, void *cookie, xlator_t *this, +                     int32_t op_ret, int32_t op_errno, inode_t *inode, +                     struct iatt *buf, dict_t *xattr, +                     struct iatt *postparent);  int  afr_sh_select_source (int sources[], int child_count); @@ -71,4 +76,23 @@ afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,  afr_self_heal_type  afr_self_heal_type_for_transaction (afr_transaction_type type); +int +afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs, +                   int32_t **pending_matrix, int32_t *sources, +                   int32_t *success_children, afr_transaction_type type); +void +afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count); +int +afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, +                      afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid); +int +afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, +                             int active_src, struct iatt *buf); +int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, +                char *base_name, afr_lock_cbk_t lock_cbk); +int +afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this, +                             int child_index, struct iatt *buf, +                             struct iatt *postparent);  #endif /* __AFR_SELF_HEAL_COMMON_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 7179e929d..6d16b170f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -282,7 +282,7 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; -        gf_log (this->name, GF_LOG_TRACE, +        gf_log (this->name, GF_LOG_DEBUG,                  "finishing data selfheal of %s", local->loc.path);          if (!sh->data_lock_held) @@ -607,18 +607,11 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)          sh = &local->self_heal;          priv = this->private; -        afr_build_pending_matrix (priv->pending_key, sh->pending_matrix, -                                  sh->xattr, AFR_DATA_TRANSACTION, -                                  priv->child_count); - -        afr_sh_print_pending_matrix (sh->pending_matrix, this); - -        nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf, -                                     priv->child_count, AFR_SELF_HEAL_DATA, -                                     sh->success_children, this->name); - +        nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, +                                      sh->sources, sh->success_children, +                                      AFR_DATA_TRANSACTION);          if (nsources == 0) { -                gf_log (this->name, GF_LOG_TRACE, +                gf_log (this->name, GF_LOG_DEBUG,                          "No self-heal needed for %s",                          local->loc.path); @@ -760,7 +753,6 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          afr_private_t            *priv      = NULL;          int                      read_child = -1;          int                      ret        = -1; -        afr_self_heal_type       sh_type    = AFR_SELF_HEAL_INVALID;          int32_t                  **pending_matrix = NULL;          int32_t                  *sources         = NULL;          int32_t                  *success_children   = NULL; @@ -784,16 +776,8 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          if (NULL == sources)                  goto out; -        afr_build_pending_matrix (priv->pending_key, pending_matrix, -                                  xattr, txn_type, priv->child_count); - -        sh_type = afr_self_heal_type_for_transaction (txn_type); -        if (AFR_SELF_HEAL_INVALID == sh_type) -                goto out; - -        nsources = afr_mark_sources (sources, pending_matrix, bufs, -                                     priv->child_count, sh_type, -                                     success_children, this->name); +        nsources = afr_build_sources (this, xattr, bufs, pending_matrix, +                                      sources, success_children, txn_type);          if (nsources < 0) {                  ret = -1;                  goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 7e43c4277..4a5e7531d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -50,9 +50,6 @@  #include "afr-self-heal-common.h"  int -afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this); - -int  afr_sh_entry_done (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL; @@ -2267,16 +2264,10 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)                  goto heal;          } -        afr_build_pending_matrix (priv->pending_key, sh->pending_matrix, -                                  sh->xattr, AFR_ENTRY_TRANSACTION, -                                  priv->child_count); - -        afr_sh_print_pending_matrix (sh->pending_matrix, this); - -        nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf, -                                     priv->child_count, AFR_SELF_HEAL_ENTRY, -                                     sh->success_children, this->name); - +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->success_children, +                                      AFR_ENTRY_TRANSACTION);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE,                          "No self-heal needed for %s", @@ -2340,62 +2331,6 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,          return 0;  } - - -int -afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this) -{ -        afr_local_t    *  local = NULL; -        afr_private_t  *  priv  = NULL; -        dict_t         *xattr_req = NULL; -        int ret = 0; -        int call_count = 0; -        int i = 0; -        afr_self_heal_t *sh = NULL; - -        priv  = this->private; -        local = frame->local; -        sh = &local->self_heal; - -        call_count = afr_up_children_count (priv->child_count, -                                            local->child_up); - -        local->call_count = call_count; - -        xattr_req = dict_new(); -        if (xattr_req) { -                for (i = 0; i < priv->child_count; i++) { -                        ret = dict_set_uint64 (xattr_req, -                                               priv->pending_key[i], -                                               3 * sizeof(int32_t)); -                        if (ret < 0) -                                gf_log (this->name, GF_LOG_WARNING, -                                        "%s: Unable to set dict value.", -                                        local->loc.path); -                } -        } - -        afr_reset_children (sh->success_children, priv->child_count); -        sh->success_count = 0; -        for (i = 0; i < priv->child_count; i++) { -                if (local->child_up[i]) { -                        STACK_WIND_COOKIE (frame, -                                           afr_sh_entry_lookup_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->lookup, -                                           &local->loc, xattr_req); -                        if (!--call_count) -                                break; -                } -        } - -        if (xattr_req) -                dict_unref (xattr_req); - -        return 0; -} -  int  afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)  { @@ -2416,38 +2351,14 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)                  gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "                          "for %s. Proceeding to FOP", local->loc.path); -                afr_sh_entry_lookup(frame, this); +                afr_sh_common_lookup (frame, this, &local->loc, +                                      afr_sh_entry_lookup_cbk, _gf_false);          }          return 0;  }  int -afr_sh_entry_lock (call_frame_t *frame, xlator_t *this) -{ -        afr_internal_lock_t *int_lock = NULL; -        afr_local_t         *local    = NULL; - -        local    = frame->local; -        int_lock = &local->internal_lock; - -        int_lock->transaction_lk_type = AFR_SELFHEAL_LK; -        int_lock->selfheal_lk_type    = AFR_ENTRY_SELF_HEAL_LK; - -        afr_set_lock_number (frame, this); - -        int_lock->lk_basename = NULL; -        int_lock->lk_loc      = &local->loc; -        int_lock->lock_cbk    = afr_sh_post_nonblocking_entry_cbk; - -        afr_nonblocking_entrylk (frame, this); - - -        return 0; -} - - -int  afr_self_heal_entry (call_frame_t *frame, xlator_t *this)  {          afr_local_t   *local = NULL; @@ -2458,7 +2369,8 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)          local = frame->local;          if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) { -                afr_sh_entry_lock (frame, this); +                afr_sh_entrylk (frame, this, &local->loc, NULL, +                                afr_sh_post_nonblocking_entry_cbk);          } else {                  gf_log (this->name, GF_LOG_TRACE,                          "proceeding to completion on %s", diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 5993e9596..04c5ef4e1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -492,16 +492,10 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)          sh = &local->self_heal;          priv = this->private; -        afr_build_pending_matrix (priv->pending_key, sh->pending_matrix, -                                  sh->xattr, AFR_METADATA_TRANSACTION, -                                  priv->child_count); - -        afr_sh_print_pending_matrix (sh->pending_matrix, this); - -        nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf, -                                     priv->child_count, AFR_SELF_HEAL_METADATA, -                                     sh->success_children, this->name); - +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->success_children, +                                      AFR_METADATA_TRANSACTION);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE,                          "No self-heal needed for %s", @@ -631,63 +625,6 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          return 0;  } - -int -afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this) -{ -        afr_local_t     *local = NULL; -        afr_private_t   *priv = NULL; -        int              i = 0; -        int              call_count = 0; -        dict_t          *xattr_req = NULL; -        int              ret = 0; -        afr_self_heal_t *sh = NULL; - -        local = frame->local; -        priv = this->private; -        sh = &local->self_heal; - -        call_count = afr_up_children_count (priv->child_count, -                                            local->child_up); -        local->call_count = call_count; - -        xattr_req = dict_new(); - -        if (xattr_req) { -                for (i = 0; i < priv->child_count; i++) { -                        ret = dict_set_uint64 (xattr_req, -                                               priv->pending_key[i], -                                               3 * sizeof(int32_t)); -                        if (ret < 0) -                                gf_log (this->name, GF_LOG_WARNING, -                                        "Unable to set dict value."); -                } -        } - -        afr_reset_children (sh->success_children, priv->child_count); -        sh->success_count = 0; -        for (i = 0; i < priv->child_count; i++) { -                if (local->child_up[i]) { -                        gf_log (this->name, GF_LOG_TRACE, -                                "looking up %s on %s", -                                local->loc.path, priv->children[i]->name); - -                        STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->lookup, -                                           &local->loc, xattr_req); -                        if (!--call_count) -                                break; -                } -        } - -        if (xattr_req) -                dict_unref (xattr_req); - -        return 0; -} -  int  afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,                                                xlator_t *this) @@ -709,7 +646,8 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,                  gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "                          "inodelks done for %s. Proceeding to FOP",                          local->loc.path); -                afr_sh_metadata_lookup (frame, this); +                afr_sh_common_lookup (frame, this, &local->loc, +                                      afr_sh_metadata_lookup_cbk, _gf_false);          }          return 0;  | 
