diff options
| author | Pranith K <pranithk@gluster.com> | 2011-07-14 08:07:04 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-07-17 07:45:05 -0700 | 
| commit | bfc0e16e43815ab6d6e67f4bd26694ebd72b3360 (patch) | |
| tree | 221eff40a09ce8e42ab6460bc9040d5b5f9f41fb /xlators | |
| parent | 64b2a56ad0f8ddae9ece8696f7d50a3129c145a3 (diff) | |
cluster/afr: Add fresh children along with read-child to inode context
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 680 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 134 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 55 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 12 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-mem-types.h | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 18 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 88 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 30 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 57 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/pump.c | 9 | 
16 files changed, 817 insertions, 353 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index b753cbfa85c..e8afc6d8de6 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -87,187 +87,444 @@ out:          return ret;  } -uint64_t -afr_is_split_brain (xlator_t *this, inode_t *inode) +afr_inode_ctx_t* +afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)  { -        int ret = 0; +        int             ret  = -1; +        afr_inode_ctx_t *ctx = NULL; +        size_t          size = 0; -        uint64_t ctx         = 0; -        uint64_t split_brain = 0; +        GF_ASSERT (child_count > 0); -        VALIDATE_OR_GOTO (inode, out); +        if (!addr) { +                ctx = GF_CALLOC (1, sizeof (*ctx), +                                 gf_afr_mt_inode_ctx_t); +                if (!ctx) +                        goto out; +                size = sizeof (*ctx->fresh_children); +                ctx->fresh_children = GF_CALLOC (child_count, size, +                                                 gf_afr_mt_int32_t); +                if (!ctx->fresh_children) +                        goto out; +        } else { +                ctx = (afr_inode_ctx_t*) (long) addr; +        } +        ret = 0; +out: +        if (ret && ctx) { +                if (ctx->fresh_children) +                        GF_FREE (ctx->fresh_children); +                GF_FREE (ctx); +                ctx = NULL; +        } +        return ctx; +} + +void +afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) +{ +        GF_ASSERT (inode); +        GF_ASSERT (params); +        int             ret = 0; +        afr_inode_ctx_t *ctx = NULL; +        afr_private_t   *priv = NULL; +        int             i = 0; +        uint64_t        ctx_addr   = 0; +        int32_t         read_child = -1; +        int32_t         *fresh_children = NULL; + +        priv = this->private;          LOCK (&inode->lock);          { -              
  ret = __inode_ctx_get (inode, this, &ctx); - +                ret = __inode_ctx_get (inode, this, &ctx_addr);                  if (ret < 0)                          goto unlock; - -                split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK; +                ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); +                if (!ctx) +                        goto unlock; +                switch (params->mask_type) { +                case AFR_ICTX_READ_CHILD_MASK: +                        fresh_children = params->u.read_ctx.fresh_children; +                        read_child = (int32_t)(ctx->masks & +                                               AFR_ICTX_READ_CHILD_MASK); +                        params->u.read_ctx.read_child = read_child; +                        if (!fresh_children) +                                goto unlock; +                        for (i = 0; i < priv->child_count; i++) +                                fresh_children[i] = ctx->fresh_children[i]; +                        break; +                case AFR_ICTX_OPENDIR_DONE_MASK: +                        params->u.value = ctx->masks & +                                          AFR_ICTX_OPENDIR_DONE_MASK; +                        break; +                case AFR_ICTX_SPLIT_BRAIN_MASK: +                        params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK; +                        break; +                }          }  unlock:          UNLOCK (&inode->lock); +} -out: -        return split_brain; +uint64_t +afr_is_split_brain (xlator_t *this, inode_t *inode) +{ +        afr_inode_params_t params = {0}; + +        params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK; +        afr_inode_get_ctx (this, inode, ¶ms); +        return params.u.value; +} + +gf_boolean_t +afr_is_opendir_done (xlator_t *this, inode_t *inode) +{ +        afr_inode_params_t params = {0}; + +        params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK; +        afr_inode_get_ctx (this, inode, ¶ms); +        return 
params.u.value;  } +int32_t +afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children) +{ +        afr_inode_params_t      params = {0}; + +        params.mask_type                  = AFR_ICTX_READ_CHILD_MASK; +        params.u.read_ctx.fresh_children = fresh_children; +        afr_inode_get_ctx (this, inode, ¶ms); +        return params.u.read_ctx.read_child; +} +  void -afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) +afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child, +                            int32_t *fresh_children, int32_t child_count)  { -        uint64_t ctx = 0; -        int      ret = 0; +        uint64_t        rest_of_mask = 0; +        uint64_t        mask         = 0; +        int             i            = 0; -        VALIDATE_OR_GOTO (inode, out); +        rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks); +        mask = (AFR_ICTX_READ_CHILD_MASK & read_child); +        ctx->masks = rest_of_mask | mask; -        LOCK (&inode->lock); -        { -                ret = __inode_ctx_get (inode, this, &ctx); +        /* avoid memcpy as int, int32_t are used interchangeably +         */ +        for (i = 0; i < child_count; i++) { +                if (fresh_children) +                        ctx->fresh_children[i] = fresh_children[i]; +                else +                        ctx->fresh_children[i] = -1; +        } +} -                if (ret < 0) { -                        ctx = 0; -                } +void +afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx) +{ +        uint64_t        rest_of_mask = 0; +        uint64_t        mask = 0; -                if (set) { -                        ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx) -                                | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); -                } else { -                        ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx); -                } +        rest_of_mask = 
(~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks); +        mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); +        ctx->masks = rest_of_mask | mask; +} -                ret = __inode_ctx_put (inode, this, ctx); -                if (ret) { -                        gf_log_callingfn (this->name, GF_LOG_INFO, -                                          "failed to set the inode ctx (%s)", -                                          uuid_utoa (inode->gfid)); -                } +void +afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set) +{ +        uint64_t        rest_of_mask = 0; +        uint64_t        mask = 0; + +        if (set) { +                rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); +                mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); +                ctx->masks = rest_of_mask | mask; +        } else { +                ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);          } -        UNLOCK (&inode->lock); -out: -        return;  } - -uint64_t -afr_is_opendir_done (xlator_t *this, inode_t *inode) +void +afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)  { -        int      ret          = 0; -        uint64_t ctx          = 0; -        uint64_t opendir_done = 0; +        GF_ASSERT (inode); +        GF_ASSERT (params); -        VALIDATE_OR_GOTO (inode, out); +        int             ret = 0; +        afr_inode_ctx_t *ctx            = NULL; +        afr_private_t   *priv           = NULL; +        uint64_t        ctx_addr        = 0; +        gf_boolean_t    set             = _gf_false; +        int32_t         read_child      = -1; +        int32_t         *fresh_children = NULL; +        priv = this->private;          LOCK (&inode->lock);          { -                ret = __inode_ctx_get (inode, this, &ctx); - +                ret = __inode_ctx_get (inode, this, &ctx_addr);                  if (ret < 0) +                        ctx_addr = 0; +                
ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); +                if (!ctx)                          goto unlock; - -                opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK; +                switch (params->mask_type) { +                case AFR_ICTX_READ_CHILD_MASK: +                        read_child = params->u.read_ctx.read_child; +                        fresh_children = params->u.read_ctx.fresh_children; +                        afr_inode_ctx_set_read_ctx (ctx, read_child, +                                                    fresh_children, +                                                    priv->child_count); +                        break; +                case AFR_ICTX_OPENDIR_DONE_MASK: +                        afr_inode_ctx_set_opendir_done (ctx); +                        break; +                case AFR_ICTX_SPLIT_BRAIN_MASK: +                        set = params->u.value; +                        afr_inode_ctx_set_splitbrain (ctx, set); +                        break; +                } +                ret = __inode_ctx_put (inode, this, (uint64_t)ctx); +                if (ret) { +                        gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to " +                                          "set the inode ctx (%s)", +                                          uuid_utoa (inode->gfid)); +                }          }  unlock:          UNLOCK (&inode->lock); - -out: -        return opendir_done;  } +void +afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) +{ +        afr_inode_params_t      params = {0}; + +        params.mask_type        = AFR_ICTX_SPLIT_BRAIN_MASK; +        params.u.value          = set; +        afr_inode_set_ctx (this, inode, ¶ms); +}  void  afr_set_opendir_done (xlator_t *this, inode_t *inode)  { -        uint64_t ctx = 0; -        int      ret = 0; +        afr_inode_params_t params = {0}; -        VALIDATE_OR_GOTO (inode, out); +        params.mask_type = 
AFR_ICTX_OPENDIR_DONE_MASK; +        afr_inode_set_ctx (this, inode, ¶ms); +} -        LOCK (&inode->lock); -        { -                ret = __inode_ctx_get (inode, this, &ctx); +void +afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child, +                        int32_t *fresh_children) +{ +        afr_inode_params_t params = {0}; -                if (ret < 0) { -                        ctx = 0; -                } +        GF_ASSERT (read_child >= 0); +        GF_ASSERT (fresh_children); -                ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx) -                        | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); +        params.mask_type                  = AFR_ICTX_READ_CHILD_MASK; +        params.u.read_ctx.read_child     = read_child; +        params.u.read_ctx.fresh_children = fresh_children; +        afr_inode_set_ctx (this, inode, ¶ms); +} -                ret = __inode_ctx_put (inode, this, ctx); -                if (ret) { -                        gf_log_callingfn (this->name, GF_LOG_INFO, -                                          "failed to set the inode ctx (%s)", -                                          uuid_utoa (inode->gfid)); -                } +gf_boolean_t +afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child) +{ +        gf_boolean_t             source_xattrs = _gf_false; + +        GF_ASSERT (child < child_count); + +        if ((child >= 0) && (child < child_count) && +             sources[child]) { +                source_xattrs = _gf_true;          } -        UNLOCK (&inode->lock); -out: -        return; +        return source_xattrs;  } +gf_boolean_t +afr_is_success_child (int32_t *success_children, int32_t child_count, +                      int32_t child) +{ +        gf_boolean_t             success_child = _gf_false; +        int                      i = 0; -uint64_t -afr_read_child (xlator_t *this, inode_t *inode) +        GF_ASSERT (child < child_count); + +        for (i = 0; i 
< child_count; i++) { +                if (success_children[i] == -1) +                        break; +                if (child == success_children[i]) { +                        success_child = _gf_true; +                        break; +                } +        } +        return success_child; +} + +gf_boolean_t +afr_is_read_child (int32_t *success_children, int32_t *sources, +                   int32_t child_count, int32_t child)  { -        int ret = 0; +        gf_boolean_t             success_child = _gf_false; +        gf_boolean_t             source        = _gf_false; -        uint64_t ctx         = 0; -        uint64_t read_child  = 0; +        GF_ASSERT (success_children); +        GF_ASSERT (child_count > 0); -        VALIDATE_OR_GOTO (inode, out); +        success_child = afr_is_success_child (success_children, child_count, +                                              child); +        if (!success_child) +                goto out; +        if (NULL == sources) { +                source = _gf_true; +                goto out; +        } +        source = afr_is_source_child (sources, child_count, child); +out: +        return (success_child && source); +} -        LOCK (&inode->lock); -        { -                ret = __inode_ctx_get (inode, this, &ctx); +/* If sources is NULL the xattrs are assumed to be of source for all + * success_children. 
+ */ +int +afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count, +                                   int32_t prev_read_child, +                                   int32_t config_read_child, int32_t *sources) +{ +        int32_t                  read_child   = -1; +        int                      i            = 0; -                if (ret < 0) -                        goto unlock; +        GF_ASSERT (success_children); + +        read_child = prev_read_child; +        if (afr_is_read_child (success_children, sources, child_count, +                               read_child)) +                goto out; + +        read_child = config_read_child; +        if (afr_is_read_child (success_children, sources, child_count, +                               read_child)) +                goto out; -                read_child = ctx & AFR_ICTX_READ_CHILD_MASK; +        for (i = 0; i < child_count; i++) { +                read_child = success_children[i]; +                if (read_child < 0) +                        break; +                if (afr_is_read_child (success_children, sources, child_count, +                                       read_child)) +                        goto out;          } -unlock: -        UNLOCK (&inode->lock); +        read_child = -1;  out:          return read_child;  } - +/* This function should be used when all the success_children are sources + */  void -afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child) +afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode, +                              int32_t *fresh_children, int32_t prev_read_child, +                              int32_t config_read_child)  { -        uint64_t ctx = 0; -        int      ret = 0; +        int                      read_child = -1; +        afr_private_t            *priv = NULL; -        VALIDATE_OR_GOTO (inode, out); +        priv = this->private; +        read_child = afr_select_read_child_from_policy (fresh_children, +   
                                                     priv->child_count, +                                                        prev_read_child, +                                                        config_read_child, +                                                        NULL); +        afr_inode_set_read_ctx (this, inode, read_child, fresh_children); +} + +/* afr_next_call_child () + * This is a common function used by all the read-type fops + * This function should not be called with the inode's read_children array. + * The fop's handler should make a copy of the inode's read_children, + * preferred read_child into the local vars, because while this function is + * in execution there is a chance for inode's read_ctx to change. + */ +int32_t +afr_next_call_child (int32_t *fresh_children, size_t child_count, +                     int32_t *last_index, int32_t read_child) +{ +        int             next_index      = 0; +        int32_t         next_call_child = -1; -        LOCK (&inode->lock); -        { -                ret = __inode_ctx_get (inode, this, &ctx); +        GF_ASSERT (last_index); -                if (ret < 0) { -                        ctx = 0; -                } +        next_index = *last_index; +retry: +        next_index++; +        if (next_index >= child_count) +                goto out; +        if (fresh_children[next_index] == read_child) +                goto retry; +        if (fresh_children[next_index] == -1) +                goto out; +        *last_index = next_index; +        next_call_child = fresh_children[next_index]; +out: +        return next_call_child; +} -                ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx) -                        | (AFR_ICTX_READ_CHILD_MASK & read_child); + /* This function should not be called with the inode's read_children array. 
+ * The fop's handler should make a copy of the inode's read_children, + * preferred read_child into the local vars, because while this function is + * in execution there is a chance for inode's read_ctx to change. + */ +int32_t +afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child, +                    int32_t *fresh_children, +                    int32_t *call_child, int32_t *last_index) +{ +        int             ret   = 0; +        afr_private_t   *priv = NULL; +        int             i     = 0; -                ret = __inode_ctx_put (inode, this, ctx); -                if (ret) { -                        gf_log_callingfn (this->name, GF_LOG_INFO, -                                          "failed to set the inode ctx (%s)", -                                          uuid_utoa (inode->gfid)); +        GF_ASSERT (child_up); +        GF_ASSERT (call_child); +        GF_ASSERT (last_index); +        GF_ASSERT (fresh_children); +        GF_ASSERT (read_child >= 0); + +        priv = this->private; +        *call_child = -1; +        *last_index = -1; + +        if (child_up[read_child]) { +                *call_child = read_child; +        } else { +                for (i = 0; i < priv->child_count; i++) { +                        if (fresh_children[i] == -1) +                                break; +                        if (child_up[fresh_children[i]]) { +                                *call_child = fresh_children[i]; +                                ret = 0; +                                break; +                        }                  } -        } -        UNLOCK (&inode->lock); +                if (*call_child == -1) { +                        ret = -ENOTCONN; +                        goto out; +                } + +                *last_index = i; +        }  out: -        return; +        gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, " +                "last_index: %d", ret, *call_child, *last_index); +  
      return ret;  } -  void  afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)  { @@ -325,8 +582,12 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)          if (sh->linkname)                  GF_FREE ((char *)sh->linkname); -        if (sh->child_success) -                GF_FREE (sh->child_success); + +        if (sh->success_children) +                GF_FREE (sh->success_children); + +        if (sh->fresh_children) +                GF_FREE (sh->fresh_children);          loc_wipe (&sh->parent_loc);  } @@ -398,6 +659,9 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)          if (local->child_up)                  GF_FREE (local->child_up); +        if (local->fresh_children) +                GF_FREE (local->fresh_children); +          { /* lookup */                  if (local->cont.lookup.xattrs) {                          for (i = 0; i < priv->child_count; i++) { @@ -424,8 +688,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)                  if (local->cont.lookup.bufs)                          GF_FREE (local->cont.lookup.bufs); -                if (local->cont.lookup.child_success) -                        GF_FREE (local->cont.lookup.child_success); +                if (local->cont.lookup.success_children) +                        GF_FREE (local->cont.lookup.success_children);                  if (local->cont.lookup.sources)                          GF_FREE (local->cont.lookup.sources); @@ -734,20 +998,21 @@ int  afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,                                int32_t *read_child)  { -        int32_t                 source = -1; -        ia_type_t               ia_type = 0; -        int                     ret = -1; -        afr_transaction_type    type = AFR_METADATA_TRANSACTION; -        dict_t                  **xattrs = NULL; -        int32_t                 *child_success = NULL; -        struct iatt             *bufs = NULL; +        int32_t                 source  
       = -1; +        ia_type_t               ia_type        = 0; +        int                     ret            = -1; +        afr_transaction_type    type           = AFR_METADATA_TRANSACTION; +        dict_t                  **xattrs       = NULL; +        int32_t                 *success_children = NULL; +        struct iatt             *bufs          = NULL;          GF_ASSERT (local);          GF_ASSERT (this); +        GF_ASSERT (local->success_count > 0);          bufs = local->cont.lookup.bufs; -        child_success = local->cont.lookup.child_success; -        ia_type = local->cont.lookup.bufs[child_success[0]].ia_type; +        success_children = local->cont.lookup.success_children; +        ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;          if (IA_ISDIR (ia_type)) {                  type = AFR_ENTRY_TRANSACTION;          } else if (IA_ISREG (ia_type)) { @@ -773,7 +1038,7 @@ afr_is_self_heal_running (afr_local_t *local)  }  static void -afr_launch_self_heal (call_frame_t *frame, xlator_t *this, +afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,                        gf_boolean_t is_background, ia_type_t ia_type,                        int (*unwind) (call_frame_t *frame, xlator_t *this))  { @@ -782,6 +1047,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this,          GF_ASSERT (frame);          GF_ASSERT (this); +        GF_ASSERT (inode);          local = frame->local;          local->self_heal.background = is_background; @@ -796,7 +1062,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this,                  "background %s self-heal triggered. 
path: %s",                  sh_type_str, local->loc.path); -        afr_self_heal (frame, this); +        afr_self_heal (frame, this, inode);  }  static void @@ -813,8 +1079,8 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)          bufs = local->cont.lookup.bufs;          for (i = 1; i < local->success_count; i++) { -                child1 = local->cont.lookup.child_success[i-1]; -                child2 = local->cont.lookup.child_success[i];; +                child1 = local->cont.lookup.success_children[i-1]; +                child2 = local->cont.lookup.success_children[i];                  afr_detect_self_heal_by_iatt (local, this,                                                &bufs[child1], &bufs[child2]);          } @@ -822,7 +1088,7 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)          xattr = local->cont.lookup.xattrs;          priv  = this->private;          for (i = 0; i < local->success_count; i++) { -                child1 = local->cont.lookup.child_success[i];; +                child1 = local->cont.lookup.success_children[i];                  afr_lookup_detect_self_heal_by_xattr (local, this,                                                        xattr[child1]);          } @@ -860,8 +1126,8 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,                          goto out;                  } -                afr_launch_self_heal (frame, this, _gf_true, -                                      local->cont.lookup.buf.ia_type, +                afr_launch_self_heal (frame, this, local->cont.lookup.inode, +                                      _gf_true, local->cont.lookup.buf.ia_type,                                        afr_self_heal_lookup_unwind);                  *sh_launched = _gf_true;          } @@ -875,22 +1141,22 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this)          int             i              = 0;          gf_boolean_t    symptom        = _gf_false;       
   struct iatt     *bufs          = NULL; -        int32_t         *child_success = NULL; +        int32_t         *success_children = NULL;          struct iatt     *child1        = NULL;          struct iatt     *child2        = NULL;          const char      *path          = NULL;          bufs = local->cont.lookup.bufs; -        child_success = local->cont.lookup.child_success; +        success_children = local->cont.lookup.success_children;          for (i = 1; i < local->success_count; i++) { -                child1 = &bufs[child_success[i-1]]; -                child2 = &bufs[child_success[i]]; +                child1 = &bufs[success_children[i-1]]; +                child2 = &bufs[success_children[i]];                  /*                   * TODO: gfid self-heal                   * if (uuid_compare (child1->ia_gfid, child2->ia_gfid)) {                   *        gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs"                   *                " on subvolumes (%d, %d)", local->loc.path, -                 *                child_success[i-1], child_success[i]); +                 *                success_children[i-1], success_children[i]);                   *        symptom = _gf_true;                   * }                   */ @@ -899,7 +1165,7 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this)                          path = local->loc.path;                          gf_log (this->name, GF_LOG_WARNING, "%s: filetype "                                  "differs on subvolumes (%d, %d)", path, -                                child_success[i-1], child_success[i]); +                                success_children[i-1], success_children[i]);                          symptom = _gf_true;                          local->govinda_gOvinda = 1;                  } @@ -909,13 +1175,42 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this)          return symptom;  } +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, +      
                  int32_t *fresh_children, unsigned int child_count) +{ +        unsigned int i = 0; +        unsigned int j = 0; + +        GF_ASSERT (success_children); +        GF_ASSERT (sources); +        GF_ASSERT (fresh_children); + +        for (i = 0; i < child_count; i++) { +                if (success_children[i] == -1) +                        break; +                if (afr_is_read_child (success_children, sources, child_count, +                                       success_children[i])) { +                        fresh_children[j] = success_children[i]; +                        j++; +                } +        } +} +  static int -afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_child) +afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)  { +        afr_private_t           *priv = NULL; +          GF_ASSERT (read_child >= 0); -        afr_set_read_child (this, local->cont.lookup.inode, read_child); +        priv = this->private;          local->cont.lookup.read_child = read_child; +        afr_get_fresh_children (local->cont.lookup.success_children, +                                local->cont.lookup.sources, +                                local->fresh_children, priv->child_count); +        afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child, +                                local->fresh_children);          return 0;  } @@ -949,7 +1244,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)                  goto unwind;          } -        ret = afr_lookup_set_read_child (local, this, read_child); +        ret = afr_lookup_set_read_ctx (local, this, read_child);          if (ret)                  goto unwind; @@ -1070,7 +1365,7 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind          afr_lookup_cache_args (local, child_index, xattr,                                 buf, postparent); -        
local->cont.lookup.child_success[local->success_count] = child_index; +        local->cont.lookup.success_children[local->success_count] = child_index;          local->success_count++;  } @@ -1114,9 +1409,8 @@ int  afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)  {          int               ret            = -ENOMEM; -        int32_t           *child_success = NULL;          struct iatt       *iatts         = NULL; -        int               i              = 0; +        int32_t           *success_children = NULL;          GF_ASSERT (local);          local->cont.lookup.xattrs = GF_CALLOC (child_count, @@ -1135,14 +1429,14 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)                  goto out;          local->cont.lookup.bufs = iatts; -        child_success = GF_CALLOC (child_count, sizeof (*child_success), -                                   gf_afr_mt_char); -        if (NULL == child_success) +        success_children = afr_fresh_children_create (child_count); +        if (NULL == success_children)                  goto out; -        for (i = 0; i < child_count; i++) -                child_success[i] = -1; +        local->cont.lookup.success_children = success_children; -        local->cont.lookup.child_success = child_success; +        local->fresh_children = afr_fresh_children_create (child_count); +        if (NULL == local->fresh_children) +                goto out;          local->cont.lookup.read_child = -1;          ret = 0; @@ -1181,8 +1475,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this,          if (ret == 0) {                  /* lookup is a revalidate */ -                local->read_child_index          = afr_read_child (this, -                                                                   loc->inode); +                local->read_child_index = afr_inode_get_read_ctx (this, loc->inode, +                                                                  NULL);          } else {                  LOCK 
(&priv->read_child_lock);                  { @@ -1611,7 +1905,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local; -        read_child = afr_read_child (this, local->fd->inode); +        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);          LOCK (&frame->lock);          { @@ -2533,6 +2827,25 @@ out:  }  int +afr_forget (xlator_t *this, inode_t *inode) +{ +        uint64_t        ctx_addr = 0; +        afr_inode_ctx_t *ctx     = NULL; + +        inode_ctx_get (inode, this, &ctx_addr); + +        if (!ctx_addr) +                goto out; + +        ctx = (afr_inode_ctx_t *)(long)ctx_addr; +        if (ctx->fresh_children) +                GF_FREE (ctx->fresh_children); +        GF_FREE (ctx); +out: +        return 0; +} + +int  afr_priv_dump (xlator_t *this)  {          afr_private_t *priv = NULL; @@ -2759,6 +3072,16 @@ out:  int  AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)  { +        local->op_ret = -1; +        local->op_errno = EUCLEAN; +        local->call_count = afr_up_children_count (priv->child_count, +                                                   priv->child_up); +        if (local->call_count == 0) { +                gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up"); +                return -ENOTCONN; +        } + +          local->child_up = GF_CALLOC (sizeof (*local->child_up),                                       priv->child_count,                                       gf_afr_mt_char); @@ -2769,16 +3092,6 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)          memcpy (local->child_up, priv->child_up,                  sizeof (*local->child_up) * priv->child_count); -        local->call_count = afr_up_children_count (priv->child_count, -                                                   local->child_up); -        local->op_ret = -1; -        local->op_errno = EUCLEAN; - -        if (local->call_count == 0) { -                gf_log (THIS->name, 
GF_LOG_INFO, "no subvolumes up"); -                return -ENOTCONN; -        } -          return 0;  } @@ -2849,6 +3162,10 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)          if (!local->pending)                  goto out; +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) +                goto out; +          for (i = 0; i < priv->child_count; i++) {                  local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),                                                 3, /* data + metadata + entry */ @@ -2867,3 +3184,50 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)  out:          return ret;  } + +void +afr_reset_children (int32_t *fresh_children, int32_t child_count) +{ +        unsigned int i = 0; +        for (i = 0; i < child_count; i++) +                fresh_children[i] = -1; +} + +int32_t* +afr_fresh_children_create (int32_t child_count) +{ +        int32_t           *fresh_children = NULL; +        int               i               = 0; + +        GF_ASSERT (child_count > 0); + +        fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children), +                                    gf_afr_mt_int32_t); +        if (NULL == fresh_children) +                goto out; +        for (i = 0; i < child_count; i++) +                fresh_children[i] = -1; +out: +        return fresh_children; +} + +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, +                              int32_t child_count) +{ +        gf_boolean_t child_found = _gf_false; +        int          i               = 0; + +        for (i = 0; i < child_count; i++) { +                if (fresh_children[i] == -1) +                        break; +                if (fresh_children[i] == child) { +                        child_found = _gf_true; +                        break; +                } +        } +        if (!child_found) { +        
        GF_ASSERT (i < child_count); +                fresh_children[i] = child; +        } +} diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 1bd2cc96392..8593d0c14c5 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -177,7 +177,7 @@ out:                                  " forced merge option set",                                  sh_type_str, local->loc.path); -                        afr_self_heal (frame, this); +                        afr_self_heal (frame, this, local->fd->inode);                  } else {                          afr_set_opendir_done (this, local->fd->inode); diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 88c3f728f49..6da666804de 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -119,13 +119,14 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                       fd_t *fd, inode_t *inode, struct iatt *buf,                       struct iatt *preparent, struct iatt *postparent)  { -        afr_local_t *   local = NULL; -        afr_private_t * priv  = NULL; -        uint64_t      ctx = 0; -        afr_fd_ctx_t *fd_ctx = NULL; -        int ret = 0; -        int call_count = -1; -        int child_index = -1; +        afr_local_t     *local = NULL; +        afr_private_t   *priv  = NULL; +        uint64_t        ctx = 0; +        afr_fd_ctx_t    *fd_ctx = NULL; +        int             ret = 0; +        int             call_count = -1; +        int             child_index = -1; +        int32_t         *fresh_children = NULL;          local = frame->local;          priv  = this->private; @@ -166,18 +167,9 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          fd_ctx->opened_on[child_index] = 1;                          fd_ctx->flags                  = 
local->cont.create.flags; -                        if (local->success_count == 0) { +                        if (local->success_count == 0)                                  local->cont.create.buf        = *buf; -                                if (priv->read_child >= 0) { -                                        afr_set_read_child (this, inode, -                                                            priv->read_child); -                                } else { -                                        afr_set_read_child (this, inode, -                                                            local->read_child_index); -                                } -                        } -                          if (child_index == local->read_child_index) {                                  local->cont.create.read_child_buf = *buf;                                  local->cont.create.preparent      = *preparent; @@ -186,6 +178,8 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->cont.create.inode = inode; +                        fresh_children = local->fresh_children; +                        fresh_children[local->success_count] = child_index;                          local->success_count++;                  } @@ -198,6 +192,10 @@ unlock:          call_count = afr_frame_return (frame);          if (call_count == 0) { +                afr_set_read_ctx_from_policy (this, inode, +                                              local->fresh_children, +                                              local->read_child_index, +                                              priv->read_child);                  local->transaction.unwind (frame, this);                  local->transaction.resume (frame, this); @@ -382,10 +380,11 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                      struct iatt *buf, struct iatt *preparent,                      struct iatt *postparent)  { -        
afr_local_t *   local = NULL; -        afr_private_t * priv  = NULL; -        int call_count = -1; -        int child_index = -1; +        afr_local_t     *local          = NULL; +        afr_private_t   *priv           = NULL; +        int             call_count      = -1; +        int             child_index     = -1; +        int32_t         *fresh_children = NULL;          local = frame->local;          priv = this->private; @@ -400,18 +399,9 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  if (op_ret != -1) {                          local->op_ret = op_ret; -                        if (local->success_count == 0){ +                        if (local->success_count == 0)                                  local->cont.mknod.buf   = *buf; -                                if (priv->read_child >= 0) { -                                        afr_set_read_child (this, inode, -                                                            priv->read_child); -                                } else { -                                        afr_set_read_child (this, inode, -                                                            local->read_child_index); -                                } -                        } -                          if (child_index == local->read_child_index) {                                  local->cont.mknod.read_child_buf = *buf;                                  local->cont.mknod.preparent      = *preparent; @@ -420,6 +410,8 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->cont.mknod.inode = inode; +                        fresh_children = local->fresh_children; +                        fresh_children[local->success_count] = child_index;                          local->success_count++;                  } @@ -430,6 +422,10 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);     
     if (call_count == 0) { +                afr_set_read_ctx_from_policy (this, inode, +                                              local->fresh_children, +                                              local->read_child_index, +                                              priv->read_child);                  local->transaction.unwind (frame, this);                  local->transaction.resume (frame, this); @@ -609,10 +605,11 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                      struct iatt *buf, struct iatt *preparent,                      struct iatt *postparent)  { -        afr_local_t *   local = NULL; -        afr_private_t * priv  = NULL; -        int call_count = -1; -        int child_index = -1; +        afr_local_t     *local          = NULL; +        afr_private_t   *priv           = NULL; +        int             call_count      = -1; +        int             child_index     = -1; +        int32_t         *fresh_children = NULL;          local = frame->local;          priv = this->private; @@ -627,18 +624,9 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  if (op_ret != -1) {                          local->op_ret           = op_ret; -                        if (local->success_count == 0) { +                        if (local->success_count == 0)                                  local->cont.mkdir.buf   = *buf; -                                if (priv->read_child >= 0) { -                                        afr_set_read_child (this, inode, -                                                            priv->read_child); -                                } else { -                                        afr_set_read_child (this, inode, -                                                            local->read_child_index); -                                } -                        } -                          if (child_index == local->read_child_index) {                     
             local->cont.mkdir.read_child_buf = *buf;                                  local->cont.mkdir.preparent      = *preparent; @@ -647,6 +635,8 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->cont.mkdir.inode = inode; +                        fresh_children = local->fresh_children; +                        fresh_children[local->success_count] = child_index;                          local->success_count++;                  } @@ -657,6 +647,10 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { +                afr_set_read_ctx_from_policy (this, inode, +                                              local->fresh_children, +                                              local->read_child_index, +                                              priv->read_child);                  local->transaction.unwind (frame, this);                  local->transaction.resume (frame, this); @@ -837,10 +831,11 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                     struct iatt *buf, struct iatt *preparent,                     struct iatt *postparent)  { -        afr_local_t *   local = NULL; -        afr_private_t * priv  = NULL; -        int call_count = -1; -        int child_index = -1; +        afr_local_t     *local          = NULL; +        afr_private_t   *priv           = NULL; +        int             call_count      = -1; +        int             child_index     = -1; +        int32_t         *fresh_children = NULL;          local = frame->local;          priv = this->private; @@ -857,14 +852,6 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          if (local->success_count == 0) {                                  local->cont.link.buf        = *buf; - -                                if (priv->read_child >= 0) { -                        
                afr_set_read_child (this, inode, -                                                            priv->read_child); -                                } else { -                                        afr_set_read_child (this, inode, -                                                            local->read_child_index); -                                }                          }                          if (child_index == local->read_child_index) { @@ -875,6 +862,8 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->cont.link.inode    = inode; +                        fresh_children = local->fresh_children; +                        fresh_children[local->success_count] = child_index;                          local->success_count++;                  } @@ -885,6 +874,10 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { +                afr_set_read_ctx_from_policy (this, inode, +                                              local->fresh_children, +                                              local->read_child_index, +                                              priv->read_child);                  local->transaction.unwind (frame, this);                  local->transaction.resume (frame, this); @@ -1062,10 +1055,11 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                        struct iatt *buf, struct iatt *preparent,                        struct iatt *postparent)  { -        afr_local_t *   local = NULL; -        afr_private_t * priv  = NULL; -        int call_count = -1; -        int child_index = -1; +        afr_local_t     *local          = NULL; +        afr_private_t   *priv           = NULL; +        int             call_count      = -1; +        int             child_index     = -1; +        int32_t         *fresh_children = NULL;          local = 
frame->local;          priv = this->private; @@ -1080,16 +1074,8 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  if (op_ret != -1) {                          local->op_ret   = op_ret; -                        if (local->success_count == 0) { +                        if (local->success_count == 0)                                  local->cont.symlink.buf        = *buf; -                                if (priv->read_child >= 0) { -                                        afr_set_read_child (this, inode, -                                                            priv->read_child); -                                } else { -                                        afr_set_read_child (this, inode, -                                                            local->read_child_index); -                                } -                        }                          if (child_index == local->read_child_index) {                                  local->cont.symlink.read_child_buf = *buf; @@ -1099,6 +1085,8 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->cont.symlink.inode    = inode; +                        fresh_children = local->fresh_children; +                        fresh_children[local->success_count] = child_index;                          local->success_count++;                  } @@ -1109,6 +1097,10 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { +                afr_set_read_ctx_from_policy (this, inode, +                                              local->fresh_children, +                                              local->read_child_index, +                                              priv->read_child);                  local->transaction.unwind (frame, this);                  local->transaction.resume (frame, this); @@ -1424,7 +1416,7 @@ 
afr_rename (call_frame_t *frame, xlator_t *this,          loc_copy (&local->loc,    oldloc);          loc_copy (&local->newloc, newloc); -        local->read_child_index = afr_read_child (this, oldloc->inode); +        local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);          local->cont.rename.ino = oldloc->inode->ino; diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 742d3687ceb..f2507f07ef4 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -134,7 +134,15 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)          ALLOC_OR_GOTO (local, afr_local_t, out); -        read_child = afr_read_child (this, loc->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof (*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } + +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);          if ((read_child >= 0) && (priv->child_up[read_child])) {                  call_child = read_child; @@ -252,7 +260,14 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)          frame->local = local; -        read_child = afr_read_child (this, loc->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof (*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);          if ((read_child >= 0) && (priv->child_up[read_child])) {                  call_child = read_child; @@ -375,7 +390,14 @@ 
afr_fstat (call_frame_t *frame, xlator_t *this,          VALIDATE_OR_GOTO (fd->inode, out); -        read_child = afr_read_child (this, fd->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof (*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } +        read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);          if ((read_child >= 0) && (priv->child_up[read_child])) {                  call_child = read_child; @@ -494,7 +516,14 @@ afr_readlink (call_frame_t *frame, xlator_t *this,          frame->local = local; -        read_child = afr_read_child (this, loc->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof (*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);          if ((read_child >= 0) && (priv->child_up[read_child])) {                  call_child = read_child; @@ -879,7 +908,14 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,                  }          } -        read_child = afr_read_child (this, loc->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof (*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);          if ((read_child >= 0) && (priv->child_up[read_child])) {                  call_child = read_child; @@ 
-1020,7 +1056,14 @@ afr_readv (call_frame_t *frame, xlator_t *this,          frame->local = local; -        read_child = afr_read_child (this, fd->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof (*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } +        read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);          if ((read_child >= 0) && (priv->child_up[read_child])) {                  call_child = read_child; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 8b82add6276..564bb953a52 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -87,7 +87,7 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local; -        read_child = afr_read_child (this, local->fd->inode); +        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);          LOCK (&frame->lock);          { @@ -343,7 +343,7 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local;          priv  = this->private; -        read_child = afr_read_child (this, local->loc.inode); +        read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);          LOCK (&frame->lock);          { @@ -550,7 +550,7 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local;          priv  = this->private; -        read_child = afr_read_child (this, local->fd->inode); +        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);          LOCK (&frame->lock);          { @@ -712,8 +712,8 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,          priv = this->private;          
ALLOC_OR_GOTO (local, afr_local_t, out); -          ret = AFR_LOCAL_INIT (local, priv); +          if (ret < 0) {                  op_errno = -ret;                  goto out; @@ -797,7 +797,7 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local;          priv  = this->private; -        read_child = afr_read_child (this, local->loc.inode); +        read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);          LOCK (&frame->lock);          { @@ -1004,7 +1004,7 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local;          priv  = this->private; -        read_child = afr_read_child (this, local->fd->inode); +        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);          LOCK (&frame->lock);          { diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 14064ebcd76..de2049589e7 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -42,6 +42,7 @@ enum gf_afr_mem_types_ {          gf_afr_mt_entry_name,          gf_afr_mt_pump_priv,          gf_afr_mt_locked_fd, +        gf_afr_mt_inode_ctx_t,          gf_afr_mt_end  };  #endif diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index e6304a5ea7d..4aa587399b5 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -434,7 +434,7 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this)                  "path: %s, reason: Replicate up down flush, data lock is held",                  sh_type_str, local->loc.path); -        afr_self_heal (frame, this); +        afr_self_heal (frame, this, local->fd->inode);          return 0;  } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index abc9ccb0fbe..16345bee738 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c 
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1640,18 +1640,15 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          afr_local_t *     local = NULL;          afr_self_heal_t * sh    = NULL;          char              sh_type_str[256] = {0,}; +        gf_boolean_t      split_brain = _gf_false;          priv  = this->private;          local = bgsh_frame->local;          sh    = &local->self_heal; -        if (local->govinda_gOvinda) { -                afr_set_split_brain (this, local->cont.lookup.inode, -                                     _gf_true); -        } else { -                afr_set_split_brain (this, local->cont.lookup.inode, -                                     _gf_false); -        } +        if (local->govinda_gOvinda) +                split_brain = _gf_true; +        afr_set_split_brain (this, sh->inode, split_brain);          afr_self_heal_type_str_get (sh, sh_type_str,                                      sizeof(sh_type_str)); @@ -1683,7 +1680,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)  }  int -afr_self_heal (call_frame_t *frame, xlator_t *this) +afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; @@ -1726,6 +1723,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)          sh_local        = afr_local_copy (local, this);          sh_frame->local = sh_local;          sh              = &sh_local->self_heal; +        sh->inode       = inode;          sh->orig_frame  = frame; @@ -1761,8 +1759,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)                                                   priv->child_count,                                                   gf_afr_mt_int32_t);          } -        sh->child_success = GF_CALLOC (sizeof (*sh->child_success), -                                       priv->child_count, gf_afr_mt_int32_t); +        sh->success_children = 
afr_fresh_children_create (priv->child_count); +        sh->fresh_children = afr_fresh_children_create (priv->child_count);          FRAME_SU_DO (sh_frame, afr_local_t); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 3ee1db0e726..f9a25797275 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -299,12 +299,25 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,                                 xlator_t *this, int32_t op_ret,                                 int32_t op_errno, dict_t *xattr)  { +        afr_local_t     *local     = NULL;          int             call_count = 0; +        long            i          = 0; +        afr_self_heal_t *sh        = NULL; +        afr_private_t   *priv      = NULL; +        local = frame->local; +        priv  = this->private; +        sh = &local->self_heal; +        i = (long)cookie; + +        afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);          call_count = afr_frame_return (frame); -        if (call_count == 0) +        if (call_count == 0) { +                afr_inode_set_read_ctx (this, sh->inode, sh->source, +                                        sh->fresh_children);                  afr_sh_data_finish (frame, this); +        }          return 0;  } @@ -602,7 +615,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)          nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,                                       priv->child_count, AFR_SELF_HEAL_DATA, -                                     sh->child_success, this->name); +                                     sh->success_children, this->name);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE, @@ -670,7 +683,11 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)                          sh->sources[i] = 0;          } -        afr_set_read_child 
(this, local->loc.inode, sh->source); +        afr_reset_children (sh->fresh_children, priv->child_count); +        afr_get_fresh_children (sh->success_children, sh->sources, +                                sh->fresh_children, priv->child_count); +        afr_inode_set_read_ctx (this, sh->inode, sh->source, +                                sh->fresh_children);          /*            quick-read might have read the file, so send xattr from @@ -691,56 +708,6 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)          return 0;  } -gf_boolean_t -afr_is_fresh_read_child (int32_t *sources, int32_t child_count, -                         int32_t read_child) -{ -        gf_boolean_t             is_fresh_child = _gf_false; - -        GF_ASSERT (read_child < child_count); - -        if ((read_child >= 0) && (read_child < child_count) && -             sources[read_child]) { -                is_fresh_child = _gf_true; -        } -        return is_fresh_child; -} - -static int -afr_select_read_child_from_policy (int32_t *sources, int32_t child_count, -                                   int32_t prev_read_child, -                                   int32_t config_read_child, -                                   int32_t *valid_children) -{ -        int32_t                  read_child = -1; -        int                      i          = 0; - -        GF_ASSERT (sources); - -        read_child = prev_read_child; -        if (_gf_true == afr_is_fresh_read_child (sources, child_count, -                                                 read_child)) -                goto out; - -        read_child = config_read_child; -        if (_gf_true == afr_is_fresh_read_child (sources, child_count, -                                                 read_child)) -                goto out; - -        for (i = 0; i < child_count; i++) { -                read_child = valid_children[i]; -                if (read_child < 0) -                        break; -                if (_gf_true == 
afr_is_fresh_read_child (sources, child_count, -                                                         read_child)) -                        goto out; -        } -        read_child = -1; - -out: -        return read_child; -} -  static void  afr_destroy_pending_matrix (int32_t **pending_matrix, int32_t child_count)  { @@ -796,7 +763,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          afr_self_heal_type       sh_type    = AFR_SELF_HEAL_INVALID;          int32_t                  **pending_matrix = NULL;          int32_t                  *sources         = NULL; -        int32_t                  *valid_children  = NULL; +        int32_t                  *success_children   = NULL;          struct iatt              *bufs            = NULL;          int32_t                  nsources         = 0;          int32_t                  prev_read_child  = -1; @@ -805,7 +772,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          priv = this->private;          bufs = local->cont.lookup.bufs; -        valid_children = local->cont.lookup.child_success; +        success_children = local->cont.lookup.success_children;          sh = &local->self_heal;          pending_matrix = afr_create_pending_matrix (priv->child_count); @@ -826,7 +793,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          nsources = afr_mark_sources (sources, pending_matrix, bufs,                                       priv->child_count, sh_type, -                                     valid_children, this->name); +                                     success_children, this->name);          if (nsources < 0) {                  ret = -1;                  goto out; @@ -834,11 +801,11 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          prev_read_child = local->read_child_index;          config_read_child = priv->read_child; -        read_child = 
afr_select_read_child_from_policy (sources, +        read_child = afr_select_read_child_from_policy (success_children,                                                          priv->child_count,                                                          prev_read_child,                                                          config_read_child, -                                                        valid_children); +                                                        sources);          ret = 0;          local->cont.lookup.sources = sources;  out: @@ -875,7 +842,7 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,                                  priv->children[child_index]->name);                          sh->buf[child_index] = *buf; -                        sh->child_success[sh->success_count] = child_index; +                        sh->success_children[sh->success_count] = child_index;                          sh->success_count++;                  }          } @@ -909,8 +876,7 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)          local->call_count = call_count; -        for (i = 0; i < priv->child_count; i++) -                sh->child_success[i] = -1; +        afr_reset_children (sh->success_children, priv->child_count);          sh->success_count = 0;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 0425644b3fc..50870afb204 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -125,12 +125,16 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,          afr_local_t         *orig_local = NULL;          call_frame_t        *orig_frame = NULL;          afr_private_t       *priv       = NULL; +        int32_t             read_child  = -1;          local = frame->local;          priv  = this->private; +      
  sh = &local->self_heal; +        i = (long)cookie; + +        afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);          if (op_ret == -1) { -                i = (long)cookie;                  gf_log (this->name, GF_LOG_INFO,                          "%s: failed to erase pending xattrs on %s (%s)",                          local->loc.path, priv->children[i]->name, @@ -140,8 +144,14 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,          call_count = afr_frame_return (frame);          if (call_count == 0) { -                sh = &local->self_heal; - +                if (sh->source == -1) { +                        //this happens if the forced merge option is set +                        read_child = sh->fresh_children[0]; +                } else { +                        read_child = sh->source; +                } +                afr_inode_set_read_ctx (this, sh->inode, read_child, +                                        sh->fresh_children);                  orig_frame = sh->orig_frame;                  orig_local = orig_frame->local; @@ -2165,7 +2175,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)          nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,                                       priv->child_count, AFR_SELF_HEAL_ENTRY, -                                     sh->child_success, this->name); +                                     sh->success_children, this->name);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE, @@ -2180,6 +2190,13 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)          sh->source = source; +        afr_reset_children (sh->fresh_children, priv->child_count); +        afr_get_fresh_children (sh->success_children, sh->sources, +                                sh->fresh_children, priv->child_count); +        afr_inode_set_read_ctx (this, sh->inode, sh->source, +                                sh->fresh_children); + +  
heal:          afr_sh_entry_sync_prepare (frame, this); @@ -2208,7 +2225,7 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,                  if (op_ret != -1) {                          sh->xattr[child_index] = dict_ref (xattr);                          sh->buf[child_index] = *buf; -                        sh->child_success[sh->success_count] = child_index; +                        sh->success_children[sh->success_count] = child_index;                          sh->success_count++;                  }          } @@ -2258,8 +2275,7 @@ afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)                  }          } -        for (i = 0; i < priv->child_count; i++) -                sh->child_success[i] = -1; +        afr_reset_children (sh->success_children, priv->child_count);          sh->success_count = 0;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index fe1db60e2e4..5993e9596ba 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -147,15 +147,32 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,                                     xlator_t *this, int32_t op_ret,                                     int32_t op_errno, dict_t *xattr)  { -        afr_local_t     *local = NULL; +        afr_local_t     *local     = NULL;          int             call_count = 0; +        long            i          = 0; +        afr_self_heal_t *sh        = NULL; +        afr_private_t   *priv      = NULL;          local = frame->local; +        priv  = this->private; +        sh = &local->self_heal; +        i = (long)cookie; +        if ((!IA_ISREG (sh->buf[sh->source].ia_type)) && +            (!IA_ISDIR (sh->buf[sh->source].ia_type))) { +                afr_fresh_children_add_child (sh->fresh_children, i, +             
                                 priv->child_count); +        }          call_count = afr_frame_return (frame); -        if (call_count == 0) +        if (call_count == 0) { +                if ((!IA_ISREG (sh->buf[sh->source].ia_type)) && +                    (!IA_ISDIR (sh->buf[sh->source].ia_type))) { +                        afr_inode_set_read_ctx (this, sh->inode, sh->source, +                                                sh->fresh_children); +                }                  afr_sh_metadata_finish (frame, this); +        }          return 0;  } @@ -483,7 +500,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)          nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,                                       priv->child_count, AFR_SELF_HEAL_METADATA, -                                     sh->child_success, this->name); +                                     sh->success_children, this->name);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE, @@ -545,6 +562,16 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)                          sh->sources[i] = 0;          } +        if ((!IA_ISREG (sh->buf[source].ia_type)) && +            (!IA_ISDIR (sh->buf[source].ia_type))) { +                afr_reset_children (sh->fresh_children, +                                          priv->child_count); +                afr_get_fresh_children (sh->success_children, sh->sources, +                                        sh->fresh_children, priv->child_count); +                afr_inode_set_read_ctx (this, sh->inode, sh->source, +                                        sh->fresh_children); +        } +          afr_sh_metadata_sync_prepare (frame, this);          return 0; @@ -582,7 +609,7 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          sh->buf[child_index] = *buf;                          if (xattr)                                  
sh->xattr[child_index] = dict_ref (xattr); -                        sh->child_success[sh->success_count] = child_index; +                        sh->success_children[sh->success_count] = child_index;                          sh->success_count++;                  } else {                          gf_log (this->name, GF_LOG_INFO, @@ -637,8 +664,7 @@ afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)                  }          } -        for (i = 0; i < priv->child_count; i++) -                sh->child_success[i] = -1; +        afr_reset_children (sh->success_children, priv->child_count);          sh->success_count = 0;          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 976dae4754d..1056a366223 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -49,11 +49,8 @@ int  afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);  int -afr_self_heal (call_frame_t *frame, xlator_t *this); +afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode); -gf_boolean_t -afr_is_fresh_read_child (int32_t *sources, int32_t child_count, -                         int32_t read_child);  int  afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,                                            dict_t **xattr, diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 7652d3d1e35..b8d2e27a448 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -407,24 +407,31 @@ void  afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,                         afr_transaction_type type)  { -        int           curr_read_child = -1; -        int           new_read_child = -1; +        int             curr_read_child = -1; +        int    
         new_read_child = -1;          afr_private_t   *priv = NULL; -        afr_local_t  *local = NULL; -        int         **pending = NULL; -        int           idx = 0; +        afr_local_t     *local = NULL; +        int             **pending = NULL; +        int             idx = 0; +        int32_t         *fresh_children = NULL; +        size_t          success_count = 0;          idx = afr_index_for_transaction_type (type);          priv = this->private;          local = frame->local; -        curr_read_child = afr_read_child (this, inode); +        curr_read_child = afr_inode_get_read_ctx (this, inode, NULL);          pending = local->pending; +        GF_ASSERT (curr_read_child >= 0); +          if (pending[curr_read_child][idx] != 0) -                return; +                goto out; -        /* need to set new read_child */ +        fresh_children = GF_CALLOC (priv->child_count, sizeof (*fresh_children), +                                    gf_afr_mt_int32_t); +        if (!fresh_children) +                goto out;          for (new_read_child = 0; new_read_child < priv->child_count;               new_read_child++) { @@ -435,15 +442,16 @@ afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,                  if (pending[new_read_child][idx] == 0)                          /* op just failed */                          continue; - -                break; +                fresh_children[success_count] = new_read_child; +                success_count++;          } -        if (new_read_child == priv->child_count) -                /* all children uneligible. 
leave as-is */ -                return; - -        afr_set_read_child (this, inode, new_read_child); +        afr_inode_set_read_ctx (this, inode, fresh_children[0], +                                fresh_children); +out: +        if (fresh_children) +                GF_FREE (fresh_children); +        return;  } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index d8939ab4d62..c6705fc68d0 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -814,6 +814,7 @@ struct xlator_dumpops dumpops = {  struct xlator_cbks cbks = {          .release     = afr_release,          .releasedir  = afr_releasedir, +        .forget      = afr_forget,  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 55c485f14d8..0b8f96ec8b9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -37,6 +37,22 @@  struct _pump_private; +typedef struct afr_inode_params_ { +        uint64_t mask_type; +        union { +                gf_boolean_t value; +                struct { +                        int32_t read_child; +                        int32_t *fresh_children; +                } read_ctx; +        } u; +} afr_inode_params_t; + +typedef struct afr_inode_ctx_ { +        uint64_t masks; +        int32_t  *fresh_children;//increasing order of latency +} afr_inode_ctx_t; +  typedef struct _afr_private {          gf_lock_t lock;               /* to guard access to child_count, etc */          unsigned int child_count;     /* total number of children   */ @@ -121,6 +137,8 @@ typedef struct {          ia_type_t type;                   /* st_mode of the entry we're doing                                               self-heal on */ +        inode_t   *inode;                 /* inode on which the self-heal is +                                             performed on */          /* Function to call to unwind. 
If self-heal is being done in the             background, this function will be called as soon as possible. */ @@ -140,8 +158,10 @@ typedef struct {          /* array containing if the lookups succeeded in the order of response           */ -        int32_t *child_success; +        int32_t *success_children;          int     success_count; +        /* array containing the fresh children found in the self-heal process */ +        int32_t *fresh_children;          /* array of errno's, one for each child */          int *child_errno; @@ -311,6 +331,7 @@ typedef struct _afr_local {          glusterfs_fop_t fop;          unsigned char *child_up; +        int32_t       *fresh_children; //in the order of response          int32_t *child_errno; @@ -354,8 +375,8 @@ typedef struct _afr_local {                          struct iatt *postparents;                          struct iatt *bufs;                          int32_t read_child; -                        int32_t *child_success;//in the order of response                          int32_t *sources; +                        int32_t *success_children;                  } lookup;                  struct { @@ -732,11 +753,12 @@ int pump_start (call_frame_t *frame, xlator_t *this);  int  afr_fd_ctx_set (xlator_t *this, fd_t *fd); -uint64_t -afr_read_child (xlator_t *this, inode_t *inode); +int32_t +afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);  void -afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child); +afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child, +                        int32_t *fresh_children);  void  afr_build_parent_loc (loc_t *parent, loc_t *child); @@ -772,7 +794,7 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,  void  afr_set_opendir_done (xlator_t *this, inode_t *inode); -uint64_t +gf_boolean_t  afr_is_opendir_done (xlator_t *this, inode_t *inode);  void @@ -829,12 +851,24 @@ int32_t  afr_marker_getxattr 
(call_frame_t *frame, xlator_t *this,                       loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv ); +int32_t * +afr_fresh_children_create (int32_t child_count); +  int  AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);  int  afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,                          transaction_lk_type_t lk_type); +int +afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count, +                                   int32_t prev_read_child, +                                   int32_t config_read_child, int32_t *sources); + +void +afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode, +                              int32_t *fresh_children, int32_t prev_read_child, +                              int32_t config_read_child);  /**   * first_up_child - return the index of the first child that is up @@ -862,4 +896,15 @@ afr_first_up_child (afr_private_t *priv)          return ret;  } +int32_t +afr_next_call_child (int32_t *fresh_children, size_t child_count, +                     int32_t *last_index, int32_t read_child); +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, +                        int32_t *fresh_children, unsigned int child_count); +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, +                              int32_t child_count); +void +afr_reset_children (int32_t *fresh_children, int32_t child_count);  #endif /* __AFR_H__ */ diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 48ce2c94568..300b0850443 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -1536,7 +1536,14 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,                  return 0;          } -        read_child = afr_read_child (this, loc->inode); +        local->fresh_children = GF_CALLOC (priv->child_count, +                                          sizeof 
(*local->fresh_children), +                                          gf_afr_mt_int32_t); +        if (!local->fresh_children) { /* allocation failed */ +                op_errno = ENOMEM; +                goto out; +        } +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);          if (read_child >= 0) {                  call_child = read_child;  | 
