diff options
| author | Pranith K <pranithk@gluster.com> | 2011-07-14 08:07:46 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-07-17 07:45:11 -0700 | 
| commit | b0a3a3fda3f0993cd8c0e1b135bb569b6543e7c0 (patch) | |
| tree | 59e736d4246af1ce27d4bf7c9e5e42648d4c051b /xlators/cluster/afr | |
| parent | bfc0e16e43815ab6d6e67f4bd26694ebd72b3360 (diff) | |
cluster/afr: Choose next call child from fresh-children for inode-read-fops
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
Diffstat (limited to 'xlators/cluster/afr')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 37 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 95 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.h | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 548 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 64 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/pump.c | 80 | 
7 files changed, 391 insertions, 442 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index e8afc6d8de6..21f7b4e4356 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -452,8 +452,9 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,   * in execution there is a chance for inode's read_ctx to change.   */  int32_t -afr_next_call_child (int32_t *fresh_children, size_t child_count, -                     int32_t *last_index, int32_t read_child) +afr_next_call_child (int32_t *fresh_children, unsigned char *child_up, +                     size_t child_count, int32_t *last_index, +                     int32_t read_child)  {          int             next_index      = 0;          int32_t         next_call_child = -1; @@ -463,12 +464,12 @@ afr_next_call_child (int32_t *fresh_children, size_t child_count,          next_index = *last_index;  retry:          next_index++; -        if (next_index >= child_count) +        if ((next_index >= child_count) || +           (fresh_children[next_index] == -1))                  goto out; -        if (fresh_children[next_index] == read_child) +        if ((fresh_children[next_index] == read_child) || +           (!child_up[fresh_children[next_index]]))                  goto retry; -        if (fresh_children[next_index] == -1) -                goto out;          *last_index = next_index;          next_call_child = fresh_children[next_index];  out: @@ -1475,7 +1476,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this,          if (ret == 0) {                  /* lookup is a revalidate */ -                local->read_child_index = afr_inode_get_read_ctx (this, loc->inode, +                local->read_child_index = afr_inode_get_read_ctx (this, +                                                                  loc->inode,                                                                    NULL);          } else {                  LOCK (&priv->read_child_lock); @@ -3070,6 +3072,24 @@ out:  }  int +afr_first_up_child (unsigned char *child_up, size_t child_count) +{ +        int         ret      = -1; +        int         i        = 0; + +        GF_ASSERT (child_up); + +        for (i = 0; i < child_count; i++) { +                if (child_up[i]) { +                        ret = i; +                        break; +                } +        } + +        return ret; +} + +int  AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)  {          local->op_ret = -1; @@ -3147,7 +3167,8 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)          if (priv->optimistic_change_log && child_up_count == priv->child_count)                  local->optimistic_change_log = 1; -        local->first_up_child = afr_first_up_child (priv); +        local->first_up_child = afr_first_up_child (local->child_up, +                                                    priv->child_count);          local->child_errno = GF_CALLOC (sizeof (*local->child_errno),                                          priv->child_count, diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 8593d0c14c5..ce941f0189e 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -49,7 +49,6 @@  #include "afr-self-heal.h"  #include "afr-self-heal-common.h" -  int  afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)  { @@ -517,24 +516,38 @@ int32_t  afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                    int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)  { -        afr_private_t *  priv        = NULL; -        afr_local_t *    local       = NULL; -        xlator_t **      children    = NULL; -        int              call_child  = 0; -        int              ret         = 0; -        gf_dirent_t *    entry       = NULL; -        gf_dirent_t *    tmp         = NULL; -        int              child_index = -1; -        uint64_t         ctx         = 0; -        afr_fd_ctx_t    *fd_ctx      = NULL; -        off_t            offset      = 0; +        afr_private_t *  priv            = NULL; +        afr_local_t *    local           = NULL; +        xlator_t **      children        = NULL; +        int32_t          next_call_child = -1; +        int              ret             = 0; +        gf_dirent_t *    entry           = NULL; +        gf_dirent_t *    tmp             = NULL; +        int32_t          *last_index     = NULL; +        int32_t          read_child      = -1; +        int32_t         *fresh_children   = NULL; +        uint64_t         ctx             = 0; +        afr_fd_ctx_t    *fd_ctx          = NULL; +        off_t            offset          = 0; +        int32_t         call_child       = -1;          priv     = this->private;          children = priv->children;          local = frame->local; -        child_index = (long) cookie; +        read_child = (long) cookie; +        last_index = &local->cont.readdir.last_index; +        fresh_children = local->fresh_children; + +        /* the value of the last_index changes if afr_next_call_child is +         * called. So to find the call_child of this callback use last_index +         * before the next_call_child call. +         */ +        if (*last_index == -1) +                call_child = read_child; +        else +                call_child = fresh_children[*last_index];          if (priv->strict_readdir) {                  ret = fd_ctx_get (local->fd, this, &ctx); @@ -548,25 +561,25 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  fd_ctx = (afr_fd_ctx_t *)(long) ctx; -                if (child_went_down (op_ret, op_errno)) { -                        if (all_tried (child_index, priv->child_count)) { -                                gf_log (this->name, GF_LOG_INFO, -                                        "all options tried going out"); +                if (op_ret == -1) { +                        next_call_child = afr_next_call_child (fresh_children, +                                                               local->child_up, +                                                               priv->child_count, +                                                               last_index, +                                                               read_child); +                        if (next_call_child < 0)                                  goto out; -                        } - -                        call_child = ++child_index; -                          gf_log (this->name, GF_LOG_TRACE,                                  "starting readdir afresh on child %d, offset %"PRId64, -                                call_child, (uint64_t) 0); +                                next_call_child, (uint64_t) 0);                          fd_ctx->failed_over = _gf_true;                          STACK_WIND_COOKIE (frame, afr_readdirp_cbk, -                                           (void *) (long) call_child, -                                           children[call_child], -                                           children[call_child]->fops->readdirp, local->fd, +                                           (void *) (long) read_child, +                                           children[next_call_child], +                                           children[next_call_child]->fops->readdirp, +                                           local->fd,                                             local->cont.readdir.size, 0);                          return 0;                  } @@ -603,12 +616,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  gf_log (this->name, GF_LOG_TRACE,                                          "trying to fetch non-duplicate entries "                                          "from offset %"PRId64", child %s", -                                        offset, children[child_index]->name); +                                        offset, children[call_child]->name);                                  STACK_WIND_COOKIE (frame, afr_readdirp_cbk, -                                                   (void *) (long) child_index, -                                                   children[child_index], -                                                   children[child_index]->fops->readdirp, +                                                   (void *) (long) read_child, +                                                   children[call_child], +                                                   children[call_child]->fops->readdirp,                                                     local->fd, local->cont.readdir.size, offset);                                  return 0;                          } @@ -623,7 +636,6 @@ out:          return 0;  } -  int32_t  afr_do_readdir (call_frame_t *frame, xlator_t *this,                  fd_t *fd, size_t size, off_t offset, int whichop) @@ -637,6 +649,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,          int              ret        = -1;          int32_t          op_ret     = -1;          int32_t          op_errno   = 0; +        uint64_t         read_child = 0;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -646,19 +659,29 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,          children = priv->children;          ALLOC_OR_GOTO (local, afr_local_t, out); +        frame->local = local; +          ret = AFR_LOCAL_INIT (local, priv);          if (ret < 0) {                  op_errno = -ret;                  goto out;          } -        frame->local = local; +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } -        call_child = afr_first_up_child (priv); -        if (call_child == -1) { -                op_errno = ENOTCONN; -                gf_log (this->name, GF_LOG_INFO, -                        "no child is up"); +        read_child = afr_inode_get_read_ctx (this, fd->inode, +                                             local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.readdir.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1;                  goto out;          } diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h index 40c7b6aef28..3143cb97368 100644 --- a/xlators/cluster/afr/src/afr-dir-read.h +++ b/xlators/cluster/afr/src/afr-dir-read.h @@ -38,11 +38,6 @@ afr_readdirp (call_frame_t *frame, xlator_t *this,  	     fd_t *fd, size_t size, off_t offset);  int32_t -afr_getdents (call_frame_t *frame, xlator_t *this, -	      fd_t *fd, size_t size, off_t offset, int32_t flag); - - -int32_t  afr_checksum (call_frame_t *frame, xlator_t *this,  	      loc_t *loc, int32_t flags); diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index f2507f07ef4..caac56f6596 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -63,13 +63,14 @@ int32_t  afr_access_cbk (call_frame_t *frame, void *cookie,                  xlator_t *this, int32_t op_ret, int32_t op_errno)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        xlator_t **     children   = NULL; -        int             unwind     = 1; -        int             last_tried = -1; -        int             this_try   = -1; -        int             read_child = -1; +        afr_private_t * priv            = NULL; +        afr_local_t *   local           = NULL; +        xlator_t **     children        = NULL; +        int             unwind          = 1; +        int32_t         *last_index     = NULL; +        int32_t         next_call_child = -1; +        int32_t         read_child      = -1; +        int32_t         *fresh_children  = NULL;          priv     = this->private;          children = priv->children; @@ -79,27 +80,21 @@ afr_access_cbk (call_frame_t *frame, void *cookie,          read_child = (long) cookie;          if (op_ret == -1) { -        retry: -                last_tried = local->cont.access.last_tried; - -                if (all_tried (last_tried, priv->child_count)) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%s: all subvolumes tried, going out", -                                local->loc.path); +                last_index = &local->cont.access.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0)                          goto out; -                } -                this_try    = ++local->cont.access.last_tried; - -                if (this_try == read_child) { -                        goto retry; -                }                  unwind = 0;                  STACK_WIND_COOKIE (frame, afr_access_cbk,                                     (void *) (long) read_child, -                                   children[this_try], -                                   children[this_try]->fops->access, +                                   children[next_call_child], +                                   children[next_call_child]->fops->access,                                     &local->loc, local->cont.access.mask);          } @@ -115,13 +110,13 @@ out:  int32_t  afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)  { -        afr_private_t *  priv       = NULL; -        xlator_t **      children   = NULL; -        int              call_child = 0; -        afr_local_t     *local      = NULL; -        int32_t          read_child = -1; -        int32_t          op_ret     = -1; -        int32_t          op_errno   = 0; +        afr_private_t   *priv      = NULL; +        xlator_t        **children = NULL; +        int             call_child = 0; +        afr_local_t     *local     = NULL; +        int32_t         op_ret     = -1; +        int32_t         op_errno   = 0; +        int32_t         read_child = -1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -133,32 +128,31 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)          children = priv->children;          ALLOC_OR_GOTO (local, afr_local_t, out); +        frame->local = local; -        local->fresh_children = GF_CALLOC (priv->child_count, -                                          sizeof (*local->fresh_children), -                                          gf_afr_mt_int32_t); -        if (local->fresh_children) { -                op_errno = ENOMEM; +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - -        if ((read_child >= 0) && (priv->child_up[read_child])) { -                call_child = read_child; - -                local->cont.access.last_tried = -1; +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) { +                op_errno = ENOMEM; +                goto out; +        } -        } else { -                call_child = afr_first_up_child (priv); -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_INFO, -                                "%s: no child is up", loc->path); -                        goto out; -                } -                local->cont.access.last_tried = call_child; +        read_child = afr_inode_get_read_ctx (this, loc->inode, +                                             local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.access.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          }          loc_copy (&local->loc, loc); @@ -166,7 +160,8 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)          STACK_WIND_COOKIE (frame, afr_access_cbk,                             (void *) (long) call_child, -                           children[call_child], children[call_child]->fops->access, +                           children[call_child], +                           children[call_child]->fops->access,                             loc, mask);          op_ret = 0; @@ -187,13 +182,14 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,                xlator_t *this, int32_t op_ret, int32_t op_errno,                struct iatt *buf)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        xlator_t **     children   = NULL; -        int             unwind     = 1; -        int             last_tried = -1; -        int             this_try   = -1; -        int             read_child = -1; +        afr_private_t * priv            = NULL; +        afr_local_t *   local           = NULL; +        xlator_t **     children        = NULL; +        int             unwind          = 1; +        int32_t         *last_index     = NULL; +        int32_t         next_call_child = -1; +        int32_t         read_child      = -1; +        int32_t         *fresh_children  = NULL;          priv     = this->private;          children = priv->children; @@ -203,27 +199,21 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,          local = frame->local;          if (op_ret == -1) { -        retry: -                last_tried = local->cont.stat.last_tried; - -                if (all_tried (last_tried, priv->child_count)) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%s: all subvolumes tried, going out", -                                local->loc.path); +                last_index = &local->cont.stat.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0)                          goto out; -                } -                this_try = ++local->cont.stat.last_tried; - -                if (this_try == read_child) { -                        goto retry; -                }                  unwind = 0;                  STACK_WIND_COOKIE (frame, afr_stat_cbk,                                     (void *) (long) read_child, -                                   children[this_try], -                                   children[this_try]->fops->stat, +                                   children[next_call_child], +                                   children[next_call_child]->fops->stat,                                     &local->loc);          } @@ -239,13 +229,13 @@ out:  int32_t  afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)  { -        afr_private_t * priv       = NULL; -        afr_local_t   * local      = NULL; -        xlator_t **     children   = NULL; -        int32_t         read_child = -1; +        afr_private_t   *priv      = NULL; +        afr_local_t     *local     = NULL; +        xlator_t        **children = NULL;          int             call_child = 0;          int32_t         op_ret     = -1;          int32_t         op_errno   = 0; +        int32_t         read_child = -1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -257,35 +247,30 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)          children = priv->children;          ALLOC_OR_GOTO (local, afr_local_t, out); -          frame->local = local; +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } -        local->fresh_children = GF_CALLOC (priv->child_count, -                                          sizeof (*local->fresh_children), -                                          gf_afr_mt_int32_t); -        if (local->fresh_children) { +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) {                  op_errno = ENOMEM;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - -        if ((read_child >= 0) && (priv->child_up[read_child])) { -                call_child = read_child; - -                local->cont.stat.last_tried = -1; - -        } else { -                call_child = afr_first_up_child (priv); -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_INFO, -                                "%s: no child is up", loc->path); -                        goto out; -                } -                local->cont.stat.last_tried = call_child; +        read_child = afr_inode_get_read_ctx (this, loc->inode, +                                             local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.stat.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          } -          loc_copy (&local->loc, loc);          local->cont.stat.ino = loc->inode->ino; @@ -313,13 +298,14 @@ int32_t  afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                 int32_t op_ret, int32_t op_errno, struct iatt *buf)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        xlator_t **     children   = NULL; -        int             unwind     = 1; -        int             last_tried = -1; -        int             this_try   = -1; -        int             read_child = -1; +        afr_private_t   *priv           = NULL; +        afr_local_t     *local          = NULL; +        xlator_t        **children      = NULL; +        int             unwind          = 1; +        int32_t         *last_index     = NULL; +        int32_t         next_call_child = -1; +        int32_t         read_child      = -1; +        int32_t         *fresh_children  = NULL;          priv     = this->private;          children = priv->children; @@ -329,27 +315,21 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          read_child = (long) cookie;          if (op_ret == -1) { -        retry: -                last_tried = local->cont.fstat.last_tried; - -                if (all_tried (last_tried, priv->child_count)) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%p: all subvolumes tried, going out", -                                local->fd); +                last_index = &local->cont.fstat.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0)                          goto out; -                } -                this_try   = ++local->cont.fstat.last_tried; - -                if (this_try == read_child) { -                        goto retry; -                }                  unwind = 0;                  STACK_WIND_COOKIE (frame, afr_fstat_cbk,                                     (void *) (long) read_child, -                                   children[this_try], -                                   children[this_try]->fops->fstat, +                                   children[next_call_child], +                                   children[next_call_child]->fops->fstat,                                     local->fd);          } @@ -366,13 +346,13 @@ int32_t  afr_fstat (call_frame_t *frame, xlator_t *this,             fd_t *fd)  { -        afr_private_t * priv       = NULL; -        afr_local_t   * local      = NULL; -        xlator_t **     children   = NULL; +        afr_private_t   *priv      = NULL; +        afr_local_t     *local     = NULL; +        xlator_t        **children = NULL;          int             call_child = 0; -        int32_t         read_child = -1;          int32_t         op_ret     = -1;          int32_t         op_errno   = 0; +        int32_t         read_child = 0;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -384,36 +364,36 @@ afr_fstat (call_frame_t *frame, xlator_t *this,          children = priv->children; -        ALLOC_OR_GOTO (local, afr_local_t, out); +        VALIDATE_OR_GOTO (fd->inode, out); +        ALLOC_OR_GOTO (local, afr_local_t, out);          frame->local = local; -        VALIDATE_OR_GOTO (fd->inode, out); +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } -        local->fresh_children = GF_CALLOC (priv->child_count, -                                          sizeof (*local->fresh_children), -                                          gf_afr_mt_int32_t); -        if (local->fresh_children) { +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) {                  op_errno = ENOMEM;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); -        if ((read_child >= 0) && (priv->child_up[read_child])) { -                call_child = read_child; +        read_child = afr_inode_get_read_ctx (this, fd->inode, +                                             local->fresh_children); -                local->cont.fstat.last_tried = -1; -        } else { -                call_child = afr_first_up_child (priv); -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_INFO, -                                "%p: no child is up", fd); -                        goto out; -                } -                local->cont.fstat.last_tried = call_child; +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.fstat.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          }          local->cont.fstat.ino = fd->inode->ino; @@ -442,13 +422,14 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,                    xlator_t *this, int32_t op_ret, int32_t op_errno,                    const char *buf, struct iatt *sbuf)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        xlator_t **     children   = NULL; -        int             unwind     = 1; -        int             last_tried = -1; -        int             this_try   = -1; -        int             read_child = -1; +        afr_private_t * priv                  = NULL; +        afr_local_t *   local                 = NULL; +        xlator_t **     children              = NULL; +        int             unwind                = 1; +        int32_t         *last_index           = NULL; +        int32_t         next_call_child       = -1; +        int32_t         read_child            = -1; +        int32_t         *fresh_children        = NULL;          priv     = this->private;          children = priv->children; @@ -458,26 +439,20 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,          read_child = (long) cookie;          if (op_ret == -1) { -        retry: -                last_tried = local->cont.readlink.last_tried; - -                if (all_tried (last_tried, priv->child_count)) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%s: all subvolumes tried, going out", -                                local->loc.path); +                last_index = &local->cont.readlink.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0)                          goto out; -                } -                this_try = ++local->cont.readlink.last_tried; - -                if (this_try == read_child) { -                        goto retry; -                }                  unwind = 0;                  STACK_WIND_COOKIE (frame, afr_readlink_cbk,                                     (void *) (long) read_child, -                                   children[this_try], -                                   children[this_try]->fops->readlink, +                                   children[next_call_child], +                                   children[next_call_child]->fops->readlink,                                     &local->loc,                                     local->cont.readlink.size);          } @@ -495,13 +470,13 @@ int32_t  afr_readlink (call_frame_t *frame, xlator_t *this,                loc_t *loc, size_t size)  { -        afr_private_t *  priv       = NULL; -        xlator_t **      children   = NULL; -        int              call_child = 0; -        afr_local_t     *local      = NULL; -        int32_t          read_child = -1; -        int32_t          op_ret     = -1; -        int32_t          op_errno   = 0; +        afr_private_t   *priv      = NULL; +        xlator_t        **children = NULL; +        int             call_child = 0; +        afr_local_t     *local     = NULL; +        int32_t         op_ret     = -1; +        int32_t         op_errno   = 0; +        int32_t         read_child = -1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -513,34 +488,28 @@ afr_readlink (call_frame_t *frame, xlator_t *this,          children = priv->children;          ALLOC_OR_GOTO (local, afr_local_t, out); -          frame->local = local; +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } -        local->fresh_children = GF_CALLOC (priv->child_count, -                                          sizeof (*local->fresh_children), -                                          gf_afr_mt_int32_t); -        if (local->fresh_children) { +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) {                  op_errno = ENOMEM;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - -        if ((read_child >= 0) && (priv->child_up[read_child])) { -                call_child = read_child; - -                local->cont.readlink.last_tried = -1; - -        } else { -                call_child = afr_first_up_child (priv); - -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_INFO, -                                "%s: no child is up", loc->path); -                        goto out; -                } - -                local->cont.readlink.last_tried = call_child; +        read_child = afr_inode_get_read_ctx (this, loc->inode, +                                             local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.readlink.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          }          loc_copy (&local->loc, loc); @@ -550,7 +519,8 @@ afr_readlink (call_frame_t *frame, xlator_t *this,          STACK_WIND_COOKIE (frame, afr_readlink_cbk,                             (void *) (long) call_child, -                           children[call_child], children[call_child]->fops->readlink, +                           children[call_child], +                           children[call_child]->fops->readlink,                             loc, size);          op_ret = 0; @@ -622,13 +592,14 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,                    xlator_t *this, int32_t op_ret, int32_t op_errno,                    dict_t *dict)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        xlator_t **     children   = NULL; -        int             unwind     = 1; -        int             last_tried = -1; -        int             this_try   = -1; -        int             read_child = -1; +        afr_private_t * priv            = NULL; +        afr_local_t *   local           = NULL; +        xlator_t **     children        = NULL; +        int             unwind          = 1; +        int32_t         *last_index     = NULL; +        int32_t         next_call_child = -1; +        int32_t         read_child      = -1; +        int32_t         *fresh_children  = NULL;          priv     = this->private;          children = priv->children; @@ -638,26 +609,20 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,          read_child = (long) cookie;          if (op_ret == -1) { -        retry: -                last_tried = local->cont.getxattr.last_tried; - -                if (all_tried (last_tried, priv->child_count)) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%s: all subvolumes tried, going out", -                                local->loc.path); +                last_index = &local->cont.getxattr.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0)                          goto out; -                } -                this_try = ++local->cont.getxattr.last_tried; - -                if (this_try == read_child) { -                        goto retry; -                }                  unwind = 0;                  STACK_WIND_COOKIE (frame, afr_getxattr_cbk,                                     (void *) (long) read_child, -                                   children[this_try], -                                   children[this_try]->fops->getxattr, +                                   children[next_call_child], +                                   children[next_call_child]->fops->getxattr,                                     &local->loc,                                     local->cont.getxattr.name);          } @@ -790,16 +755,16 @@ int32_t  afr_getxattr (call_frame_t *frame, xlator_t *this,                loc_t *loc, const char *name)  { -        afr_private_t *   priv        = NULL; -        xlator_t **       children    = NULL; -        int               call_child  = 0; -        afr_local_t     * local       = NULL; -        xlator_list_t   * trav        = NULL; -        xlator_t       ** sub_volumes = NULL; -        int               read_child  = -1; -        int               i           = 0; -        int32_t           op_ret      = -1; -        int32_t           op_errno    = 0; +        afr_private_t   *priv         = NULL; +        xlator_t        **children    = NULL; +        int             call_child    = 0; +        afr_local_t     *local        = NULL; +        xlator_list_t   *trav         = NULL; +        xlator_t        **sub_volumes = NULL; +        int             i             = 0; +        int32_t         op_ret        = -1; +        int32_t         op_errno      = 0; +        int32_t         read_child    = -1;          VALIDATE_OR_GOTO (frame, out); @@ -814,6 +779,12 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,          ALLOC_OR_GOTO (local, afr_local_t, out);          frame->local = local; +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } +          loc_copy (&local->loc, loc);          if (name)                  local->cont.getxattr.name = gf_strdup (name); @@ -908,36 +879,27 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,                  }          } -        local->fresh_children = GF_CALLOC (priv->child_count, -                                          sizeof (*local->fresh_children), -                                          gf_afr_mt_int32_t); -        if (local->fresh_children) { +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) {                  op_errno = ENOMEM;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - -        if ((read_child >= 0) && (priv->child_up[read_child])) { -                call_child = read_child; - -                local->cont.getxattr.last_tried = -1; -        } else { -                call_child = afr_first_up_child (priv); - -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_INFO, -                                "%s: no child is up", loc->path); -                        goto out; -                } -                local->cont.getxattr.last_tried = call_child; +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.getxattr.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          } -          STACK_WIND_COOKIE (frame, afr_getxattr_cbk,                             (void *) (long) call_child, -                           children[call_child], children[call_child]->fops->getxattr, +                           children[call_child], +                           children[call_child]->fops->getxattr,                             loc, name);          op_ret = 0; @@ -971,13 +933,14 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,                 struct iovec *vector, int32_t count, struct iatt *buf,                 struct iobref *iobref)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        xlator_t **     children   = NULL; -        int             unwind     = 1; -        int             last_tried = -1; -        int             this_try   = -1; -        int             read_child = -1; +        afr_private_t * priv            = NULL; +        afr_local_t *   local           = NULL; +        xlator_t **     children        = NULL; +        int             unwind          = 1; +        int32_t         *last_index     = NULL; +        int32_t         next_call_child = -1; +        int32_t         *fresh_children  = NULL; +        int32_t         read_child      = -1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -993,31 +956,21 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,          read_child = (long) cookie;          if (op_ret == -1) { -        retry: -                last_tried = local->cont.readv.last_tried; - -                if (all_tried (last_tried, priv->child_count)) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%p: all subvolumes tried, going out", -                                local->fd); +                last_index = &local->cont.readv.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0)                          goto out; -                } -                this_try = ++local->cont.readv.last_tried; - -                if (this_try == read_child) { -                        /* -                          skip the read child since if we are here -                          we must have already tried that child -                        */ -                        goto retry; -                }                  unwind = 0;                  STACK_WIND_COOKIE (frame, afr_readv_cbk,                                     (void *) (long) read_child, -                                   children[this_try], -                                   children[this_try]->fops->readv, +                                   children[next_call_child], +                                   children[next_call_child]->fops->readv,                                     local->fd, local->cont.readv.size,                                     local->cont.readv.offset);          } @@ -1039,10 +992,10 @@ afr_readv (call_frame_t *frame, xlator_t *this,          afr_private_t * priv       = NULL;          afr_local_t   * local      = NULL;          xlator_t **     children   = NULL; -        int32_t         read_child = -1;          int             call_child = 0;          int32_t         op_ret     = -1;          int32_t         op_errno   = 0; +        int32_t         read_child = -1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -1053,37 +1006,28 @@ afr_readv (call_frame_t *frame, xlator_t *this,          children = priv->children;          ALLOC_OR_GOTO (local, afr_local_t, out); -          frame->local = local; +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } -        local->fresh_children = GF_CALLOC (priv->child_count, -                                          sizeof (*local->fresh_children), -                                          gf_afr_mt_int32_t); -        if (local->fresh_children) { +        local->fresh_children = afr_fresh_children_create (priv->child_count); +        if (!local->fresh_children) {                  op_errno = ENOMEM;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); - -        if ((read_child >= 0) && (priv->child_up[read_child])) { -                call_child = read_child; - -                /* -                  if read fails from the read child, we try -                  all children starting with the first one -                */ -                local->cont.readv.last_tried = -1; - -        } else { -                call_child = afr_first_up_child (priv); -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_DEBUG, -                                "%p: no child is up", fd); -                        goto out; -                } -                local->cont.readv.last_tried = call_child; +        read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.readv.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          }          local->fd                    = fd_ref (fd); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index b8d2e27a448..2e2c57265a3 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -428,10 +428,10 @@ afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,          if (pending[curr_read_child][idx] != 0)                  goto out; -        fresh_children = GF_CALLOC (priv->child_count, sizeof (*fresh_children), -                                    gf_afr_mt_int32_t); +        fresh_children = afr_fresh_children_create (priv->child_count);          if (!fresh_children)                  goto out; +          for (new_read_child = 0; new_read_child < priv->child_count;               new_read_child++) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 0b8f96ec8b9..c6d26314e8f 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -395,28 +395,28 @@ typedef struct _afr_local {                  struct {                          int32_t mask; -                        int last_tried;  /* index of the child we tried previously */ +                        int last_index;  /* index of the child we tried previously */                  } access;                  struct { -                        int last_tried; +                        int last_index;                          ino_t ino;                  } stat;                  struct { -                        int last_tried; +                        int last_index;                          ino_t ino;                  } fstat;                  struct {                          size_t size; -                        int last_tried; +                        int last_index;                          ino_t ino;                  } readlink;                  struct {                          char *name; -                        int last_tried; +                        int last_index;                          long pathinfo_len;                  } getxattr; @@ -424,7 +424,7 @@ typedef struct _afr_local {                          ino_t ino;                          size_t size;                          off_t offset; -                        int last_tried; +                        int last_index;                  } readv;                  /* dir read */ @@ -444,20 +444,8 @@ typedef struct _afr_local {                          off_t offset;                          gf_boolean_t failed; -                        int last_tried; +                        int last_index;                  } readdir; - -                struct { -                        int32_t op_ret; -                        int32_t op_errno; - -                        size_t size; -                        off_t offset; -                        int32_t flag; - -                        int last_tried; -                } getdents; -                  /* inode write */                  struct { @@ -860,6 +848,10 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);  int  afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,                          transaction_lk_type_t lk_type); + +int +afr_first_up_child (unsigned char *child_up, size_t child_count); +  int  afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,                                     int32_t prev_read_child, @@ -870,35 +862,15 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,                                int32_t *fresh_children, int32_t prev_read_child,                                int32_t config_read_child); -/** - * first_up_child - return the index of the first child that is up - */ - -static inline int -afr_first_up_child (afr_private_t *priv) -{ -        xlator_t ** children = NULL; -        int         ret      = -1; -        int         i        = 0; - -        LOCK (&priv->lock); -        { -                children = priv->children; -                for (i = 0; i < priv->child_count; i++) { -                        if (priv->child_up[i]) { -                                ret = i; -                                break; -                        } -                } -        } -        UNLOCK (&priv->lock); - -        return ret; -} +int32_t +afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child, +                    int32_t *fresh_children, +                    int32_t *call_child, int32_t *last_index);  int32_t -afr_next_call_child (int32_t *fresh_children, size_t child_count, -                     int32_t *last_index, int32_t read_child); +afr_next_call_child (int32_t *fresh_children, unsigned char *child_up, +                     size_t child_count, int32_t *last_index, +                     int32_t read_child);  void  afr_get_fresh_children (int32_t *success_children, int32_t *sources,                          int32_t *fresh_children, unsigned int child_count); diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 300b0850443..e7ff4651da2 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -1435,14 +1435,15 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie,  		  xlator_t *this, int32_t op_ret, int32_t op_errno,  		  dict_t *dict)  { -	afr_private_t * priv     = NULL; -	afr_local_t *   local    = NULL; -	xlator_t **     children = NULL; +	afr_private_t   *priv           = NULL; +	afr_local_t     *local          = NULL; +	xlator_t        **children      = NULL; +	int             unwind          = 1; +        int32_t         *last_index     = NULL; +        int32_t         next_call_child = -1; +        int32_t         read_child      = -1; +        int32_t         *fresh_children = NULL; -	int unwind     = 1; -	int last_tried = -1; -	int this_try = -1; -        int read_child = -1;  	priv     = this->private;  	children = priv->children; @@ -1452,23 +1453,20 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie,          read_child = (long) cookie;  	if (op_ret == -1) { -        retry: -		last_tried = local->cont.getxattr.last_tried; - -		if (all_tried (last_tried, priv->child_count)) { -			goto out; -		} -		this_try = ++local->cont.getxattr.last_tried; - -                if (this_try == read_child) { -                        goto retry; -                } +		last_index = &local->cont.getxattr.last_index; +                fresh_children = local->fresh_children; +                next_call_child = afr_next_call_child (fresh_children, +                                                       local->child_up, +                                                       priv->child_count, +                                                       last_index, read_child); +                if (next_call_child < 0) +                        goto out;  		unwind = 0;  		STACK_WIND_COOKIE (frame, pump_getxattr_cbk,  				   (void *) (long) read_child, -				   children[this_try], -				   children[this_try]->fops->getxattr, +				   children[next_call_child], +				   children[next_call_child]->fops->getxattr,  				   &local->loc,  				   local->cont.getxattr.name);  	} @@ -1491,12 +1489,10 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,  	afr_private_t *   priv       = NULL;  	xlator_t **       children   = NULL;  	int               call_child = 0; -	afr_local_t     * local      = NULL; - -        int               read_child = -1; - -	int32_t op_ret   = -1; -	int32_t op_errno = 0; +	afr_local_t       *local     = NULL; +	int32_t           op_ret     = -1; +	int32_t           op_errno   = 0; +        uint64_t          read_child = 0;  	VALIDATE_OR_GOTO (frame, out); @@ -1511,6 +1507,12 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,  	ALLOC_OR_GOTO (local, afr_local_t, out);  	frame->local = local; +        op_ret = AFR_LOCAL_INIT (local, priv); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } +          if (name) {                  if (!strncmp (name, AFR_XATTR_PREFIX,                                strlen (AFR_XATTR_PREFIX))) { @@ -1543,25 +1545,17 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,                  op_errno = ENOMEM;                  goto out;          } -        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); -        if (read_child >= 0) { -                call_child = read_child; - -                local->cont.getxattr.last_tried = -1; -        } else { -                call_child = afr_first_up_child (priv); - -                if (call_child == -1) { -                        op_errno = ENOTCONN; -                        gf_log (this->name, GF_LOG_DEBUG, -                                "no child is up"); -                        goto out; -                } - -                local->cont.getxattr.last_tried = call_child; +        read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); +        op_ret = afr_get_call_child (this, local->child_up, read_child, +                                     local->fresh_children, +                                     &call_child, +                                     &local->cont.getxattr.last_index); +        if (op_ret < 0) { +                op_errno = -op_ret; +                op_ret = -1; +                goto out;          } -  	loc_copy (&local->loc, loc);  	if (name)  	  local->cont.getxattr.name       = gf_strdup (name);  | 
