summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
author Pranith K <pranithk@gluster.com> 2011-07-14 08:07:46 +0000
committer Anand Avati <avati@gluster.com> 2011-07-17 07:45:11 -0700
commit b0a3a3fda3f0993cd8c0e1b135bb569b6543e7c0 (patch)
tree 59e736d4246af1ce27d4bf7c9e5e42648d4c051b /xlators/cluster/afr
parent bfc0e16e43815ab6d6e67f4bd26694ebd72b3360 (diff)
cluster/afr: Choose next call child from fresh-children for inode-read-fops
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 37
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 95
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.h 5
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c 548
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 4
-rw-r--r-- xlators/cluster/afr/src/afr.h 64
-rw-r--r-- xlators/cluster/afr/src/pump.c 80
7 files changed, 391 insertions, 442 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e8afc6d8de6..21f7b4e4356 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -452,8 +452,9 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
* in execution there is a chance for inode's read_ctx to change.
*/
int32_t
-afr_next_call_child (int32_t *fresh_children, size_t child_count,
- int32_t *last_index, int32_t read_child)
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child)
{
int next_index = 0;
int32_t next_call_child = -1;
@@ -463,12 +464,12 @@ afr_next_call_child (int32_t *fresh_children, size_t child_count,
next_index = *last_index;
retry:
next_index++;
- if (next_index >= child_count)
+ if ((next_index >= child_count) ||
+ (fresh_children[next_index] == -1))
goto out;
- if (fresh_children[next_index] == read_child)
+ if ((fresh_children[next_index] == read_child) ||
+ (!child_up[fresh_children[next_index]]))
goto retry;
- if (fresh_children[next_index] == -1)
- goto out;
*last_index = next_index;
next_call_child = fresh_children[next_index];
out:
@@ -1475,7 +1476,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
if (ret == 0) {
/* lookup is a revalidate */
- local->read_child_index = afr_inode_get_read_ctx (this, loc->inode,
+ local->read_child_index = afr_inode_get_read_ctx (this,
+ loc->inode,
NULL);
} else {
LOCK (&priv->read_child_lock);
@@ -3070,6 +3072,24 @@ out:
}
int
+afr_first_up_child (unsigned char *child_up, size_t child_count) /* returns the lowest index i in [0, child_count) with child_up[i] non-zero, or -1 if there is none */
+{ /* no lock is taken here, unlike the inline afr_first_up_child (priv) that this same commit removes from afr.h; the caller supplies the array */
+ int ret = -1; /* stays -1 when no entry of child_up[] is set */
+ int i = 0;
+
+ GF_ASSERT (child_up); /* NOTE(review): GF_ASSERT is defined outside this view; whether it aborts or only logs on NULL is not visible here - confirm */
+
+ for (i = 0; i < child_count; i++) { /* signed i is compared against the size_t child_count */
+ if (child_up[i]) {
+ ret = i;
+ break; /* first match wins; later entries are not examined */
+ }
+ }
+
+ return ret;
+}
+
+int
AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
{
local->op_ret = -1;
@@ -3147,7 +3167,8 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
if (priv->optimistic_change_log && child_up_count == priv->child_count)
local->optimistic_change_log = 1;
- local->first_up_child = afr_first_up_child (priv);
+ local->first_up_child = afr_first_up_child (local->child_up,
+ priv->child_count);
local->child_errno = GF_CALLOC (sizeof (*local->child_errno),
priv->child_count,
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 8593d0c14c5..ce941f0189e 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -49,7 +49,6 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
-
int
afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
{
@@ -517,24 +516,38 @@ int32_t
afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int child_index = -1;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int32_t next_call_child = -1;
+ int ret = 0;
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+ int32_t *last_index = NULL;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ off_t offset = 0;
+ int32_t call_child = -1;
priv = this->private;
children = priv->children;
local = frame->local;
- child_index = (long) cookie;
+ read_child = (long) cookie;
+ last_index = &local->cont.readdir.last_index;
+ fresh_children = local->fresh_children;
+
+ /* the value of the last_index changes if afr_next_call_child is
+ * called. So to find the call_child of this callback use last_index
+ * before the next_call_child call.
+ */
+ if (*last_index == -1)
+ call_child = read_child;
+ else
+ call_child = fresh_children[*last_index];
if (priv->strict_readdir) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -548,25 +561,25 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- if (child_went_down (op_ret, op_errno)) {
- if (all_tried (child_index, priv->child_count)) {
- gf_log (this->name, GF_LOG_INFO,
- "all options tried going out");
+ if (op_ret == -1) {
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index,
+ read_child);
+ if (next_call_child < 0)
goto out;
- }
-
- call_child = ++child_index;
-
gf_log (this->name, GF_LOG_TRACE,
"starting readdir afresh on child %d, offset %"PRId64,
- call_child, (uint64_t) 0);
+ next_call_child, (uint64_t) 0);
fd_ctx->failed_over = _gf_true;
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, local->fd,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->readdirp,
+ local->fd,
local->cont.readdir.size, 0);
return 0;
}
@@ -603,12 +616,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"trying to fetch non-duplicate entries "
"from offset %"PRId64", child %s",
- offset, children[child_index]->name);
+ offset, children[call_child]->name);
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) child_index,
- children[child_index],
- children[child_index]->fops->readdirp,
+ (void *) (long) read_child,
+ children[call_child],
+ children[call_child]->fops->readdirp,
local->fd, local->cont.readdir.size, offset);
return 0;
}
@@ -623,7 +636,6 @@ out:
return 0;
}
-
int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset, int whichop)
@@ -637,6 +649,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
int ret = -1;
int32_t op_ret = -1;
int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -646,19 +659,29 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
children = priv->children;
ALLOC_OR_GOTO (local, afr_local_t, out);
+ frame->local = local;
+
ret = AFR_LOCAL_INIT (local, priv);
if (ret < 0) {
op_errno = -ret;
goto out;
}
- frame->local = local;
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "no child is up");
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 40c7b6aef28..3143cb97368 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -38,11 +38,6 @@ afr_readdirp (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset);
int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
-
-int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags);
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index f2507f07ef4..caac56f6596 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -63,13 +63,14 @@ int32_t
afr_access_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -79,27 +80,21 @@ afr_access_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.access.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: all subvolumes tried, going out",
- local->loc.path);
+ last_index = &local->cont.access.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.access.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_access_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->access,
+ children[next_call_child],
+ children[next_call_child]->fops->access,
&local->loc, local->cont.access.mask);
}
@@ -115,13 +110,13 @@ out:
int32_t
afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t read_child = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -133,32 +128,31 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
children = priv->children;
ALLOC_OR_GOTO (local, afr_local_t, out);
+ frame->local = local;
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
- op_errno = ENOMEM;
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- local->cont.access.last_tried = -1;
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "%s: no child is up", loc->path);
- goto out;
- }
- local->cont.access.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.access.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
loc_copy (&local->loc, loc);
@@ -166,7 +160,8 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
STACK_WIND_COOKIE (frame, afr_access_cbk,
(void *) (long) call_child,
- children[call_child], children[call_child]->fops->access,
+ children[call_child],
+ children[call_child]->fops->access,
loc, mask);
op_ret = 0;
@@ -187,13 +182,14 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iatt *buf)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -203,27 +199,21 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
local = frame->local;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.stat.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: all subvolumes tried, going out",
- local->loc.path);
+ last_index = &local->cont.stat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.stat.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_stat_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->stat,
+ children[next_call_child],
+ children[next_call_child]->fops->stat,
&local->loc);
}
@@ -239,13 +229,13 @@ out:
int32_t
afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
int call_child = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -257,35 +247,30 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
children = priv->children;
ALLOC_OR_GOTO (local, afr_local_t, out);
-
frame->local = local;
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- local->cont.stat.last_tried = -1;
-
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "%s: no child is up", loc->path);
- goto out;
- }
- local->cont.stat.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.stat.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
-
loc_copy (&local->loc, loc);
local->cont.stat.ino = loc->inode->ino;
@@ -313,13 +298,14 @@ int32_t
afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -329,27 +315,21 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.fstat.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%p: all subvolumes tried, going out",
- local->fd);
+ last_index = &local->cont.fstat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.fstat.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_fstat_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->fstat,
+ children[next_call_child],
+ children[next_call_child]->fops->fstat,
local->fd);
}
@@ -366,13 +346,13 @@ int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
int call_child = 0;
- int32_t read_child = -1;
int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -384,36 +364,36 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ VALIDATE_OR_GOTO (fd->inode, out);
+ ALLOC_OR_GOTO (local, afr_local_t, out);
frame->local = local;
- VALIDATE_OR_GOTO (fd->inode, out);
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
- local->cont.fstat.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "%p: no child is up", fd);
- goto out;
- }
- local->cont.fstat.last_tried = call_child;
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.fstat.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
local->cont.fstat.ino = fd->inode->ino;
@@ -442,13 +422,14 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
const char *buf, struct iatt *sbuf)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -458,26 +439,20 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.readlink.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: all subvolumes tried, going out",
- local->loc.path);
+ last_index = &local->cont.readlink.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.readlink.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_readlink_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readlink,
+ children[next_call_child],
+ children[next_call_child]->fops->readlink,
&local->loc,
local->cont.readlink.size);
}
@@ -495,13 +470,13 @@ int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
loc_t *loc, size_t size)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t read_child = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -513,34 +488,28 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
children = priv->children;
ALLOC_OR_GOTO (local, afr_local_t, out);
-
frame->local = local;
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- local->cont.readlink.last_tried = -1;
-
- } else {
- call_child = afr_first_up_child (priv);
-
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "%s: no child is up", loc->path);
- goto out;
- }
-
- local->cont.readlink.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readlink.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
loc_copy (&local->loc, loc);
@@ -550,7 +519,8 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, afr_readlink_cbk,
(void *) (long) call_child,
- children[call_child], children[call_child]->fops->readlink,
+ children[call_child],
+ children[call_child]->fops->readlink,
loc, size);
op_ret = 0;
@@ -622,13 +592,14 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *dict)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -638,26 +609,20 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.getxattr.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: all subvolumes tried, going out",
- local->loc.path);
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.getxattr.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->getxattr,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
&local->loc,
local->cont.getxattr.name);
}
@@ -790,16 +755,16 @@ int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t * local = NULL;
- xlator_list_t * trav = NULL;
- xlator_t ** sub_volumes = NULL;
- int read_child = -1;
- int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
@@ -814,6 +779,12 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
ALLOC_OR_GOTO (local, afr_local_t, out);
frame->local = local;
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
loc_copy (&local->loc, loc);
if (name)
local->cont.getxattr.name = gf_strdup (name);
@@ -908,36 +879,27 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- local->cont.getxattr.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
-
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "%s: no child is up", loc->path);
- goto out;
- }
- local->cont.getxattr.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
-
STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
(void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
+ children[call_child],
+ children[call_child]->fops->getxattr,
loc, name);
op_ret = 0;
@@ -971,13 +933,14 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
struct iovec *vector, int32_t count, struct iatt *buf,
struct iobref *iobref)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -993,31 +956,21 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.readv.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%p: all subvolumes tried, going out",
- local->fd);
+ last_index = &local->cont.readv.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.readv.last_tried;
-
- if (this_try == read_child) {
- /*
- skip the read child since if we are here
- we must have already tried that child
- */
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readv,
+ children[next_call_child],
+ children[next_call_child]->fops->readv,
local->fd, local->cont.readv.size,
local->cont.readv.offset);
}
@@ -1039,10 +992,10 @@ afr_readv (call_frame_t *frame, xlator_t *this,
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
xlator_t ** children = NULL;
- int32_t read_child = -1;
int call_child = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1053,37 +1006,28 @@ afr_readv (call_frame_t *frame, xlator_t *this,
children = priv->children;
ALLOC_OR_GOTO (local, afr_local_t, out);
-
frame->local = local;
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- /*
- if read fails from the read child, we try
- all children starting with the first one
- */
- local->cont.readv.last_tried = -1;
-
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "%p: no child is up", fd);
- goto out;
- }
- local->cont.readv.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readv.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
local->fd = fd_ref (fd);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index b8d2e27a448..2e2c57265a3 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -428,10 +428,10 @@ afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (pending[curr_read_child][idx] != 0)
goto out;
- fresh_children = GF_CALLOC (priv->child_count, sizeof (*fresh_children),
- gf_afr_mt_int32_t);
+ fresh_children = afr_fresh_children_create (priv->child_count);
if (!fresh_children)
goto out;
+
for (new_read_child = 0; new_read_child < priv->child_count;
new_read_child++) {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 0b8f96ec8b9..c6d26314e8f 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -395,28 +395,28 @@ typedef struct _afr_local {
struct {
int32_t mask;
- int last_tried; /* index of the child we tried previously */
+ int last_index; /* index of the child we tried previously */
} access;
struct {
- int last_tried;
+ int last_index;
ino_t ino;
} stat;
struct {
- int last_tried;
+ int last_index;
ino_t ino;
} fstat;
struct {
size_t size;
- int last_tried;
+ int last_index;
ino_t ino;
} readlink;
struct {
char *name;
- int last_tried;
+ int last_index;
long pathinfo_len;
} getxattr;
@@ -424,7 +424,7 @@ typedef struct _afr_local {
ino_t ino;
size_t size;
off_t offset;
- int last_tried;
+ int last_index;
} readv;
/* dir read */
@@ -444,20 +444,8 @@ typedef struct _afr_local {
off_t offset;
gf_boolean_t failed;
- int last_tried;
+ int last_index;
} readdir;
-
- struct {
- int32_t op_ret;
- int32_t op_errno;
-
- size_t size;
- off_t offset;
- int32_t flag;
-
- int last_tried;
- } getdents;
-
/* inode write */
struct {
@@ -860,6 +848,10 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
int
afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
transaction_lk_type_t lk_type);
+
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count);
+
int
afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
int32_t prev_read_child,
@@ -870,35 +862,15 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
int32_t *fresh_children, int32_t prev_read_child,
int32_t config_read_child);
-/**
- * first_up_child - return the index of the first child that is up
- */
-
-static inline int
-afr_first_up_child (afr_private_t *priv)
-{
- xlator_t ** children = NULL;
- int ret = -1;
- int i = 0;
-
- LOCK (&priv->lock);
- {
- children = priv->children;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i]) {
- ret = i;
- break;
- }
- }
- }
- UNLOCK (&priv->lock);
-
- return ret;
-}
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index);
int32_t
-afr_next_call_child (int32_t *fresh_children, size_t child_count,
- int32_t *last_index, int32_t read_child);
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child);
void
afr_get_fresh_children (int32_t *success_children, int32_t *sources,
int32_t *fresh_children, unsigned int child_count);
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 300b0850443..e7ff4651da2 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1435,14 +1435,15 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *dict)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
priv = this->private;
children = priv->children;
@@ -1452,23 +1453,20 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.getxattr.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.getxattr.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
unwind = 0;
STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->getxattr,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
&local->loc,
local->cont.getxattr.name);
}
@@ -1491,12 +1489,10 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
afr_private_t * priv = NULL;
xlator_t ** children = NULL;
int call_child = 0;
- afr_local_t * local = NULL;
-
- int read_child = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1511,6 +1507,12 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
ALLOC_OR_GOTO (local, afr_local_t, out);
frame->local = local;
+ op_ret = AFR_LOCAL_INIT (local, priv);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
if (name) {
if (!strncmp (name, AFR_XATTR_PREFIX,
strlen (AFR_XATTR_PREFIX))) {
@@ -1543,25 +1545,17 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
op_errno = ENOMEM;
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- if (read_child >= 0) {
- call_child = read_child;
-
- local->cont.getxattr.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
-
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->cont.getxattr.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
}
-
loc_copy (&local->loc, loc);
if (name)
local->cont.getxattr.name = gf_strdup (name);