summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster
diff options
context:
space:
mode:
author: Pranith Kumar K <pranithk@gluster.com> 2011-06-08 09:30:01 +0000
committer: Anand Avati <avati@gluster.com> 2011-06-08 11:19:01 -0700
commit: d979696eae8e73bcca9eb6b8b6cdaecf2e19700a (patch)
tree: ee85db96f64c7d7bdc64fadcaf1e6087a2a80ecc /xlators/cluster
parent: 43368cffd23b9dc4b1f98cf595b0d486b9e6dec9 (diff)
cluster/afr: Read-dir should wind to the read-child first
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c | 50
1 files changed, 33 insertions, 17 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index b2a001a198a..65adc566465 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -522,10 +522,10 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t ** children = NULL;
ino_t inum = 0;
int call_child = 0;
+ int first_call_child = 0;
int ret = 0;
gf_dirent_t * entry = NULL;
gf_dirent_t * tmp = NULL;
- int child_index = -1;
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
off_t offset = 0;
@@ -535,7 +535,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- child_index = (long) cookie;
+ first_call_child = (long) cookie;
+ if (local->cont.readdir.last_tried == -1) {
+ call_child = (long) cookie;
+ } else {
+ call_child = local->cont.readdir.last_tried;
+ }
if (priv->strict_readdir) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -550,13 +555,15 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (child_went_down (op_ret, op_errno)) {
- if (all_tried (child_index, priv->child_count)) {
+ if ((call_child + 1) % priv->child_count
+ == first_call_child) {
gf_log (this->name, GF_LOG_INFO,
"all options tried going out");
goto out;
}
- call_child = ++child_index;
+ call_child = (call_child + 1) % priv->child_count;
+ local->cont.readdir.last_tried = call_child;
gf_log (this->name, GF_LOG_TRACE,
"starting readdir afresh on child %d, offset %"PRId64,
@@ -565,7 +572,7 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx->failed_over = _gf_true;
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
+ (void *) (long) first_call_child,
children[call_child],
children[call_child]->fops->readdirp, local->fd,
local->cont.readdir.size, 0);
@@ -576,10 +583,10 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != -1) {
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
inum = afr_itransform (entry->d_ino, priv->child_count,
- child_index);
+ call_child);
entry->d_ino = inum;
inum = afr_itransform (entry->d_stat.ia_ino,
- priv->child_count, child_index);
+ priv->child_count, call_child);
entry->d_stat.ia_ino = inum;
if ((local->fd->inode == local->fd->inode->table->root)
@@ -611,12 +618,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"trying to fetch non-duplicate entries "
"from offset %"PRId64", child %s",
- offset, children[child_index]->name);
+ offset, children[call_child]->name);
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) child_index,
- children[child_index],
- children[child_index]->fops->readdirp,
+ (void *) (long) first_call_child,
+ children[call_child],
+ children[call_child]->fops->readdirp,
local->fd, local->cont.readdir.size, offset);
return 0;
}
@@ -645,6 +652,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
int ret = -1;
int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -662,12 +670,20 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
frame->local = local;
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "no child is up");
- goto out;
+ read_child = afr_read_child (this, fd->inode);
+
+ if ((read_child >= 0) && (priv->child_up[read_child])) {
+ call_child = read_child;
+ local->cont.readdir.last_tried = -1;
+ } else {
+ call_child = afr_first_up_child (priv);
+ if (call_child == -1) {
+ op_errno = ENOTCONN;
+ gf_log (this->name, GF_LOG_INFO,
+ "no child is up");
+ goto out;
+ }
+ local->cont.readdir.last_tried = call_child;
}
local->fd = fd_ref (fd);