summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pranith Kumar K <pranithk@gluster.com> 2011-08-19 15:56:49 +0530
committer: Vijay Bellur <vijay@gluster.com> 2011-08-19 23:18:53 -0700
commit: d9c6513289ca33204cdc110112ff8e45cbc8970a (patch)
tree: d68097996f3dd9b0882343be1c5d0b7bf6758315
parent: 5e89fda5180e66b1757bc620dfdb5701ce4d43f1 (diff)
cluster/afr: Update fresh_children in lookup if no other ops in progress
If a write/truncate fails, we should remove the child on which the fop failed from the fresh children. The previous code assumed that the children on which the fop succeeded are the fresh children, which is wrong. This patch fixes that.

Change-Id: I1e6e21e20faea00516a0fdd2e95f2d7e9cf9076d
BUG: 3411
Reviewed-on: http://review.gluster.com/263
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 220
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c 12
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 10
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 4
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 69
-rw-r--r-- xlators/cluster/afr/src/afr.h 30
9 files changed, 236 insertions, 115 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ca470716aee..94335bd0298 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -197,9 +197,9 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
if (!ctx)
goto unlock;
- switch (params->mask_type) {
- case AFR_ICTX_READ_CHILD_MASK:
- fresh_children = params->u.read_ctx.fresh_children;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
read_child = (int32_t)(ctx->masks &
AFR_ICTX_READ_CHILD_MASK);
params->u.read_ctx.read_child = read_child;
@@ -208,13 +208,16 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
for (i = 0; i < priv->child_count; i++)
fresh_children[i] = ctx->fresh_children[i];
break;
- case AFR_ICTX_OPENDIR_DONE_MASK:
+ case AFR_INODE_GET_OPENDIR_DONE:
params->u.value = ctx->masks &
AFR_ICTX_OPENDIR_DONE_MASK;
break;
- case AFR_ICTX_SPLIT_BRAIN_MASK:
+ case AFR_INODE_GET_SPLIT_BRAIN:
params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK;
break;
+ default:
+ GF_ASSERT (0);
+ break;
}
}
unlock:
@@ -226,7 +229,7 @@ afr_is_split_brain (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK;
+ params.op = AFR_INODE_GET_SPLIT_BRAIN;
afr_inode_get_ctx (this, inode, &params);
return params.u.value;
}
@@ -236,7 +239,7 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
afr_inode_get_ctx (this, inode, &params);
return params.u.value;
}
@@ -247,26 +250,31 @@ afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_READ_CHILD_MASK;
- params.u.read_ctx.fresh_children = fresh_children;
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
afr_inode_get_ctx (this, inode, &params);
return params.u.read_ctx.read_child;
}
void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
- int i = 0;
- rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ GF_ASSERT (read_child >= 0);
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
+}
- /* avoid memcpy as int, int32_t are used interchangeably
- */
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
for (i = 0; i < child_count; i++) {
if (fresh_children)
ctx->fresh_children[i] = fresh_children[i];
@@ -276,26 +284,42 @@ afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
}
void
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *stale_children, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT (stale_children);
+ afr_inode_ctx_set_read_child (ctx, read_child);
+ for (i = 0; i < child_count; i++) {
+ if ((ctx->fresh_children[i] == -1) || (stale_children[i] == -1))
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+}
+
+void
afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
- rest_of_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
}
void
afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
if (set) {
- rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
+ remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
} else {
ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
}
@@ -314,6 +338,7 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
gf_boolean_t set = _gf_false;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
priv = this->private;
LOCK (&inode->lock);
@@ -324,21 +349,31 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
if (!ctx)
goto unlock;
- switch (params->mask_type) {
- case AFR_ICTX_READ_CHILD_MASK:
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.fresh_children;
+ fresh_children = params->u.read_ctx.children;
afr_inode_ctx_set_read_ctx (ctx, read_child,
fresh_children,
priv->child_count);
break;
- case AFR_ICTX_OPENDIR_DONE_MASK:
+ case AFR_INODE_RM_STALE_CHILDREN:
+ read_child = params->u.read_ctx.read_child;
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx, read_child,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
afr_inode_ctx_set_opendir_done (ctx);
break;
- case AFR_ICTX_SPLIT_BRAIN_MASK:
+ case AFR_INODE_SET_SPLIT_BRAIN:
set = params->u.value;
afr_inode_ctx_set_splitbrain (ctx, set);
break;
+ default:
+ GF_ASSERT (0);
+ break;
}
ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
if (ret) {
@@ -356,7 +391,7 @@ afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK;
+ params.op = AFR_INODE_SET_SPLIT_BRAIN;
params.u.value = set;
afr_inode_set_ctx (this, inode, &params);
}
@@ -366,7 +401,7 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
afr_inode_set_ctx (this, inode, &params);
}
@@ -375,13 +410,34 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
int32_t *fresh_children)
{
afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+ priv = this->private;
GF_ASSERT (read_child >= 0);
GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
+
+ params.op = AFR_INODE_SET_READ_CTX;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx (this, inode, &params);
+}
+
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (stale_children);
- params.mask_type = AFR_ICTX_READ_CHILD_MASK;
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.fresh_children = fresh_children;
+ params.u.read_ctx.children = stale_children;
afr_inode_set_ctx (this, inode, &params);
}
@@ -885,13 +941,14 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
uuid_copy (loc->pargfid, postparent->ia_gfid);
}
-void
+int
afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
int32_t read_child = -1;
struct iatt *buf = NULL;
struct iatt *postparent = NULL;
dict_t **xattr = NULL;
+ int ret = 0;
GF_ASSERT (local);
@@ -901,6 +958,12 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
NULL);
+ if (read_child < 0) {
+ ret = -EIO;
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
read_child);
*xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
@@ -911,6 +974,8 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
/* fix for RT #602 */
local->cont.lookup.inode->ia_type = buf->ia_type;
}
+out:
+ return ret;
}
static void
@@ -1101,9 +1166,9 @@ out:
}
static inline gf_boolean_t
-afr_is_self_heal_running (afr_local_t *local)
+afr_is_transaction_running (afr_local_t *local)
{
- GF_ASSERT (local);
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
}
@@ -1397,7 +1462,7 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
afr_lookup_set_self_heal_data (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_self_heal_running (local))
+ if (afr_is_transaction_running (local))
goto out;
afr_launch_self_heal (frame, this, local->cont.lookup.inode,
@@ -1478,21 +1543,25 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
goto out;
}
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
+ if (!afr_is_transaction_running (local)) {
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
}
- afr_lookup_build_response_params (local, this);
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
if (afr_is_fresh_lookup (&local->loc, this)) {
afr_update_loc_gfids (&local->loc,
&local->cont.lookup.buf,
@@ -1740,12 +1809,12 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.bufs = iatts;
- success_children = afr_fresh_children_create (child_count);
+ success_children = afr_children_create (child_count);
if (NULL == success_children)
goto out;
local->cont.lookup.success_children = success_children;
- local->fresh_children = afr_fresh_children_create (child_count);
+ local->fresh_children = afr_children_create (child_count);
if (NULL == local->fresh_children)
goto out;
@@ -1774,6 +1843,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
local->op_ret = -1;
frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
op_errno = ENOENT;
@@ -3493,7 +3563,7 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
if (!local->pending)
goto out;
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children)
goto out;
@@ -3525,52 +3595,72 @@ afr_reset_children (int32_t *fresh_children, int32_t child_count)
}
int32_t*
-afr_fresh_children_create (int32_t child_count)
+afr_children_create (int32_t child_count)
{
- int32_t *fresh_children = NULL;
+ int32_t *children = NULL;
int i = 0;
GF_ASSERT (child_count > 0);
- fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children),
- gf_afr_mt_int32_t);
- if (NULL == fresh_children)
+ children = GF_CALLOC (child_count, sizeof (*children),
+ gf_afr_mt_int32_t);
+ if (NULL == children)
goto out;
for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+ children[i] = -1;
out:
- return fresh_children;
+ return children;
}
void
-afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
- int32_t child_count)
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count)
{
gf_boolean_t child_found = _gf_false;
int i = 0;
for (i = 0; i < child_count; i++) {
- if (fresh_children[i] == -1)
+ if (children[i] == -1)
break;
- if (fresh_children[i] == child) {
+ if (children[i] == child) {
child_found = _gf_true;
break;
}
}
+
if (!child_found) {
GF_ASSERT (i < child_count);
- fresh_children[i] = child;
+ children[i] = child;
+ }
+}
+
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ if (i != (child_count - 1))
+ memmove (children + i, children + i + 1,
+ sizeof (*children)*(child_count - i - 1));
+ children[child_count - 1] = -1;
+ break;
+ }
}
}
int
-afr_get_children_count (int32_t *fresh_children, unsigned int child_count)
+afr_get_children_count (int32_t *children, unsigned int child_count)
{
int count = 0;
int i = 0;
for (i = 0; i < child_count; i++) {
- if (fresh_children[i] == -1)
+ if (children[i] == -1)
break;
count++;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 0fac7324c78..645da2a6c57 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -668,7 +668,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 7d7cc3d80c8..f8157482758 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -136,7 +136,7 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -254,7 +254,7 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -375,7 +375,7 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -495,7 +495,7 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -879,7 +879,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -1013,7 +1013,7 @@ afr_readv (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 15b659fa837..f66bdff8446 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1468,8 +1468,8 @@ afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
sh->entrybuf.ia_gfid)))
continue;
- afr_fresh_children_add_child (sh->fresh_children,
- i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
afr_sh_purge_entry_common (frame, this,
@@ -2094,9 +2094,9 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
priv->child_count,
gf_afr_mt_int32_t);
}
- sh->success_children = afr_fresh_children_create (priv->child_count);
- sh->fresh_children = afr_fresh_children_create (priv->child_count);
- sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count);
+ sh->success_children = afr_children_create (priv->child_count);
+ sh->fresh_children = afr_children_create (priv->child_count);
+ sh->fresh_parent_dirs = afr_children_create (priv->child_count);
FRAME_SU_DO (sh_frame, afr_local_t);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 74a1bf35371..dcaad9c8b47 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -310,7 +310,7 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
sh = &local->self_heal;
i = (long)cookie;
- afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
call_count = afr_frame_return (frame);
if (call_count == 0) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 556ea8027a3..9e80cb3d5a5 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -130,7 +130,7 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
i = (long)cookie;
- afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_INFO,
"%s: failed to erase pending xattrs on %s (%s)",
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index bacf2758880..5445132ab8c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -160,8 +160,8 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
(!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_fresh_children_add_child (sh->fresh_children, i,
- priv->child_count);
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
call_count = afr_frame_return (frame);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 1fb0781d8b7..fc030433b69 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -404,53 +404,67 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
void
-afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_transaction_type type)
+afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, afr_transaction_type type)
{
- int curr_read_child = -1;
- int new_read_child = -1;
+ int i = -1;
+ int count = 0;
+ int read_child = -1;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int **pending = NULL;
int idx = 0;
+ int32_t *stale_children = NULL;
int32_t *fresh_children = NULL;
- size_t success_count = 0;
+ gf_boolean_t rm_stale_children = _gf_false;
idx = afr_index_for_transaction_type (type);
priv = this->private;
local = frame->local;
- curr_read_child = afr_inode_get_read_ctx (this, inode, NULL);
pending = local->pending;
- GF_ASSERT (curr_read_child >= 0);
-
- if (pending[curr_read_child][idx] != 0)
+ stale_children = afr_children_create (priv->child_count);
+ if (!stale_children)
goto out;
- fresh_children = afr_fresh_children_create (priv->child_count);
- if (!fresh_children)
- goto out;
+ fresh_children = local->fresh_children;
+ read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
- for (new_read_child = 0; new_read_child < priv->child_count;
- new_read_child++) {
+ GF_ASSERT (read_child >= 0);
- if (!priv->child_up[new_read_child])
- /* child is down */
- continue;
+ if (pending[read_child][idx] == 0)
+ read_child = -1;
- if (pending[new_read_child][idx] == 0)
- /* op just failed */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_is_child_present (fresh_children,
+ priv->child_count, i))
continue;
- fresh_children[success_count] = new_read_child;
- success_count++;
+ if ((!priv->child_up[i]) || (pending[i][idx] == 0)) {
+ /* child is down or op failed on it */
+ rm_stale_children = _gf_true;
+ afr_children_rm_child (fresh_children, i,
+ priv->child_count);
+ stale_children[count++] = i;
+ }
+ }
+
+ if (!rm_stale_children) {
+ GF_ASSERT (read_child >= 0);
+ goto out;
+ }
+
+ if (fresh_children[0] == -1) {
+ //All children failed. leave as-is
+ goto out;
}
- afr_inode_set_read_ctx (this, inode, fresh_children[0],
- fresh_children);
+ if (read_child == -1)
+ read_child = fresh_children[0];
+ afr_inode_rm_stale_children (this, inode, read_child, stale_children);
out:
- if (fresh_children)
- GF_FREE (fresh_children);
+ if (stale_children)
+ GF_FREE (stale_children);
return;
}
@@ -478,8 +492,9 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
local->child_up, local->transaction.type);
if (local->fd)
- afr_update_read_child (frame, this, local->fd->inode,
- local->transaction.type);
+ afr_transaction_rm_stale_children (frame, this,
+ local->fd->inode,
+ local->transaction.type);
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 2ca13078ef7..236a24a6057 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -48,13 +48,23 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ AFR_INODE_SET_READ_CTX = 1,
+ AFR_INODE_RM_STALE_CHILDREN,
+ AFR_INODE_SET_OPENDIR_DONE,
+ AFR_INODE_SET_SPLIT_BRAIN,
+ AFR_INODE_GET_READ_CTX,
+ AFR_INODE_GET_OPENDIR_DONE,
+ AFR_INODE_GET_SPLIT_BRAIN,
+} afr_inode_op_t;
+
typedef struct afr_inode_params_ {
- uint64_t mask_type;
+ afr_inode_op_t op;
union {
gf_boolean_t value;
struct {
int32_t read_child;
- int32_t *fresh_children;
+ int32_t *children;
} read_ctx;
} u;
} afr_inode_params_t;
@@ -869,7 +879,7 @@ afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
int32_t *
-afr_fresh_children_create (int32_t child_count);
+afr_children_create (int32_t child_count);
int
AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
@@ -902,19 +912,22 @@ afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
int32_t read_child);
void
afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count);
+ int32_t *children, unsigned int child_count);
void
-afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
+afr_children_add_child (int32_t *children, int32_t child,
int32_t child_count);
void
-afr_reset_children (int32_t *fresh_children, int32_t child_count);
+afr_children_rm_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_reset_children (int32_t *children, int32_t child_count);
gf_boolean_t
afr_error_more_important (int32_t old_errno, int32_t new_errno);
int
afr_errno_count (int32_t *children, int *child_errno,
unsigned int child_count, int32_t op_errno);
int
-afr_get_children_count (int32_t *fresh_children, unsigned int child_count);
+afr_get_children_count (int32_t *children, unsigned int child_count);
gf_boolean_t
afr_is_child_present (int32_t *success_children, int32_t child_count,
int32_t child);
@@ -941,4 +954,7 @@ afr_transaction_type_get (ia_type_t ia_type);
int32_t
afr_resultant_errno_get (int32_t *children,
int *child_errno, unsigned int child_count);
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *stale_children);
#endif /* __AFR_H__ */