summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-common.c
diff options
context:
space:
mode:
author: Pranith Kumar K <pranithk@gluster.com> 2011-09-20 18:30:42 +0530
committer: Vijay Bellur <vijay@gluster.com> 2011-09-21 04:25:15 -0700
commit: 03591027b06c556baa95c6fa4569be0bff4adcd8 (patch)
tree: acb3ff42e7df960a3d294916487e27e6757e0258 /xlators/cluster/afr/src/afr-common.c
parent: 82d1a445b92526629d699f947a2d2bd029c8db75 (diff)
cluster/afr: Make local->child_up immutable
An AFR transaction performs the lock, pre-op, op, post-op and unlock steps, in that order. The child_up[] array is overloaded to also record the children on which the first two steps succeeded. This works perfectly well for transactions, but the locking/unlocking part of the code is re-used by data self-heal, where each loop_frame performs the lock, rchecksum, read-from-source, write-to-sinks and unlock steps. The rchecksum fop assumes that it needs to run on one source plus all sinks, and sets call_count to that number. But if the lock step fails on any of the sinks, the child_up entry of that child is set to 0, which results in a call_count mismatch: the frame hangs, expecting more cbks that will never come. When this happens, the loop_frame never reaches the unlock step, leading to hangs on that file. Change-Id: I3dd0449cc6193a980bacf637d935881f4b22210a BUG: 3597 Reviewed-on: http://review.gluster.com/474 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Amar Tumballi <amar@gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 16
1 files changed, 15 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 0e4e9735503..2e5ca71b219 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -762,6 +762,7 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->transaction.pre_op);
GF_FREE (local->transaction.child_errno);
GF_FREE (local->child_errno);
GF_FREE (local->transaction.eager_lock);
@@ -936,6 +937,13 @@ afr_locked_children_count (unsigned char *children, unsigned int child_count)
return afr_set_elem_count_get (children, child_count);
}
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count)
+{
+ return afr_set_elem_count_get (pre_op, child_count);
+}
+
gf_boolean_t
afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
{
@@ -3680,11 +3688,17 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (local->fd) {
local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
priv->child_count,
- gf_afr_mt_int32_t);
+ gf_afr_mt_char);
if (!local->fd_open_on)
goto out;
}
+ local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
for (i = 0; i < priv->child_count; i++) {
local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
3, /* data + metadata + entry */