summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaghavendra Bhat <raghavendra@redhat.com>2013-01-28 16:14:09 +0530
committerAnand Avati <avati@redhat.com>2013-01-29 12:50:55 -0800
commit326a47939dabff218205ca2959b9701e2e0ce47c (patch)
tree04160c152808cc1e9e074dd49354c22d7df06589
parent7cc5edb3dcbacaec65b67831018b51a5a384f513 (diff)
cluster/afr: if a subvolume is down wind the lock request to next
When one of the subvolume is down, then lock request is not attempted on that subvolume and move on to the next subvolume. /* skip over children that are down */ while ((child_index < priv->child_count) && !local->child_up[child_index]) child_index++; In the above case if there are 2 subvolumes and 2nd subvolume is down (subvolume 1 from afr's view), then after attempting lock on 1st child (i.e subvolume 0) child index is calculated to be 1. But since the 2nd child is down child_index is incremented to 2 as per the above logic and lock request is STACK_WINDed to the child with child_index 2. Since there are only 2 children for afr the child (i.e the xlator_t pointer) for child_index will be NULL. The process crashes when it dereference the NULL xlator object. Change-Id: Icd9b5ad28bac1b805e6e80d53c12d296526bedf5 BUG: 765564 Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com> Reviewed-on: http://review.gluster.org/4438 Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r--tests/bugs/bug-765564.t11
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c31
2 files changed, 27 insertions, 15 deletions
diff --git a/tests/bugs/bug-765564.t b/tests/bugs/bug-765564.t
index d82f8ca37ac..0b8b8cd4f9e 100644
--- a/tests/bugs/bug-765564.t
+++ b/tests/bugs/bug-765564.t
@@ -1,6 +1,7 @@
#!/bin/bash
. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
TEST glusterd
TEST pidof glusterd
@@ -66,6 +67,16 @@ function rm_mv_correctness () {
TEST touch $M0/a;
TEST mv $M0/a $M0/b;
+#test rename fop when one of the bricks is down
+kill_brick ${V0} ${H0} ${B0}/${V0}-1;
+TEST touch $M0/h;
+TEST mv $M0/h $M0/1;
+
+TEST $CLI volume start $V0 force;
+
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1;
+find $M0 | xargs stat 2>/dev/null 1>/dev/null;
+
TEST rm_mv_correctness;
TEST umount $M0;
cleanup;
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 265330d115b..542b2959d83 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1019,6 +1019,17 @@ afr_is_entrylk (afr_internal_lock_t *int_lock,
return is_entrylk;
}
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+ else if (local->fd && !local->fd_open_on[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
int
afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
@@ -1059,20 +1070,6 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
return 0;
}
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
@@ -1107,6 +1104,11 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
@@ -1144,7 +1146,6 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
case AFR_ENTRY_TRANSACTION:
/*Accounting for child_index increments on 'down'
*and 'fd-less' children */
- cookie = lockee_no * priv->child_count + child_index;
if (local->fd) {
AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,