author		Pranith Kumar K <pkarampu@redhat.com>	2017-06-12 22:06:18 +0530
committer	Shyamsundar Ranganathan <srangana@redhat.com>	2017-06-20 13:49:23 +0000
commit		6945c9b3dc3afd290ee12cd5f90a6c538b2aaf14 (patch)
tree		e08add9b801c4cbc8b06c400b706c6bf3d793f17
parent		808608b33da4beb100d285cc8ed54afe6cb054bc (diff)
cluster/afr: Implement quorum for lk fop
Problem: At the moment, with a replica 3 or arbiter setup, we return
success to the application even when lk succeeds on just one brick,
which is wrong.

Fix: Consider quorum-count successes as success when quorum is enabled.

>BUG: 1461792
>Change-Id: I5789e6eb5defb68f8a0eb9cd594d316f5cdebaea
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Reviewed-on: https://review.gluster.org/17524
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Ravishankar N <ravishankar@redhat.com>

BUG: 1462661
Change-Id: I5789e6eb5defb68f8a0eb9cd594d316f5cdebaea
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://review.gluster.org/17578
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
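The core of the fix is a counting check: the lk fop now succeeds only
when the lock was acquired on at least a quorum of bricks. A minimal,
self-contained sketch of that check, with illustrative names (the real
implementation is afr_has_quorum() in afr-common.c, applied to
local->cont.lk.locked_nodes):

#include <stdbool.h>

/* Sketch only, not AFR source: locked_nodes[i] is non-zero when the
 * lock was acquired on child i. With quorum-type 'fixed' the threshold
 * is the configured quorum-count; with 'auto' it is a simple majority,
 * and AFR additionally lets the first child break the tie when exactly
 * half the children of an even-sized set are up. */
static bool
lk_has_quorum (const unsigned char *locked_nodes, int child_count,
               int quorum_count)
{
        int locked = 0;
        int i;

        for (i = 0; i < child_count; i++)
                if (locked_nodes[i])
                        locked++;

        return locked >= quorum_count;
}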
-rw-r--r--	tests/basic/afr/lk-quorum.t	255
-rw-r--r--	xlators/cluster/afr/src/afr-common.c	56
-rw-r--r--	xlators/cluster/afr/src/afr.h	5
3 files changed, 293 insertions(+), 23 deletions(-)
diff --git a/tests/basic/afr/lk-quorum.t b/tests/basic/afr/lk-quorum.t
new file mode 100644
index 0000000..ad14365
--- /dev/null
+++ b/tests/basic/afr/lk-quorum.t
@@ -0,0 +1,255 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for quorum-type option for replica 2
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'fixed' and quorum-count 2
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^fixed$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^2$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
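+#With 'fixed', quorum is exactly quorum-count bricks: quorum-count 2 on
+#replica 2 means both bricks must grant the lock.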
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When any of the bricks is down, lock/unlock should fail
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'fixed' and quorum-count 1
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When any of the bricks is down, lock/unlock should succeed
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'auto'
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^auto$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
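+#With quorum-type 'auto' on replica 2, the first brick holds the casting
+#vote: quorum survives losing the second brick but not the first.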
+#When the first brick is down, lock/unlock should fail
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#When the second brick is down, lock/unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for replica 3
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
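+#Replica 3 defaults to quorum-type 'auto', so locks need a majority,
+#i.e. any two of the three bricks.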
+#When any of the bricks is down, lock/unlock should succeed
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#kill 3rd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#When any two of the bricks are down, lock/unlock should fail
+#kill first,second bricks
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#kill 2nd,3rd bricks
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#kill 1st,3rd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+cleanup
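In the AFR change below, unlock calls are wound with STACK_WIND_COOKIE
instead of STACK_WIND so that the callback knows which subvolume it is
reporting for. A toy, self-contained illustration of that cookie
pattern (the function names here are invented; only the cast idiom
mirrors the patch):

#include <stdio.h>

typedef void (*unlock_cbk_t) (void *cookie, int op_ret);

/* The child index travels through the call as an opaque cookie and is
 * recovered in the callback, as afr_lk_unlock_cbk() does with its
 * (long) casts. */
static void
unlock_cbk (void *cookie, int op_ret)
{
        int child_index = (int) (long) cookie;

        if (op_ret < 0)
                fprintf (stderr, "unlock failed on child %d\n",
                         child_index);
}

static void
wind_unlock_on_all (int child_count, unlock_cbk_t cbk)
{
        int i;

        for (i = 0; i < child_count; i++)
                cbk ((void *) (long) i, -1 /* simulate a failure */);
}

int
main (void)
{
        wind_unlock_on_all (3, unlock_cbk);
        return 0;
}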
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 6c05374..0643204 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3835,7 +3835,7 @@ unwind:
static int
afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int child_index = (long)cookie;
@@ -4215,15 +4215,27 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
dict_t *xdata)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
int call_count = -1;
+ int child_index = (long)cookie;
local = frame->local;
- call_count = afr_frame_return (frame);
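+ /* Log genuine unlock failures; ENOTCONN/EBADFD only mean the child went down. */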
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock, xdata);
+ NULL, local->xdata_rsp);
return 0;
}
@@ -4245,7 +4257,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
+ NULL, local->xdata_rsp);
return 0;
}
@@ -4255,8 +4267,8 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
+ STACK_WIND_COOKIE (frame, afr_lk_unlock_cbk,
+ (void *) (long) i, priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
&local->cont.lk.user_flock, NULL);
@@ -4272,12 +4284,12 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int child_index = -1;
-/* int ret = 0; */
local = frame->local;
@@ -4285,9 +4297,10 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
child_index = (long) cookie;
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
+ afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
local->op_ret = -1;
- local->op_errno = op_errno;
+ local->op_errno = EAGAIN;
afr_lk_unlock (frame, this);
return 0;
@@ -4307,15 +4320,20 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock, xdata);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
+ &local->cont.lk.user_flock,
+ local->xdata_req);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum (local->cont.lk.locked_nodes, this)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock, NULL);
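+ /* Quorum not met: release whatever locks were acquired, then unwind. */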
+ afr_lk_unlock (frame, this);
} else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno (local, priv);
+
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
+ &local->cont.lk.ret_flock, local->xdata_rsp);
}
return 0;
@@ -4354,11 +4372,13 @@ afr_lk (call_frame_t *frame, xlator_t *this,
local->cont.lk.cmd = cmd;
local->cont.lk.user_flock = *flock;
local->cont.lk.ret_flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
- fd, cmd, flock, xdata);
+ fd, cmd, flock, local->xdata_req);
return 0;
out:
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 3a47ba4..cf736ed 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -875,11 +875,6 @@ typedef struct afr_granular_esh_args {
mismatch */
} afr_granular_esh_args_t;
-/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
-
int
afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
unsigned char *readable, int *event_p, int type);
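
For reference, the child_went_down() macro deleted above lives on
inline in the reworked afr_lk_unlock_cbk(), which skips logging for
exactly these errnos. The same test rewritten as a standalone function
(a sketch equivalent to the removed macro, not code from the tree):

#include <errno.h>
#include <stdbool.h>

/* Did a call fail because the child went down? Equivalent to the
 * removed child_went_down() macro. */
static bool
child_went_down (int op_ret, int op_errno)
{
        return (op_ret < 0) &&
               (op_errno == ENOTCONN || op_errno == EBADFD);
}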