From 885c56b6f3c43cea0b27345f47f5522b42ebf278 Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Wed, 8 Aug 2018 10:30:31 +0530 Subject: tests: fix brick check orders fix brick checks for validating-server-quorum.t & quorum-validation.t ...and make brick_up_status_1 function more generic. Also fix a timing issue in bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t Change-Id: I797ef4cec5b160aafa979bae7151b1e99fcb48ac Updates: bz#1603063 Signed-off-by: Atin Mukherjee --- tests/basic/tier/new-tier-cmds.t | 4 +- tests/bitrot/bug-1294786.t | 4 +- ...le-shared-storage-and-remove-brick-validation.t | 4 +- tests/bugs/glusterd/quorum-validation.t | 31 ++++++++------- .../reset-brick-and-daemons-follow-quorum.t | 4 +- tests/bugs/glusterd/validating-server-quorum.t | 45 ++++++++++++++-------- ...e-with-other-processes-accessing-mounted-path.t | 5 +++ ...51-snapshot-creation-failed-after-brick-reset.t | 2 +- tests/cluster.rc | 10 ++--- 9 files changed, 66 insertions(+), 43 deletions(-) diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t index 2c48e027b1b..b9c9390536f 100644 --- a/tests/basic/tier/new-tier-cmds.t +++ b/tests/basic/tier/new-tier-cmds.t @@ -88,8 +88,8 @@ TEST $glusterd_2; EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; # Make sure we check that the *bricks* are up and not just the node. >:-( -EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 brick_up_status_1 $V0 $H2 $B2/${V0}_b2 -EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 brick_up_status_1 $V0 $H2 $B2/${V0}_h2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2 # Parsing normal output doesn't work because of line-wrap issues on our # regression machines, and the version of xmllint there doesn't support --xpath diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t index b177574f756..5b4b6ddb4d3 100644 --- a/tests/bitrot/bug-1294786.t +++ b/tests/bitrot/bug-1294786.t @@ -65,8 +65,8 @@ EXPECT "4" get_quarantine_count "$B1"; TEST $CLI_1 volume stop $V0 TEST $CLI_1 volume start $V0 EXPECT 'Started' volinfo_field_1 $V0 'Status'; -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1 diff --git a/tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t b/tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t index 9e05e8150d6..11ed0d94d79 100644 --- a/tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t +++ b/tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t @@ -46,7 +46,7 @@ kill_glusterd 2 TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start TEST $glusterd_2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0} +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0} EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 @@ -61,7 +61,7 @@ kill_glusterd 2 TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit TEST $glusterd_2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0} +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0} EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t index ab7c1adc1c3..05aef4edccb 100644 --- a/tests/bugs/glusterd/quorum-validation.t +++ b/tests/bugs/glusterd/quorum-validation.t @@ -14,12 +14,15 @@ EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H2:$B2/${V0}1 TEST $CLI_1 volume set $V0 cluster.server-quorum-type server TEST $CLI_1 volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1 #bug-1177132 - sync server quorum options when a node is brought up TEST $CLI_1 volume set all cluster.server-quorum-ratio 52 #Bring down 2nd glusterd TEST kill_glusterd 2 +EXPECT_WITHIN $PROBE_TIMEOUT 0 peer_count #bug-1104642 - sync server quorum options when a node is brought up #set the volume all options from the 1st glusterd @@ -49,8 +52,8 @@ TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0}2 start TEST $CLI_1 volume set $V0 barrier enable TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0}2 stop -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1 ## Stop the volume TEST $CLI_1 volume stop $V0 @@ -75,8 +78,8 @@ TEST $CLI_1 volume profile $V0 start #bug-1352277 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1 TEST $CLI_1 volume set $V0 cluster.server-quorum-type none @@ -85,31 +88,31 @@ TEST killall_gluster #bring back 1st glusterd and check whether the brick process comes back TEST $glusterd_1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 #enabling quorum should bring down the brick TEST $CLI_1 volume set $V0 cluster.server-quorum-type server -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status_1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 TEST $glusterd_2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1 #bug-1367478 - brick processes should not be up when quorum is not met TEST $CLI_1 volume create $V1 $H1:$B1/${V1}1 $H2:$B2/${V1}2 TEST $CLI_1 volume start $V1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V1 $H1 $B1/${V1}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V1 $H2 $B2/${V1}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H1 $B1/${V1}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H2 $B2/${V1}2 # Restart 2nd glusterd TEST kill_glusterd 2 TEST $glusterd_2 # Check if all bricks are up -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V1 $H1 $B1/${V1}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V1 $H2 $B2/${V1}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H1 $B1/${V1}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H2 $B2/${V1}2 cleanup diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t index 5d2d9590a0e..cdb1a3399c9 100644 --- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t @@ -41,8 +41,8 @@ TEST $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} $H1:$B0/${V0} commit force TEST $CLI_1 peer probe $H3; EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B0/${V0} -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B0/${V0} +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B0/${V0} +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B0/${V0} EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2 diff --git a/tests/bugs/glusterd/validating-server-quorum.t b/tests/bugs/glusterd/validating-server-quorum.t index 277bb4af993..ae7d83fd81c 100644 --- a/tests/bugs/glusterd/validating-server-quorum.t +++ b/tests/bugs/glusterd/validating-server-quorum.t @@ -32,9 +32,9 @@ TEST $CLI_1 volume start $V0 #bug-1345727 - bricks should be down when quorum is not met -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H3 $B3/${V0}3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H3 $B3/${V0}3 # Bring down glusterd on 2nd node TEST kill_glusterd 2 @@ -44,7 +44,7 @@ TEST kill_glusterd 3 EXPECT_WITHIN $PROBE_TIMEOUT 0 peer_count # Server quorum is not met. Brick on 1st node must be down -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status_1 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1 # Set quorum ratio 95. means 95 % or more than 95% nodes of total available node # should be available for performing volume operation. @@ -62,26 +62,41 @@ TEST $glusterd_2 EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count # Server quorum is still not met. Bricks should be down on 1st and 2nd nodes -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status_1 $V0 $H1 $B1/${V0}1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status_1 $V0 $H2 $B2/${V0}2 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2 # Bring back 3rd glusterd TEST $glusterd_3 EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count # Server quorum is met now. Bricks should be up on all nodes -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H3 $B3/${V0}3 +# Check from 3rd instance of glusterd so that the 3rd node finishes all its +# handshake and then report back the brick status +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H2 $B2/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H3 $B3/${V0}3 + +# Check from 1st instance of glusterd +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H2 $B2/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H3 $B3/${V0}3 + +# TODO : Because commit fe71ee7 introduced a delay of 1 sec to wait for shd connect and +# disconnect events to be serially processed during a restart of shd daemon, +# this introduced a race where while releasing big lock, if any command sneaks +# and acquires the big lock, it might be able to work on a volinfo which is +# stale. We need to find a better way to fix this. + +sleep 3 # quorum is met. replace-brick will execute successfully EXPECT_WITHIN $PEER_SYNC_TIMEOUT 0 attempt_replace_brick 1 $V0 $H2:$B2/${V0}2 $H2:$B2/${V0}2_new TEST $CLI_1 volume reset all TEST $CLI_1 volume set $V0 cluster.server-quorum-type server -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2_new -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H3 $B3/${V0}3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2_new +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H3 $B3/${V0}3 #bug-913555 - volume should become unwritable when quorum does not met @@ -92,15 +107,15 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0; # Kill one pseudo-node, make sure the others survive and volume stays up. TEST kill_node 3; EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers; -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2_new +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2_new EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0; # Kill another pseudo-node, make sure the last one dies and volume goes down. TEST kill_node 2; EXPECT_WITHIN $PROBE_TIMEOUT 0 check_peers #two glusterfsds of the other two glusterds must be dead -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status_1 $V0 $H1 $B1/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_fs $M0; TEST $glusterd_2; diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t index 22f98d2b5a7..f30194b6339 100644 --- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t +++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t @@ -111,6 +111,10 @@ EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count; EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0} EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1} +# It might be possible that the import snap synctask is still updating the data, +# we need to allow a buffer time to be on the safer side +sleep 2 + kill_glusterd 2 activate_snapshots EXPECT 'Started' snapshot_status ${V0}_snap; @@ -125,3 +129,4 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0} EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" mounted_snaps ${V1} cleanup; +# run first! diff --git a/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t index 0624a5db977..53b274e8819 100644 --- a/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t +++ b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t @@ -28,7 +28,7 @@ TEST ! snapshot_exists 1 ${V0}_snap1 TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 start TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 $H1:$L1/B1 commit force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $L1/B1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $L1/B1 TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp TEST snapshot_exists 1 ${V0}_snap1 diff --git a/tests/cluster.rc b/tests/cluster.rc index 1c5247ca43f..99be8e79c21 100644 --- a/tests/cluster.rc +++ b/tests/cluster.rc @@ -184,10 +184,10 @@ function volinfo_field_2() $CLI_2 volume info $vol | grep "^$field: " | sed 's/.*: //'; } -function brick_up_status_1 { - local vol=$1 - local host=$2 - local brick=$3 - $CLI_1 volume status $vol $host:$brick --xml | sed -ne 's/.*\([01]\)<\/status>/\1/p' +function cluster_brick_up_status { + local vol=$2 + local host=$3 + local brick=$4 + eval \$CLI_$1 volume status $vol $host:$brick --xml | sed -ne 's/.*\([01]\)<\/status>/\1/p' } -- cgit