From f6c0b59725615da10435c40fec0f26dae542de74 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 21 May 2019 10:58:44 +0530 Subject: tests: Fix spurious failures in ta-write-on-bad-brick.t Problem: afr_child_up_status_meta works only when LOOKUP on $M0 is successful. There are cases where quorum is not met and LOOKUP fails on $M0, which leads to failures similar to: grep: /mnt/glusterfs/0/.meta/graphs/active/patchy-replicate-0/private: Transport endpoint is not connected This was happening once in a while based on attribute-timeout and md-cache not serving the lookup. Fix: Find child-up status based on statedump instead. Also changed mount options to include --entry-timeout=0 and --attribute-timeout=0. updates bz#1193929 Change-Id: Ic0de72c3006d7399a5feb3e4d10d4748949b2ab3 Signed-off-by: Pranith Kumar K --- tests/basic/afr/ta-read.t | 10 +++++----- tests/basic/afr/ta-shd.t | 6 +++--- tests/basic/afr/ta-write-on-bad-brick.t | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'tests/basic') diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t index d6f9332d757..3cfc16b9b8a 100644 --- a/tests/basic/afr/ta-read.t +++ b/tests/basic/afr/ta-read.t @@ -25,7 +25,7 @@ TEST ! ls $B0/ta/FILE # Kill one brick and write to FILE. TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0 echo "brick0 down">> $M0/FILE TEST [ $? 
-eq 0 ] EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE @@ -35,7 +35,7 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST ta_start_mount_process $M0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 # Read must be allowed since good brick is up. TEST cat $M0/FILE @@ -45,15 +45,15 @@ TEST ta_start_mount_process $M0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 # Toggle good and bad data brick processes. TEST ta_start_brick_process brick0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 # Read must now fail. TEST ! cat $M0/FILE # Bring all data bricks up, and kill TA. 
TEST ta_start_brick_process brick1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 TA_PID=$(ta_get_pid_by_brick_name ta) TEST [ -n $TA_PID ] TEST ta_kill_brick ta diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t index bb2e58b3f77..96ecfc678e0 100644 --- a/tests/basic/afr/ta-shd.t +++ b/tests/basic/afr/ta-shd.t @@ -22,7 +22,7 @@ TEST ta_start_shd_process glustershd TEST touch $M0/a.txt TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0 echo "Hello" >> $M0/a.txt EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2 @@ -33,14 +33,14 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/ #the SHD process. TEST ta_start_brick_process brick0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2 #Kill the previously up brick and try reading from other brick. Since the heal #has happened file content should be same. TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 #Umount and mount to remove cached data. 
TEST umount $M0 TEST ta_start_mount_process $M0 diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t index 18cb65b3a76..096ca9f47cf 100644 --- a/tests/basic/afr/ta-write-on-bad-brick.t +++ b/tests/basic/afr/ta-write-on-bad-brick.t @@ -26,26 +26,26 @@ TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 #Good Data brick is down. TA and bad brick are UP TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0 TEST ta_start_brick_process brick1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5 # Good Data brick is UP. Bad and TA are down TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 TEST ta_start_brick_process brick0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 TEST ta_kill_brick ta TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5 # Good and Bad data bricks are UP. 
TA is down TEST ta_start_brick_process brick1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 cleanup; -- cgit