summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2019-05-21 10:58:44 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2019-05-24 15:00:28 +0000
commitf6c0b59725615da10435c40fec0f26dae542de74 (patch)
treeb5ebd98ba71c8da48a09cb26bc987f22d09fee21
parent0b83b8af60916dcdb850f15da7e7b406809dd1d5 (diff)
tests: Fix spurious failures in ta-write-on-bad-brick.t
Problem: afr_child_up_status_meta works only when LOOKUP on $M0 is successful. There are cases where quorum is not met and LOOKUP fails on $M0 which leads to failures similar to: grep: /mnt/glusterfs/0/.meta/graphs/active/patchy-replicate-0/private: Transport endpoint is not connected This was happening once in a while based on attribute-timeout and md-cache not serving the lookup. Fix: Find child-up status based on statedump instead. Also changed mount options to include --entry-timeout=0 and --attribute-timeout=0 updates bz#1193929 Change-Id: Ic0de72c3006d7399a5feb3e4d10d4748949b2ab3 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
-rw-r--r--tests/basic/afr/ta-read.t10
-rw-r--r--tests/basic/afr/ta-shd.t6
-rw-r--r--tests/basic/afr/ta-write-on-bad-brick.t14
-rw-r--r--tests/thin-arbiter.rc22
-rw-r--r--tests/volume.rc1
5 files changed, 36 insertions, 17 deletions
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t
index d6f9332d757..3cfc16b9b8a 100644
--- a/tests/basic/afr/ta-read.t
+++ b/tests/basic/afr/ta-read.t
@@ -25,7 +25,7 @@ TEST ! ls $B0/ta/FILE
# Kill one brick and write to FILE.
TEST ta_kill_brick brick0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
echo "brick0 down">> $M0/FILE
TEST [ $? -eq 0 ]
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE
@@ -35,7 +35,7 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST ta_start_mount_process $M0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
# Read must be allowed since good brick is up.
TEST cat $M0/FILE
@@ -45,15 +45,15 @@ TEST ta_start_mount_process $M0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
# Toggle good and bad data brick processes.
TEST ta_start_brick_process brick0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
# Read must now fail.
TEST ! cat $M0/FILE
# Bring all data bricks up, and kill TA.
TEST ta_start_brick_process brick1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
TA_PID=$(ta_get_pid_by_brick_name ta)
TEST [ -n $TA_PID ]
TEST ta_kill_brick ta
diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t
index bb2e58b3f77..96ecfc678e0 100644
--- a/tests/basic/afr/ta-shd.t
+++ b/tests/basic/afr/ta-shd.t
@@ -22,7 +22,7 @@ TEST ta_start_shd_process glustershd
TEST touch $M0/a.txt
TEST ta_kill_brick brick0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
echo "Hello" >> $M0/a.txt
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
@@ -33,14 +33,14 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/
#the SHD process.
TEST ta_start_brick_process brick0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
#Kill the previously up brick and try reading from other brick. Since the heal
#has happened file content should be same.
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
#Umount and mount to remove cached data.
TEST umount $M0
TEST ta_start_mount_process $M0
diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t
index 18cb65b3a76..096ca9f47cf 100644
--- a/tests/basic/afr/ta-write-on-bad-brick.t
+++ b/tests/basic/afr/ta-write-on-bad-brick.t
@@ -26,26 +26,26 @@ TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
#Good Data brick is down. TA and bad brick are UP
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
TEST ta_kill_brick brick0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
TEST ta_start_brick_process brick1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
# Good Data brick is UP. Bad and TA are down
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
TEST ta_start_brick_process brick0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
TEST ta_kill_brick ta
TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
# Good and Bad data bricks are UP. TA is down
TEST ta_start_brick_process brick1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
cleanup;
diff --git a/tests/thin-arbiter.rc b/tests/thin-arbiter.rc
index 72202eeb53f..29edeb1222b 100644
--- a/tests/thin-arbiter.rc
+++ b/tests/thin-arbiter.rc
@@ -173,7 +173,7 @@ function ta_start_mount_process()
{
mkdir -p $1
identifier=$(echo $1 | tr / .)
- if glusterfs -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1
+ if glusterfs --entry-timeout=0 --attribute-timeout=0 -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1
then
cat $B0/$identifier.pid
else
@@ -182,6 +182,13 @@ function ta_start_mount_process()
fi
}
+function ta_get_mount_pid()
+{
+ local mount_path=$1
+ identifier=$(echo $mount_path | tr / .)
+ cat $B0/${identifier}.pid
+}
+
function ta_create_mount_volfile()
{
local b0=$B0/$1
@@ -607,3 +614,16 @@ function ta_start_shd_process()
return 1
fi
}
+
+function ta_mount_child_up_status()
+{
+ local mount_path=$1
+ #brick_id is (brick-num in volume info - 1)
+ local vol=$2
+ local brick_id=$3
+ local pid=$(ta_get_mount_pid $mount_path)
+ local fpath=$(generate_statedump $pid)
+ up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
diff --git a/tests/volume.rc b/tests/volume.rc
index f652a5b0099..af7690ba5ff 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -138,7 +138,6 @@ function wait_statedump_ready {
}
function generate_statedump {
- local fpath=""
pid=$1
#remove old stale statedumps
cleanup_statedump $pid