#!/bin/bash

. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc

cleanup;

START_TIMESTAMP=`date +%s`

#Kill one brick out of every replica pair: the even-numbered bricks when
#$decide_kill is 0, the odd-numbered ones otherwise.
function kill_multiple_bricks {
        local vol=$1
        local host=$2
        local brickpath=$3

        if [ $decide_kill == 0 ]
        then
                for ((i=0; i<=4; i=i+2))
                do
                        TEST kill_brick $vol $host $brickpath/${vol}$i
                done
        else
                for ((i=1; i<=5; i=i+2))
                do
                        TEST kill_brick $vol $host $brickpath/${vol}$i
                done
        fi
}

#Wait until the self-heal daemon sees the previously killed bricks as up.
function check_bricks_up {
        local vol=$1

        if [ $decide_kill == 0 ]
        then
                for ((i=0; i<=4; i=i+2))
                do
                        EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
                done
        else
                for ((i=1; i<=5; i=i+2))
                do
                        EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
                done
        fi
}

#Count the bricks reported as disconnected in heal info output.
function disconnected_brick_count {
        local vol=$1
        $CLI volume heal $vol info | \
                egrep -i '(transport|Socket is not connected)' | wc -l
}

TESTS_EXPECTED_IN_LOOP=20

TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
TEST $CLI volume set $V0 cluster.background-self-heal-count 0
TEST $CLI volume set $V0 cluster.eager-lock off
TEST $CLI volume set $V0 performance.flush-behind off
TEST $CLI volume start $V0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0

#Kill even- or odd-numbered bricks depending on the parity of the day of the year
decide_kill=$((`date +"%j"|sed 's/^0*//'` % 2 ))

kill_multiple_bricks $V0 $H0 $B0

cd $M0
HEAL_FILES=0
for i in {1..10}
do
        dd if=/dev/urandom of=f bs=1024k count=10
        HEAL_FILES=$(($HEAL_FILES+1)) #+1 for data/metadata self-heal of 'f'
        mkdir a; cd a; #+3 for metadata self-heal of 'a', one per subvolume of DHT
        HEAL_FILES=$(($HEAL_FILES+3))
done
#+3 for entry self-heal of "/", one per subvolume of DHT
HEAL_FILES=$(($HEAL_FILES + 3))
#Total expected: 10 iterations x (1 + 3) + 3 = 43 entries in heal info
cd ~

EXPECT "$HEAL_FILES" get_pending_heal_count $V0

#When bricks are down, heal info reports "Transport endpoint is not connected" for them
EXPECT "3" disconnected_brick_count $V0

#Create some stale indices and verify that they are not counted in heal info.
#To create stale indices, create and delete files while one brick of each
#replica pair is down.
for i in {11..20}; do echo abc > $M0/$i; done
HEAL_FILES=$(($HEAL_FILES + 10)) #count the extra 10 files
EXPECT "$HEAL_FILES" get_pending_heal_count $V0
#Delete the files now, so that only stale indices remain.
for i in {11..20}; do rm -f $M0/$i; done
#After deleting the files they should not appear in heal info.
HEAL_FILES=$(($HEAL_FILES - 10))
EXPECT "$HEAL_FILES" get_pending_heal_count $V0

TEST ! $CLI volume heal $V0
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST ! $CLI volume heal $V0
TEST ! $CLI volume heal $V0 full
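
#Note: the three 'TEST !' heal launches above are expected to fail. Half of
#the bricks are still down, and the self-heal daemon is disabled before the
#second and third attempts, so neither index heal nor full heal can start.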
TEST $CLI volume start $V0 force
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
check_bricks_up $V0

TEST $CLI volume heal $V0
sleep 5 #Until the heal-statistics command is implemented,
#check only that this heals the contents partially.
TEST [ $HEAL_FILES -gt $(get_pending_heal_count $V0) ]

TEST $CLI volume heal $V0 full
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

#Test that ongoing I/O is not counted as pending heal
(dd if=/dev/zero of=$M0/file1 bs=1k 2>/dev/null 1>/dev/null)&
back_pid1=$!;
(dd if=/dev/zero of=$M0/file2 bs=1k 2>/dev/null 1>/dev/null)&
back_pid2=$!;
(dd if=/dev/zero of=$M0/file3 bs=1k 2>/dev/null 1>/dev/null)&
back_pid3=$!;
(dd if=/dev/zero of=$M0/file4 bs=1k 2>/dev/null 1>/dev/null)&
back_pid4=$!;
(dd if=/dev/zero of=$M0/file5 bs=1k 2>/dev/null 1>/dev/null)&
back_pid5=$!;
EXPECT 0 get_pending_heal_count $V0
kill -SIGTERM $back_pid1;
kill -SIGTERM $back_pid2;
kill -SIGTERM $back_pid3;
kill -SIGTERM $back_pid4;
kill -SIGTERM $back_pid5;
wait >/dev/null 2>&1;

#Test that volume heal info reports files even when the self-heal
#options are disabled
TEST touch $M0/f
TEST mkdir $M0/d

#DATA
TEST $CLI volume set $V0 cluster.data-self-heal off
EXPECT "off" volume_option $V0 cluster.data-self-heal

kill_multiple_bricks $V0 $H0 $B0

echo abc > $M0/f
EXPECT 1 get_pending_heal_count $V0
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
check_bricks_up $V0
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.data-self-heal on

#METADATA
TEST $CLI volume set $V0 cluster.metadata-self-heal off
EXPECT "off" volume_option $V0 cluster.metadata-self-heal

kill_multiple_bricks $V0 $H0 $B0

TEST chmod 777 $M0/f
EXPECT 1 get_pending_heal_count $V0
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
check_bricks_up $V0
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.metadata-self-heal on

#ENTRY
TEST $CLI volume set $V0 cluster.entry-self-heal off
EXPECT "off" volume_option $V0 cluster.entry-self-heal

kill_multiple_bricks $V0 $H0 $B0

TEST touch $M0/d/a
# 4 if mtime/ctime is modified for d on the bricks that do not have a
# 2 otherwise
PENDING=$( get_pending_heal_count $V0 )
TEST test $PENDING -eq 2 -o $PENDING -eq 4
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
check_bricks_up $V0
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
TEST $CLI volume set $V0 cluster.entry-self-heal on

#Negative test cases
#heal info must fail when the volume does not exist
TEST ! $CLI volume heal fail info

#heal info must fail when the volume is stopped
TEST $CLI volume stop $V0
TEST ! $CLI volume heal $V0 info

#heal info must fail on a non-replicate volume
TEST $CLI volume delete $V0
TEST $CLI volume create $V0 $H0:$B0/${V0}{6}
TEST $CLI volume start $V0
TEST ! $CLI volume heal $V0 info

#Check on non-Linux systems that we did not mess with directory offsets
TEST ! log_newer $START_TIMESTAMP "offset reused from another DIR"

cleanup