#!/bin/bash
#Test split-brain resolution using the cluster.favorite-child-policy option.

. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
cleanup;

TEST glusterd
TEST pidof glusterd

#Create replica 2 volume
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.entry-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume start $V0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
TEST touch $M0/file

############ Healing using favorite-child-policy = ctime #################
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0

#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?

#Unmount to prevent further FOPs on the file, then find the brick with the latest ctime.
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
ctime1=$(stat -c "%.Z" $B0/${V0}0/file)
ctime2=$(stat -c "%.Z" $B0/${V0}1/file)
if (( $(echo "$ctime1 > $ctime2" | bc -l) )); then
        LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d' ' -f1)
else
        LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d' ' -f1)
fi
TEST $CLI volume set $V0 cluster.favorite-child-policy ctime
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

#Both bricks must now hold the copy that had the latest ctime.
B0_MD5=$(md5sum $B0/${V0}0/file | cut -d' ' -f1)
B1_MD5=$(md5sum $B0/${V0}1/file | cut -d' ' -f1)
TEST [ "$LATEST_CTIME_MD5" == "$B0_MD5" ]
TEST [ "$LATEST_CTIME_MD5" == "$B1_MD5" ]
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
cat $M0/file > /dev/null
EXPECT "0" echo $?
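
#Sanity peek (not an assertion; illustrative only): after the heal above, the
#AFR changelog xattrs on both bricks should no longer blame each other (all
#zeroes or absent). This assumes the default client translator naming
#trusted.afr.$V0-client-<N>.
getfattr -d -m . -e hex $B0/${V0}0/file 2>/dev/null | grep trusted.afr
getfattr -d -m . -e hex $B0/${V0}1/file 2>/dev/null | grep trusted.afr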
############ Healing using favorite-child-policy = mtime #################
TEST $CLI volume set $V0 cluster.favorite-child-policy none
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0

#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?

#The second write landed only on the second brick, so it has the latest mtime.
LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d' ' -f1)

TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
cat $M0/file > /dev/null
EXPECT "0" echo $?
HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d' ' -f1)
TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]

############ Healing using favorite-child-policy = size #################
TEST $CLI volume set $V0 cluster.favorite-child-policy none
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0

#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?

#The larger (count=10240) write landed only on the second brick.
BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/file | cut -d' ' -f1)

TEST $CLI volume set $V0 cluster.favorite-child-policy size
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
cat $M0/file > /dev/null
EXPECT "0" echo $?
HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d' ' -f1)
TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ]
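
#The 'majority' policy picks a heal source only when more than half of the
#bricks agree on size and mtime, which can never happen on replica 2, so the
#volume is first converted to replica 3 below.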
############ Healing using favorite-child-policy = majority on replica-3 #################

#Convert volume to replica-3
TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0

#Disable quorum so that writes succeed even with a single brick up.
TEST $CLI volume set $V0 cluster.quorum-type none
TEST $CLI volume set $V0 cluster.favorite-child-policy none
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST kill_brick $V0 $H0 $B0/${V0}2
TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
TEST $CLI volume set $V0 cluster.self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0

#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?

#We know that the second and third bricks agree with each other; pick either one.
MAJORITY_MD5=$(md5sum $B0/${V0}1/file | cut -d' ' -f1)

TEST $CLI volume set $V0 cluster.favorite-child-policy majority
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
cat $M0/file > /dev/null
EXPECT "0" echo $?
HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d' ' -f1)
TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]

TEST force_umount $M0
cleanup
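
#Note: besides cluster.favorite-child-policy, data split-brains can also be
#resolved per file from the CLI (illustrative; not exercised by this test):
#   gluster volume heal <VOLNAME> split-brain bigger-file <FILE>
#   gluster volume heal <VOLNAME> split-brain latest-mtime <FILE>
#   gluster volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> <FILE>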