From 8beaf169e39b262416e2274a028292379d39b310 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Fri, 9 Jan 2015 14:43:22 +0000 Subject: cluster/afr: split-brain resolution CLI Extend the AFR heal command to include automated split-brain resolution. This patch [3/3] is the final patch for afr automated split-brain resolution implementation. "gluster volume heal <VOLNAME> [full | statistics [heal-count [replica <HOSTNAME:BRICKNAME>]] |info [healed | heal-failed | split-brain]| split-brain {bigger-file <FILE> |source-brick <HOSTNAME:BRICKNAME> [<FILE>]}]" The new additions being: 1.gluster volume heal <VOLNAME> split-brain bigger-file <FILE> Locates the replica containing the FILE, selects bigger-file as source and completes heal. 2.gluster volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> <FILE> Selects <FILE> present in <HOSTNAME:BRICKNAME> as source and completes heal. 3.gluster volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> Selects all split-brained files in <HOSTNAME:BRICKNAME> as source and completes heal. Note: <FILE> can be either the full file name as seen from the root of the volume (or) the gfid-string representation of the file, which sometimes gets displayed in the heal info command's output. Entry/gfid split-brain resolution is not supported. Example can be found in the test case. Change-Id: I4649733922d406f14f28ee9033a5cb627b9538b3 BUG: 1136769 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/9377 Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri Tested-by: Gluster Build System --- tests/basic/afr/split-brain-healing.t | 183 ++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 tests/basic/afr/split-brain-healing.t (limited to 'tests/basic') diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t new file mode 100644 index 00000000000..1dc317df8dd --- /dev/null +++ b/tests/basic/afr/split-brain-healing.t @@ -0,0 +1,183 @@ +#!/bin/bash + +#Test the split-brain resolution CLI commands. +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +function get_replicate_subvol_number { + local filename=$1 + #Report which replica pair holds the file: echo 0 if it exists on brick ${V0}1, 1 if it exists on brick ${V0}3, -1 if on neither. + if [ -f $B0/${V0}1/$filename ] + then + echo 0 + elif [ -f $B0/${V0}3/$filename ] + then echo 1 + else + echo -1 + fi +} + +cleanup; + +AREQUAL_PATH=$(dirname $0)/../../utils +CFLAGS="" +test "`uname -s`" != "Linux" && { # not Linux: add the contrib/argp-standalone include/library paths, -largp and -lintl + CFLAGS="$CFLAGS -I$(dirname $0)/../../../contrib/argp-standalone "; + CFLAGS="$CFLAGS -L$(dirname $0)/../../../contrib/argp-standalone -largp "; + CFLAGS="$CFLAGS -lintl"; +} +build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} +TEST $CLI volume set $V0 cluster.self-heal-daemon off # switched back on only after the split-brain has been created +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +cd $M0 +for i in {1..10} +do + echo "Initial content">>file$i +done + +replica_0_files_list=(`ls $B0/${V0}1`) # names of the files found on brick 1 +replica_1_files_list=(`ls $B0/${V0}3`) # names of the files found on brick 3 + +############ Create data split-brain in the files. 
########################### +TEST kill_brick $V0 $H0 $B0/${V0}1 +for file in ${!replica_0_files_list[*]} # iterates over array indices; the file name is looked up in the loop body +do + echo "B1 is down">>${replica_0_files_list[$file]} +done +TEST kill_brick $V0 $H0 $B0/${V0}3 +for file in ${!replica_1_files_list[*]} +do + echo "B3 is down">>${replica_1_files_list[$file]} +done + +SMALLER_FILE_SIZE=$(stat -c %s file1) # size of file1 after the single append made while bricks 1 and 3 were down + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +TEST kill_brick $V0 $H0 $B0/${V0}2 +for file in ${!replica_0_files_list[*]} +do + echo "B2 is down">>${replica_0_files_list[$file]} + echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]} +done +TEST kill_brick $V0 $H0 $B0/${V0}4 +for file in ${!replica_1_files_list[*]} +do + echo "B4 is down">>${replica_1_files_list[$file]} + echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]} +done + +BIGGER_FILE_SIZE=$(stat -c %s file1) # size of file1 after the two appends made while bricks 2 and 4 were down + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 + + +############### Accessing the files should now give EIO. ############################### +TEST ! cat file1 +TEST ! cat file2 +TEST ! cat file3 +TEST ! cat file4 +TEST ! cat file5 +TEST ! cat file6 +TEST ! cat file7 +TEST ! cat file8 +TEST ! cat file9 +TEST ! 
cat file10 +################### +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 + +################ Heal file1 using the bigger-file option ############## +$CLI volume heal $V0 split-brain bigger-file /file1 +EXPECT "0" echo $? # $? is expanded before EXPECT runs, so this checks the heal command on the previous line +EXPECT $BIGGER_FILE_SIZE stat -c %s file1 + +################ Heal file2 using the bigger-file option and its gfid ############## +subvolume=$(get_replicate_subvol_number file2) +if [ $subvolume == 0 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2) +elif [ $subvolume == 1 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2) +fi +GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" +$CLI volume heal $V0 split-brain bigger-file $GFIDSTR +EXPECT "0" echo $? + +################ Heal file3 using the source-brick option ############## +################ Use the brick having smaller file size as source ####### +subvolume=$(get_replicate_subvol_number file3) +if [ $subvolume == 0 ] +then + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3 +elif [ $subvolume == 1 ] +then + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 +fi +EXPECT "0" echo $? 
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3 + +################ Heal file4 using the source-brick option and its gfid ############## +################ Use the brick having smaller file size as source ####### +subvolume=$(get_replicate_subvol_number file4) +if [ $subvolume == 0 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4) + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR +elif [ $subvolume == 1 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4) + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR +fi +EXPECT "0" echo $? # status of the if block, i.e. of the heal command that ran inside it +EXPECT $SMALLER_FILE_SIZE stat -c %s file4 + +################ Heal remaining SB'ed files of replica_0 using B1 as source ############## +$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 +EXPECT "0" echo $? + +################ Heal remaining SB'ed files of replica_1 using B3 as source ############## +$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 +EXPECT "0" echo $? + +############### Reading the files should now succeed. ############################### +TEST cat file1 +TEST cat file2 +TEST cat file3 +TEST cat file4 +TEST cat file5 +TEST cat file6 +TEST cat file7 +TEST cat file8 +TEST cat file9 +TEST cat file10 + +################ File contents on the bricks must be same. ################################ +TEST diff <(arequal-checksum -p $B0/${V0}1 -i .glusterfs) <(arequal-checksum -p $B0/${V0}2 -i .glusterfs) +TEST diff <(arequal-checksum -p $B0/${V0}3 -i .glusterfs) <(arequal-checksum -p $B0/${V0}4 -i .glusterfs) + +############### Trying to heal files not in SB should fail. ############################### +$CLI volume heal $V0 split-brain bigger-file /file1 +EXPECT "1" echo $? +$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 +EXPECT "1" echo $? + +cd - +TEST rm $AREQUAL_PATH/arequal-checksum +cleanup -- cgit