From 59ba78ae1461651e290ce72013786d828545d4c1 Mon Sep 17 00:00:00 2001
From: Anuradha
Date: Mon, 5 Jan 2015 16:37:07 +0530
Subject: afr : glfs-heal implementation

Backport of http://review.gluster.org/6529 and http://review.gluster.org/9119

Change-Id: Ie420efcb399b5119c61f448b421979c228b27b15
BUG: 1173528
Signed-off-by: Anuradha
Reviewed-on: http://review.gluster.org/9335
Reviewed-by: Ravishankar N
Tested-by: Gluster Build System
Reviewed-by: Raghavendra Bhat
---
 tests/basic/afr/self-heald.t | 184 +++++++++++++++++++++++++++++++++++++++++++
 tests/basic/self-heald.t     |  44 -----------
 tests/bugs/bug-880898.t      |   4 +-
 3 files changed, 186 insertions(+), 46 deletions(-)
 create mode 100644 tests/basic/afr/self-heald.t
 delete mode 100644 tests/basic/self-heald.t

diff --git a/tests/basic/afr/self-heald.t b/tests/basic/afr/self-heald.t
new file mode 100644
index 00000000000..1c8bd0ff52e
--- /dev/null
+++ b/tests/basic/afr/self-heald.t
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function kill_multiple_bricks {
+        local vol=$1
+        local host=$2
+        local brickpath=$3
+
+        if [ $decide_kill == 0 ]
+        then
+                for ((i=0; i<=4; i=i+2)) do
+                        TEST kill_brick $vol $host $brickpath/${vol}$i
+                done
+        else
+                for ((i=1; i<=5; i=i+2)) do
+                        TEST kill_brick $vol $host $brickpath/${vol}$i
+                done
+        fi
+}
+function check_bricks_up {
+        local vol=$1
+        if [ $decide_kill == 0 ]
+        then
+                for ((i=0; i<=4; i=i+2)) do
+                        EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+                done
+        else
+                for ((i=1; i<=5; i=i+2)) do
+                        EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+                done
+        fi
+}
+
+function disconnected_brick_count {
+        local vol=$1
+        $CLI volume heal $vol info | grep -i transport | wc -l
+}
+
+TESTS_EXPECTED_IN_LOOP=20
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+decide_kill=$(( 10#$(date +"%j") % 2 )) #0 on even days of the year, 1 on odd; 10# avoids octal parsing of the leading zeros in %j
+
+kill_multiple_bricks $V0 $H0 $B0
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+        dd if=/dev/urandom of=f bs=1M count=10 2>/dev/null
+        HEAL_FILES=$(($HEAL_FILES+1)) #+1 for data/metadata self-heal of 'f'
+        mkdir a; cd a;
+        #+3 for metadata self-heal of 'a', one per subvolume of DHT
+        HEAL_FILES=$(($HEAL_FILES+3))
+done
+#+3 for entry self-heal of "/", one per subvolume of DHT
+HEAL_FILES=$(($HEAL_FILES + 3))
+
+cd ~
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+
+#For bricks that are down, heal info reports "Transport endpoint is not connected"
+EXPECT "3" disconnected_brick_count $V0
+
+#Create some stale indices and verify that they are not counted in heal info.
+#To create stale indices, create and then delete files while one brick of each
+#replica pair is down.
+for i in {11..20}; do echo abc > $M0/$i; done
+HEAL_FILES=$(($HEAL_FILES + 10)) #count the 10 extra files
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+#Delete the files now, so that only stale indices remain.
+for i in {11..20}; do rm -f $M0/$i; done
+#After the deletion the files should not appear in heal info.
+HEAL_FILES=$(($HEAL_FILES - 10))
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+
+
+TEST ! $CLI volume heal $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST ! $CLI volume heal $V0
+TEST ! $CLI volume heal $V0 full
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+sleep 5 #Stopgap until the heal-statistics command is implemented
+#Check that the launched heal has healed the contents at least partially.
+TEST [ $HEAL_FILES -gt $(afr_get_pending_heal_count $V0) ]
+
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+
+#Test that ongoing IO is not counted as pending heal.
+(dd if=/dev/zero of=$M0/file1 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid1=$!;
+(dd if=/dev/zero of=$M0/file2 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid2=$!;
+(dd if=/dev/zero of=$M0/file3 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid3=$!;
+(dd if=/dev/zero of=$M0/file4 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid4=$!;
+(dd if=/dev/zero of=$M0/file5 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid5=$!;
+EXPECT 0 afr_get_pending_heal_count $V0
+kill -SIGTERM $back_pid1;
+kill -SIGTERM $back_pid2;
+kill -SIGTERM $back_pid3;
+kill -SIGTERM $back_pid4;
+kill -SIGTERM $back_pid5;
+wait >/dev/null 2>&1;
+
+#Test that volume heal info reports files even when the self-heal
+#options are disabled.
+TEST touch $M0/f
+TEST mkdir $M0/d
+#DATA
+TEST $CLI volume set $V0 cluster.data-self-heal off
+EXPECT "off" volume_option $V0 cluster.data-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+echo abc > $M0/f
+EXPECT 1 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+#METADATA
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+EXPECT "off" volume_option $V0 cluster.metadata-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+
+TEST chmod 777 $M0/f
+EXPECT 1 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+
+#ENTRY
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+EXPECT "off" volume_option $V0 cluster.entry-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+TEST touch $M0/d/a
+EXPECT 2 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+check_bricks_up $V0
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Negative test cases
+#heal info on a volume that does not exist must fail.
+TEST ! $CLI volume heal fail info
+
+#heal info on a stopped volume must fail.
+TEST $CLI volume stop $V0
+TEST ! $CLI volume heal $V0 info
+
+#heal info on a non-replicate volume must fail.
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 $H0:$B0/${V0}6
+TEST $CLI volume start $V0
+TEST ! $CLI volume heal $V0 info
+
+cleanup
diff --git a/tests/basic/self-heald.t b/tests/basic/self-heald.t
deleted file mode 100644
index b5815a6773f..00000000000
--- a/tests/basic/self-heald.t
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
-
-cleanup;
-
-TEST glusterd
-TEST pidof glusterd
-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
-TEST $CLI volume set $V0 cluster.background-self-heal-count 0
-TEST $CLI volume set $V0 cluster.eager-lock off
-TEST $CLI volume start $V0
-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST kill_brick $V0 $H0 $B0/${V0}2
-TEST kill_brick $V0 $H0 $B0/${V0}4
-cd $M0
-HEAL_FILES=0
-for i in {1..10}
-do
-        dd if=/dev/urandom of=f bs=1024k count=10 2>/dev/null
-        HEAL_FILES=$(($HEAL_FILES+1))
-        mkdir a; cd a;
-        HEAL_FILES=$(($HEAL_FILES+3)) #As many times as distribute subvols
-done
-HEAL_FILES=$(($HEAL_FILES + 3)) #Count the brick root dir
-
-cd ~
-EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
-TEST ! $CLI volume heal $V0
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-TEST ! $CLI volume heal $V0 info
-TEST ! $CLI volume heal $V0
-TEST $CLI volume start $V0 force
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
-
-TEST $CLI volume heal $V0 full
-EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0
-cleanup
diff --git a/tests/bugs/bug-880898.t b/tests/bugs/bug-880898.t
index 8ba35ec1a59..4b9fb50a522 100644
--- a/tests/bugs/bug-880898.t
+++ b/tests/bugs/bug-880898.t
@@ -17,7 +17,7 @@ do
         fi
 done
 
-gluster volume heal $V0 info | grep "Status: self-heal-daemon is not running on $uuid";
-EXPECT "0" echo $?
+#Command execution should fail reporting that the bricks are not running.
+TEST ! $CLI volume heal $V0 info
 
 cleanup;
--
cgit
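
For reference, the heal-info parsing that the new test leans on can be exercised by
hand with a sketch along the following lines. This is not part of the patch: the
volume name "testvol" is an assumption, and the "Transport endpoint" / "Number of
entries" strings are the heal-info fields that the disconnected_brick_count and
afr_get_pending_heal_count helpers grep for in the test above.

    #!/bin/bash
    # Hypothetical standalone check, assuming a running GlusterFS install and a
    # replicate volume named "testvol" (adjust VOL for your environment).
    VOL=testvol

    # Bricks that heal info reports as unreachable; the same grep used by the
    # test's disconnected_brick_count function ("Transport endpoint is not
    # connected" is printed for bricks that are down).
    gluster volume heal $VOL info | grep -ci "transport"

    # Entries pending heal, summed across bricks from the "Number of entries:"
    # lines of heal info (roughly what afr_get_pending_heal_count aggregates).
    gluster volume heal $VOL info | awk -F: '/Number of entries/ {sum += $2} END {print sum + 0}'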