From fecebde3fbae17ace970a4d9c440f6455161dc62 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Sat, 30 May 2015 10:23:33 +0530 Subject: afr: honour selfheal enable/disable volume set options Backport of http://review.gluster.org/11012 Note: http://review.gluster.org/9459 is not backported to 3.6 but the change it makes to afr_get_heal_info() (i.e. handling ret values) is needed for heal info to work correctly and tests/basic/afr/client-side-heal.t to pass. -------------------------- afr-v1 had the following volume set options that are used to enable/ disable self-heals from happening in AFR xlator when loaded in the client graph: cluster.metadata-self-heal cluster.data-self-heal cluster.entry-self-heal In afr-v2, these 3 heals can happen from the client if there is an inode refresh. This patch allows such heals to proceed only if the corresponding volume set options are set to true. -------------------------- Change-Id: Iebf863758d902fd2f95be320c6791d4e15f634e7 BUG: 1230259 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/11170 Tested-by: Gluster Build System Reviewed-by: Anuradha Talur Reviewed-by: Raghavendra Bhat --- tests/basic/afr/client-side-heal.t | 86 ++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 15 ++++- xlators/cluster/afr/src/afr-self-heal-common.c | 11 +++- 3 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 tests/basic/afr/client-side-heal.t diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t new file mode 100644 index 00000000000..c9b3e355802 --- /dev/null +++ b/tests/basic/afr/client-side-heal.t @@ -0,0 +1,86 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off + +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +echo "some data" > $M0/datafile +EXPECT 0 echo $? +TEST touch $M0/mdatafile +TEST mkdir $M0/dir + +#Kill a brick and perform I/O to have pending heals. +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" afr_child_up_status $V0 0 + +#pending data heal +echo "some more data" >> $M0/datafile +EXPECT 0 echo $? + +#pending metadata heal +TEST chmod +x $M0/mdatafile + +#pending entry heal. Also causes pending metadata/data heals on file{1..5} +TEST touch $M0/dir/file{1..5} + +EXPECT 8 afr_get_pending_heal_count $V0 + +#After brick comes back up, access from client should not trigger heals +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +#Metadata heal via explicit lookup must not happen +TEST ls $M0/mdatafile + +#Inode refresh must not trigger data and entry heals. +#To trigger inode refresh for sure, the volume is unmounted and mounted each time. +#Check that data heal does not happen. +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST cat $M0/datafile +#Check that entry heal does not happen. 
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST ls $M0/dir + +#No heal must have happened +EXPECT 8 afr_get_pending_heal_count $V0 + +#Enable client side heal options and trigger heals +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on + +#Metadata heal is triggered by lookup without need for inode refresh. +TEST ls $M0/mdatafile +EXPECT 7 afr_get_pending_heal_count $V0 + +#Inode refresh must trigger data and entry heals. +#To trigger inode refresh for sure, the volume is unmounted and mounted each time. +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST cat $M0/datafile + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST ls $M0/dir +EXPECT 5 afr_get_pending_heal_count $V0 + +TEST cat $M0/dir/file1 +TEST cat $M0/dir/file2 +TEST cat $M0/dir/file3 +TEST cat $M0/dir/file4 +TEST cat $M0/dir/file5 + +EXPECT 0 afr_get_pending_heal_count $V0 +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 39687fae8f3..20cff5c5206 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1561,6 +1561,9 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this) replies = local->replies; priv = this->private; + if (!priv->metadata_self_heal) + return _gf_false; + for (i = 0; i < priv->child_count; i++) { if(!replies[i].valid || replies[i].op_ret == -1) continue; @@ -4439,7 +4442,12 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc, dict = afr_set_heal_info ("split-brain"); } else if (ret == -EAGAIN) { dict = afr_set_heal_info ("possibly-healing"); - } else if (ret == 0) { + } else if (ret >= 0) { + 
/* value of ret = source index * so ret >= 0 and at least one of the 3 booleans set to * true means a source is identified; heal is required. */ + if (!data_selfheal && !entry_selfheal && !metadata_selfheal) { dict = afr_set_heal_info ("no-heal"); @@ -4447,6 +4455,11 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc, dict = afr_set_heal_info ("heal"); } } else if (ret < 0) { + /* Apart from the negative ret values checked above, there are other + * possible ret values like ENOTCONN (returned when the number of + * valid replies received is less than 2) in which case heal is + * required when one of the selfheal booleans is set. + */ if (data_selfheal || entry_selfheal || metadata_selfheal) { dict = afr_set_heal_info ("heal"); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 7fb6e2b9dc4..b3194724e51 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1133,6 +1133,11 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid) gf_boolean_t data_selfheal = _gf_false; gf_boolean_t metadata_selfheal = _gf_false; gf_boolean_t entry_selfheal = _gf_false; + afr_private_t *priv = NULL; + gf_boolean_t dataheal_enabled = _gf_false; + + priv = this->private; + gf_string2boolean (priv->data_self_heal, &dataheal_enabled); ret = afr_selfheal_unlocked_inspect (frame, this, gfid, &inode, &data_selfheal, @@ -1146,13 +1151,13 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid) goto out; } - if (data_selfheal) + if (data_selfheal && dataheal_enabled) data_ret = afr_selfheal_data (frame, this, inode); - if (metadata_selfheal) + if (metadata_selfheal && priv->metadata_self_heal) metadata_ret = afr_selfheal_metadata (frame, this, inode); - if (entry_selfheal) + if (entry_selfheal && priv->entry_self_heal) entry_ret = afr_selfheal_entry (frame, this, inode); or_ret = (data_ret | metadata_ret | entry_ret); -- cgit