author     Pranith Kumar K <pkarampu@redhat.com>    2014-04-15 12:40:57 +0530
committer  Vijay Bellur <vbellur@redhat.com>        2014-04-28 09:43:24 -0700
commit     07ed48398e41df1b65202f3d2e0be6f2c3ca8ceb (patch)
tree       61223ce6be2394606058b8403e0a70e40a6df9f0
parent     115b4093a44f6e23c28e5a382f82e72ddf73d97c (diff)
cluster/afr: trigger self-heals even when they are set to off.
When attempt-self-heal is set to true, trigger data/metadata/entry
self-heals even when they are disabled. This is useful for "gluster
volume heal info" to report them even when metadata-self-heal,
entry-self-heal and data-self-heal are set to off.

Change-Id: Idc3f0d5d049c875b4f975248fef56ea2238da47c
BUG: 1039544
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/7480
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
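For orientation, a minimal admin-side sketch of the behaviour this change targets; the volume name "testvol" is illustrative and not part of the patch, while the commands and option names are the ones exercised in tests/basic/self-heald.t below. With the client-side self-heal options turned off, "gluster volume heal <VOLNAME> info" is still expected to report pending heals, and "gluster volume heal <VOLNAME>" can still trigger them:

    # disable client-side self-heals (as the test script does)
    gluster volume set testvol cluster.data-self-heal off
    gluster volume set testvol cluster.metadata-self-heal off
    gluster volume set testvol cluster.entry-self-heal off
    # ...kill one brick, modify a file from the mount, restart the brick...
    gluster volume heal testvol info   # should still list the file as pending heal
    gluster volume heal testvol        # self-heal daemon heals it despite the options being off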
-rw-r--r--   tests/basic/self-heald.t                           111
-rw-r--r--   xlators/cluster/afr/src/afr-common.c                93
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-common.c       3
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-data.c        13
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-entry.c        2
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-metadata.c    15
-rw-r--r--   xlators/cluster/afr/src/afr.h                       12
7 files changed, 213 insertions, 36 deletions
diff --git a/tests/basic/self-heald.t b/tests/basic/self-heald.t
index bed008704c2..01ce5b5c8b9 100644
--- a/tests/basic/self-heald.t
+++ b/tests/basic/self-heald.t
@@ -5,6 +5,11 @@
cleanup;
+function disconnected_brick_count {
+ local vol=$1
+ $CLI volume heal $vol info | grep -i transport | wc -l
+}
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
@@ -28,6 +33,23 @@ HEAL_FILES=$(($HEAL_FILES + 3)) #Count the brick root dir
cd ~
EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+
+#When bricks are down, it says Transport End point Not connected for them
+EXPECT "3" disconnected_brick_count $V0
+
+#Create some stale indices and verify that they are not counted in heal info
+#TO create stale index create and delete files when one brick is down in
+#replica pair.
+for i in {11..20}; do echo abc > $M0/$i; done
+HEAL_FILES=$(($HEAL_FILES + 10)) #count extra 10 files
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+#delete the files now, so that stale indices will remain.
+for i in {11..20}; do rm -f $M0/$i; done
+#After deleting files they should not appear in heal info
+HEAL_FILES=$(($HEAL_FILES - 10))
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+
+
TEST ! $CLI volume heal $V0
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST ! $CLI volume heal $V0
@@ -45,4 +67,93 @@ TEST [ $HEAL_FILES -gt $(afr_get_pending_heal_count $V0) ]
TEST $CLI volume heal $V0 full
EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+
+#Test that ongoing IO is not considered as Pending heal
+(dd if=/dev/zero of=$M0/file1 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid1=$!;
+(dd if=/dev/zero of=$M0/file2 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid2=$!;
+(dd if=/dev/zero of=$M0/file3 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid3=$!;
+(dd if=/dev/zero of=$M0/file4 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid4=$!;
+(dd if=/dev/zero of=$M0/file5 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid5=$!;
+EXPECT 0 afr_get_pending_heal_count $V0
+kill -SIGTERM $back_pid1;
+kill -SIGTERM $back_pid2;
+kill -SIGTERM $back_pid3;
+kill -SIGTERM $back_pid4;
+kill -SIGTERM $back_pid5;
+wait >/dev/null 2>&1;
+
+#Test that volume heal info reports files even when self-heal
+#options are disabled
+TEST touch $M0/f
+TEST mkdir $M0/d
+#DATA
+TEST $CLI volume set $V0 cluster.data-self-heal off
+EXPECT "off" volume_option $V0 cluster.data-self-heal
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+echo abc > $M0/f
+EXPECT 1 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+#METADATA
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+EXPECT "off" volume_option $V0 cluster.metadata-self-heal
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST chmod 777 $M0/f
+EXPECT 1 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+
+#ENTRY
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+EXPECT "off" volume_option $V0 cluster.entry-self-heal
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST touch $M0/d/a
+EXPECT 2 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Negative test cases
+#Fail volume does not exist case
+TEST ! $CLI volume heal fail info
+
+#Fail volume stopped case
+TEST $CLI volume stop $V0
+TEST ! $CLI volume heal $V0 info
+
+#Fail non-replicate volume info
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 $H0:$B0/${V0}{6}
+TEST $CLI volume start $V0
+TEST ! $CLI volume heal $V0 info
+
cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4b39ff6039d..7dafa0529fa 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1411,19 +1411,90 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
}
gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+afr_can_start_missing_entry_gfid_self_heal (afr_local_t *local,
+ afr_private_t *priv)
{
- GF_ASSERT (sh);
- GF_ASSERT (priv);
+ if (!local)
+ goto out;
+ //attempt self heal is only for data/metadata/entry
+ if (local->self_heal.do_gfid_self_heal ||
+ local->self_heal.do_missing_entry_self_heal)
+ return _gf_true;
+out:
+ return _gf_false;
+}
+
+gf_boolean_t
+afr_can_start_entry_self_heal (afr_local_t *local, afr_private_t *priv)
+{
+ if (!local)
+ goto out;
+ //force_confirm_spb is not checked here because directory split-brains
+ //are not reported at the moment.
+ if (local->self_heal.do_entry_self_heal) {
+ if (local->attempt_self_heal || priv->entry_self_heal)
+ return _gf_true;
+ }
+out:
+ return _gf_false;
+}
+
+gf_boolean_t
+afr_can_start_data_self_heal (afr_local_t *local, afr_private_t *priv)
+{
+ if (!local)
+ goto out;
+
+ if (local->self_heal.force_confirm_spb)
+ return _gf_true;
+
+ if (local->self_heal.do_data_self_heal) {
+ if (local->attempt_self_heal ||
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ }
+out:
+ return _gf_false;
+}
- if (sh->force_confirm_spb)
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_local_t *local, afr_private_t *priv)
+{
+ if (!local)
+ goto out;
+ if (local->self_heal.force_confirm_spb)
return _gf_true;
- return (sh->do_gfid_self_heal
- || sh->do_missing_entry_self_heal
- || (afr_data_self_heal_enabled (priv->data_self_heal) &&
- sh->do_data_self_heal)
- || (priv->metadata_self_heal && sh->do_metadata_self_heal)
- || (priv->entry_self_heal && sh->do_entry_self_heal));
+
+ if (local->self_heal.do_metadata_self_heal) {
+ if (local->attempt_self_heal || priv->metadata_self_heal)
+ return _gf_true;
+ }
+out:
+ return _gf_false;
+}
+
+gf_boolean_t
+afr_can_self_heal_proceed (afr_local_t *local, afr_private_t *priv)
+{
+ if (!local)
+ goto out;
+
+ if (local->self_heal.force_confirm_spb)
+ return _gf_true;
+
+ if (afr_can_start_missing_entry_gfid_self_heal (local, priv))
+ return _gf_true;
+
+ if (afr_can_start_entry_self_heal (local, priv))
+ return _gf_true;
+
+ if (afr_can_start_data_self_heal (local, priv))
+ return _gf_true;
+
+ if (afr_can_start_metadata_self_heal (local, priv))
+ return _gf_true;
+out:
+ return _gf_false;
}
afr_transaction_type
@@ -1840,7 +1911,7 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
}
afr_lookup_set_self_heal_params (local, this);
- if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
+ if (afr_can_self_heal_proceed (local, priv)) {
if (afr_is_transaction_running (local) &&
(!local->attempt_self_heal))
goto out;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 6f82761b33d..ab9e44b41e0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2307,6 +2307,7 @@ afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
this->name, priv->child_count);
if (ret)
goto out;
+ lc->attempt_self_heal = l->attempt_self_heal;
out:
if (ret) {
@@ -2474,7 +2475,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
+ if (afr_can_start_missing_entry_gfid_self_heal (local, priv)) {
afr_self_heal_missing_entries (sh_frame, this);
} else {
loc = &sh_local->loc;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index e5ca84292fa..5c53f6a6a06 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1704,17 +1704,6 @@ afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
return 0;
}
-gf_boolean_t
-afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
-{
- if (sh->force_confirm_spb)
- return _gf_true;
- if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal))
- return _gf_true;
- return _gf_false;
-}
-
int
afr_self_heal_data (call_frame_t *frame, xlator_t *this)
{
@@ -1728,7 +1717,7 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
- if (afr_can_start_data_self_heal (sh, priv)) {
+ if (afr_can_start_data_self_heal (local, priv)) {
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
priv->sh_domain, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 8359079cea4..1ea957ad042 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -2396,7 +2396,7 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
- if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+ if (afr_can_start_entry_self_heal (local, priv)) {
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_entrylk (frame, this, &local->loc, NULL,
afr_sh_post_nonblocking_entry_cbk);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index e26d3580e51..7abd852de6c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -688,8 +688,10 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
+ sh = &local->self_heal;
int_lock = &local->internal_lock;
if (int_lock->lock_op_ret < 0) {
@@ -697,6 +699,7 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
"inodelks failed for %s.", local->loc.path);
gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
"failed for %s.", local->loc.path);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_metadata_done (frame, this);
} else {
@@ -740,16 +743,6 @@ afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
return 0;
}
-gf_boolean_t
-afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
-{
- if (sh->force_confirm_spb)
- return _gf_true;
- if (sh->do_metadata_self_heal && priv->metadata_self_heal)
- return _gf_true;
- return _gf_false;
-}
-
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
@@ -761,7 +754,7 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
- if (afr_can_start_metadata_self_heal (sh, priv)) {
+ if (afr_can_start_metadata_self_heal (local, priv)) {
afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 676bd9b9466..ad8964ccbaa 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1216,4 +1216,16 @@ afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
afr_inode_ctx_t*
afr_inode_ctx_get (inode_t *inode, xlator_t *this);
+gf_boolean_t
+afr_can_start_missing_entry_gfid_self_heal (afr_local_t *local,
+ afr_private_t *priv);
+
+gf_boolean_t
+afr_can_start_entry_self_heal (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_can_start_data_self_heal (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_local_t *local, afr_private_t *priv);
#endif /* __AFR_H__ */