summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVenkatesh Somyajulu <vsomyaju@redhat.com>2013-06-28 19:11:47 +0530
committerVijay Bellur <vbellur@redhat.com>2013-07-02 10:25:17 -0700
commitef8092fab7b6fa5a16cc0e22b75945758519d5a6 (patch)
tree65aa4fa06801b135d61a5e57637dad882793e73e
parent7062eda1575214819f5c7411748b06be95e08ffa (diff)
cluster/afr: Allow data/entry self heal for metadata split-brain
Problem: Currently whenever there is metadata split-brain, a variable sh->op_failed is set to 1 to denote that self heal got failed. But if we proceed for data self heal, even code-path of data self heal also relies on the sh->op_failed variable. So if will check for sh->op_failed variable and will eventually fails to do data self heal. So needed a mechanism to allow data self heal even if metadata is in split brain. Fix: Some data structure revamp is done in http://review.gluster.com/#/c/5106/ fix and this patch is based on the above fix. Now we can store which particular self-heal got failed i.e GFID_OR_MISSING_ENTRY_SELF_HEAL, METADATA, DATA, ENTRY. And we can do two types of self heal failure check. 1. Individual type check: We can check which among all four (Metadata, Data, Gfid or missing entry, entry self heal) got failed. 2. In afr_self_heal_completion_cbk, we need to make check based on the fact that if any specific self heal got failed treat the complete self heal as failure so that it will populate corresponding circular buffer of event history accordingly. Change-Id: Icb91e513bcc752386fc8a78812405cfabe5cac2d BUG: 977797 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Reviewed-on: http://review.gluster.org/5253 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rwxr-xr-xtests/bugs/bug-977797.t114
-rw-r--r--tests/volume.rc20
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c29
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c167
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h21
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c24
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c21
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c8
-rw-r--r--xlators/cluster/afr/src/afr.h15
9 files changed, 285 insertions, 134 deletions
diff --git a/tests/bugs/bug-977797.t b/tests/bugs/bug-977797.t
new file mode 100755
index 00000000000..08cdbe8f119
--- /dev/null
+++ b/tests/bugs/bug-977797.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume set $V0 quick-read off
+TEST $CLI volume set $V0 read-ahead off
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 io-cache off
+TEST $CLI volume set $V0 background-self-heal-count 0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+
+TEST mkdir -p $M0/a
+TEST `echo "GLUSTERFS" > $M0/a/file`
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+TEST chown root $M0/a
+TEST chown root $M0/a/file
+TEST `echo "GLUSTER-FILE-SYSTEM" > $M0/a/file`
+TEST mkdir $M0/a/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0;
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"2"
+
+TEST chmod 757 $M0/a
+TEST chmod 757 $M0/a/file
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1;
+
+TEST ls -l $M0/a/file
+
+b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
+ trusted.afr.$V0-client-0 "entry")
+b1c1dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
+ trusted.afr.$V0-client-1 "entry")
+b2c0dir=$(afr_get_specific_changelog_xattr \
+ $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry")
+b2c1dir=$(afr_get_specific_changelog_xattr \
+ $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry")
+
+
+b1c0f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
+ trusted.afr.$V0-client-0 "data")
+b1c1f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
+ trusted.afr.$V0-client-1 "data")
+b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
+ trusted.afr.$V0-client-0 "data")
+b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
+ trusted.afr.$V0-client-1 "data")
+
+EXPECT "00000000" echo $b1c0f
+EXPECT "00000000" echo $b1c1f
+EXPECT "00000000" echo $b2c0f
+EXPECT "00000000" echo $b2c1f
+
+EXPECT "00000000" echo $b1c0dir
+EXPECT "00000000" echo $b1c1dir
+EXPECT "00000000" echo $b2c0dir
+EXPECT "00000000" echo $b2c1dir
+
+contains() {
+ string="$1"
+ substring="$2"
+ var="-1"
+ if test "${string#*$substring}" != "$string"
+ then
+ var="0" # $substring is in $string
+ else
+ var="1" # $substring is not in $string
+ fi
+ echo $var
+}
+
+var1=$(cat $M0/a/file 2>&1)
+var2="Input/output error"
+
+
+EXPECT "0" contains "$var1" "$var2"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 5bb974e1f50..b358b108d9d 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -237,6 +237,26 @@ function dht_get_layout {
getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
}
+function afr_get_specific_changelog_xattr ()
+{
+ local path=$1
+ local key=$2
+ local type=$3
+ local specific_changelog=""
+
+ changelog_xattr=$(afr_get_changelog_xattr "$path" "$key")
+ if [ "$type" == "data" ]; then
+ specific_changelog=${changelog_xattr:2:8}
+ elif [ "$type" == "metadata" ]; then
+ specific_changelog=${changelog_xattr:10:8}
+ elif [ "$type" == "entry" ]; then
+ specific_changelog=${changelog_xattr:18:8}
+ else
+ specific_changlog="error"
+ fi
+
+ echo $specific_changelog
+}
##
# query pathinfo xattr and extract POSIX pathname(s)
##
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 22e074571ed..1d577cfb5ab 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -100,7 +100,7 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
}
sh_private_cleanup (sh_frame, this);
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
GF_ASSERT (!last_loop_frame);
//loop_finish should have happened and the old_loop should be NULL
gf_log (this->name, GF_LOG_DEBUG,
@@ -276,7 +276,7 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
_gf_true, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
out:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (old_loop_frame)
sh_loop_finish (old_loop_frame, this);
sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
@@ -307,8 +307,9 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
sh_priv->loops_running--;
offset = sh_priv->offset;
block_size = sh->block_size;
- while ((!sh->eof_reached) && (!is_self_heal_failed (sh)) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
@@ -327,7 +328,8 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
if (0 == loop) {
//loop finish does unlock, but the erasing of the pending
//xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !is_self_heal_failed (sh))
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
goto driver_done;
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@@ -338,7 +340,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
//If we have more loops to form we should finish previous loop after
//the next loop lock
while (loop--) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
// op failed in other loop, stop spawning more loops
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@@ -384,7 +386,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame
}
if (op_ret == -1) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
if (loop_frame) {
sh_loop_finish (loop_frame, this);
@@ -432,7 +434,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (loop_sh, op_errno);
} else if (op_ret < loop_local->cont.writev.vector->iov_len) {
gf_log (this->name, GF_LOG_ERROR,
@@ -440,7 +442,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"(expected %lu, returned %d)", sh_local->loc.path,
priv->children[child_index]->name,
loop_local->cont.writev.vector->iov_len, op_ret);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (loop_frame);
@@ -514,7 +516,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
if (op_ret <= 0) {
if (op_ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
"for %s reason :%s", sh->source,
sh_local->loc.path, strerror (errno));
@@ -624,7 +626,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
strong_checksum, MD5_DIGEST_LENGTH);
@@ -662,7 +664,8 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
}
UNLOCK (&sh_priv->lock);
- if (write_needed && !is_self_heal_failed (sh)) {
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh_loop_read (loop_frame, this);
} else {
sh_loop_return (sh_frame, this, loop_frame,
@@ -800,7 +803,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
ret = 0;
out:
if (ret) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
sh_loop_driver_done (sh_frame, this, NULL);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5f985374f29..f0915b01d2e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1018,7 +1018,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
local->loc.path);
}
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh->completion_cbk (frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1250,7 +1250,7 @@ out:
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
"reason: %s", local->loc.path, strerror (-ret));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
afr_sh_missing_entries_finish (frame, this);
}
@@ -1265,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
local = frame->local;
sh = &local->self_heal;
if (op_ret < 0)
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return 0;
}
@@ -1386,7 +1386,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
}
return;
out:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
LOCK (&frame->lock);
{
afr_sh_set_error (sh, EIO);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
}
@@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_missing_entries_finish (frame, this);
} else {
if (afr_gfid_missing_count (this->name, sh->fresh_children,
@@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
afr_sh_set_error (sh, ENOENT);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_purge_entry (frame, this);
} else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
priv->child_count, local->loc.path,
@@ -1787,7 +1787,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
return;
fail:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1858,7 +1858,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
out:
afr_sh_set_error (sh, op_errno);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return;
}
@@ -1962,7 +1962,7 @@ afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
@@ -2047,8 +2047,9 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_gfid_or_missing_entry_sh_status;
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_self_heal_parent_entrylk (frame, this,
afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
@@ -2176,7 +2177,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
- if (is_self_heal_failed (sh) && !priv->shd.iamshd) {
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
loglevel = GF_LOG_ERROR;
} else {
loglevel = GF_LOG_DEBUG;
@@ -2191,7 +2192,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
orig_frame_sh = &orig_frame_local->self_heal;
orig_frame_sh->actual_sh_started = _gf_true;
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- is_self_heal_failed (sh));
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@@ -2305,6 +2306,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh->do_gfid_self_heal = _gf_false;
}
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
+
FRAME_SU_DO (sh_frame, afr_local_t);
if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
afr_self_heal_missing_entries (sh_frame, this);
@@ -2514,7 +2517,7 @@ out:
GF_FREE (erase_xattr);
if (ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
finish (frame, this);
}
@@ -2522,59 +2525,39 @@ out:
}
void
-afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (sh)
- sh->afr_all_sh_status.data_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-
-void
-afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (sh)
- sh->afr_all_sh_status.metadata_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-
-void
-afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
{
- xlator_t *this = NULL;
-
+ xlator_t *this = NULL;
+ afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
+ afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
this = THIS;
- if (sh)
- sh->afr_all_sh_status.entry_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-void
-afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
- afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
+ "Structure");
+ goto out;
+ }
- if (sh)
- sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ sh_status->gfid_or_missing_entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ sh_status->metadata_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ sh_status->data_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ sh_status->entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
+ "self heal type in action");
+ break;
+ }
+out:
+ return;
}
void
@@ -2585,22 +2568,58 @@ afr_set_local_for_unhealable (afr_local_t *local)
sh = &local->self_heal;
local->unhealable = 1;
- if (sh->afr_set_self_heal_status)
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
int
-is_self_heal_failed (afr_self_heal_t *sh)
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
{
- afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
+ afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
+ xlator_t *this = NULL;
+ int sh_failed = 0;
+
+ this = THIS;
- int sh_failed = 0;
- if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
- sh_failed = 1;
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+ "structure");
+ sh_failed = 1;
+ goto out;
+ }
+ if (type == AFR_CHECK_ALL) {
+ if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
+ sh_failed = 1;
+ } else if (type == AFR_CHECK_SPECIFIC) {
+ sh_type_in_action = sh->sh_type_in_action;
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ status = sh_status.gfid_or_missing_entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ status = sh_status.metadata_self_heal;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ status = sh_status.entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ status = sh_status.data_self_heal;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+ break;
+ }
+ if (status == AFR_SELF_HEAL_FAILED)
+ sh_failed = 1;
+
+ }
+
+out:
return sh_failed;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 329bb2f1ed0..cc67e23fe95 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -15,13 +15,6 @@
#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
-
-typedef enum {
AFR_LOOKUP_FAIL_CONFLICTS = 1,
AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
} afr_lookup_flags_t;
@@ -138,20 +131,10 @@ void
afr_set_local_for_unhealable (afr_local_t *local);
int
-is_self_heal_failed (afr_self_heal_t *sh);
-
-void
-afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status);
-
-void
-afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status staus);
-
-void
-afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status);
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
void
-afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
- afr_self_heal_status status);
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
void
afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index fc7f5e7ac4b..9c2f3d53c83 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -335,7 +335,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"finishing failed data selfheal of %s", local->loc.path);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (sh->data_lock_held)
afr_sh_data_unlock (frame, this, afr_sh_data_close);
else
@@ -362,13 +362,13 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
"log failed on %s for subvol %s, reason: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
if (sh->old_loop_frame)
sh_loop_finish (sh->old_loop_frame, this);
sh->old_loop_frame = NULL;
@@ -418,7 +418,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index]->name, strerror (op_errno));
LOCK (&frame->lock);
{
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
if (sh->old_loop_frame)
@@ -428,7 +428,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh))
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
afr_sh_data_fail (frame, this);
else
afr_sh_data_erase_pending (frame, this);
@@ -604,7 +604,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"ftruncate of %s on subvolume %s completed",
@@ -617,7 +617,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh))
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
afr_sh_data_fail (frame, this);
else
afr_sh_data_sync_prepare (frame, this);
@@ -718,7 +718,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
if (sh->background && sh->unwind && !sh->unwound) {
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- is_self_heal_failed (sh));
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
sh->unwound = _gf_true;
}
@@ -1342,7 +1342,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"open of %s succeeded on child %s",
@@ -1355,7 +1355,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_data_fail (frame, this);
return 0;
}
@@ -1485,10 +1485,10 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_data_sh_status;
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
if (afr_can_start_data_self_heal (sh, priv)) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
if (IA_ISREG (sh->type)) {
afr_sh_data_open (frame, this);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 14ccca21b8b..3598f79d1ff 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -162,7 +162,7 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
if (sh->entries_skipped) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
goto out;
}
afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
@@ -799,7 +799,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_sink (frame, this, sh->active_source);
sh->active_source = active_src;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
goto out;
}
@@ -1946,7 +1946,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
local->loc.path,
priv->children[active_src]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"readdir of %s on subvolume %s complete",
@@ -2019,7 +2019,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_source (frame, this, sh->active_source);
sh->active_source = active_src;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2068,7 +2068,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
}
UNLOCK (&frame->lock);
@@ -2076,7 +2076,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2231,7 +2231,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_entry_finish (frame, this);
goto out;
@@ -2294,7 +2294,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
"failed for %s.", local->loc.path);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_entry_done (frame, this);
} else {
@@ -2321,9 +2321,10 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_entry_sh_status;
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
+
if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_entrylk (frame, this, &local->loc, NULL,
afr_sh_post_nonblocking_entry_cbk);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ac2d7fcc668..1f663b692fc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -97,7 +97,7 @@ afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_metadata_finish (frame, this);
return 0;
}
@@ -461,7 +461,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_metadata_finish (frame, this);
goto out;
@@ -618,10 +618,10 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_metadata_sh_status;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
if (afr_can_start_metadata_self_heal (sh, priv)) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index cbe6b339d08..9594a828773 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -185,6 +185,18 @@ typedef struct {
afr_self_heal_status entry_self_heal;
} afr_sh_status_for_all_type;
+typedef enum {
+ AFR_SELF_HEAL_ENTRY,
+ AFR_SELF_HEAL_METADATA,
+ AFR_SELF_HEAL_DATA,
+ AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
+ AFR_SELF_HEAL_INVALID = -1,
+} afr_self_heal_type;
+
+typedef enum {
+ AFR_CHECK_ALL,
+ AFR_CHECK_SPECIFIC,
+} afr_sh_fail_check_type;
struct afr_self_heal_ {
/* External interface: These are variables (some optional) that
@@ -283,9 +295,8 @@ struct afr_self_heal_ {
afr_sh_algo_private_t *private;
afr_sh_status_for_all_type afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action;
- void (*afr_set_self_heal_status) (struct afr_self_heal_ *sh,
- afr_self_heal_status status);
struct afr_sh_algorithm *algo;
afr_lock_cbk_t data_lock_success_handler;
afr_lock_cbk_t data_lock_failure_handler;