summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorVenkatesh Somyajulu <vsomyaju@redhat.com>2013-06-28 19:11:47 +0530
committerVijay Bellur <vbellur@redhat.com>2013-07-02 10:25:17 -0700
commitef8092fab7b6fa5a16cc0e22b75945758519d5a6 (patch)
tree65aa4fa06801b135d61a5e57637dad882793e73e /xlators/cluster
parent7062eda1575214819f5c7411748b06be95e08ffa (diff)
cluster/afr: Allow data/entry self heal for metadata split-brain
Problem: Currently whenever there is metadata split-brain, a variable sh->op_failed is set to 1 to denote that self heal got failed. But if we proceed for data self heal, even code-path of data self heal also relies on the sh->op_failed variable. So if will check for sh->op_failed variable and will eventually fails to do data self heal. So needed a mechanism to allow data self heal even if metadata is in split brain. Fix: Some data structure revamp is done in http://review.gluster.com/#/c/5106/ fix and this patch is based on the above fix. Now we can store which particular self-heal got failed i.e GFID_OR_MISSING_ENTRY_SELF_HEAL, METADATA, DATA, ENTRY. And we can do two types of self heal failure check. 1. Individual type check: We can check which among all four (Metadata, Data, Gfid or missing entry, entry self heal) got failed. 2. In afr_self_heal_completion_cbk, we need to make check based on the fact that if any specific self heal got failed treat the complete self heal as failure so that it will populate corresponding circular buffer of event history accordingly. Change-Id: Icb91e513bcc752386fc8a78812405cfabe5cac2d BUG: 977797 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Reviewed-on: http://review.gluster.org/5253 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c29
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c167
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h21
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c24
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c21
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c8
-rw-r--r--xlators/cluster/afr/src/afr.h15
7 files changed, 151 insertions, 134 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 22e07457..1d577cfb 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -100,7 +100,7 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
}
sh_private_cleanup (sh_frame, this);
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
GF_ASSERT (!last_loop_frame);
//loop_finish should have happened and the old_loop should be NULL
gf_log (this->name, GF_LOG_DEBUG,
@@ -276,7 +276,7 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
_gf_true, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
out:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (old_loop_frame)
sh_loop_finish (old_loop_frame, this);
sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
@@ -307,8 +307,9 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
sh_priv->loops_running--;
offset = sh_priv->offset;
block_size = sh->block_size;
- while ((!sh->eof_reached) && (!is_self_heal_failed (sh)) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
@@ -327,7 +328,8 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
if (0 == loop) {
//loop finish does unlock, but the erasing of the pending
//xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !is_self_heal_failed (sh))
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
goto driver_done;
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@@ -338,7 +340,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
//If we have more loops to form we should finish previous loop after
//the next loop lock
while (loop--) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
// op failed in other loop, stop spawning more loops
if (old_loop_frame) {
sh_loop_finish (old_loop_frame, this);
@@ -384,7 +386,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame
}
if (op_ret == -1) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
if (loop_frame) {
sh_loop_finish (loop_frame, this);
@@ -432,7 +434,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (loop_sh, op_errno);
} else if (op_ret < loop_local->cont.writev.vector->iov_len) {
gf_log (this->name, GF_LOG_ERROR,
@@ -440,7 +442,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"(expected %lu, returned %d)", sh_local->loc.path,
priv->children[child_index]->name,
loop_local->cont.writev.vector->iov_len, op_ret);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (loop_frame);
@@ -514,7 +516,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
if (op_ret <= 0) {
if (op_ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
"for %s reason :%s", sh->source,
sh_local->loc.path, strerror (errno));
@@ -624,7 +626,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
strong_checksum, MD5_DIGEST_LENGTH);
@@ -662,7 +664,8 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
}
UNLOCK (&sh_priv->lock);
- if (write_needed && !is_self_heal_failed (sh)) {
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh_loop_read (loop_frame, this);
} else {
sh_loop_return (sh_frame, this, loop_frame,
@@ -800,7 +803,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
ret = 0;
out:
if (ret) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
sh_loop_driver_done (sh_frame, this, NULL);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5f985374..f0915b01 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1018,7 +1018,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
local->loc.path);
}
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh->completion_cbk (frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1250,7 +1250,7 @@ out:
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
"reason: %s", local->loc.path, strerror (-ret));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
afr_sh_missing_entries_finish (frame, this);
}
@@ -1265,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
local = frame->local;
sh = &local->self_heal;
if (op_ret < 0)
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return 0;
}
@@ -1386,7 +1386,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
}
return;
out:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
LOCK (&frame->lock);
{
afr_sh_set_error (sh, EIO);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
}
@@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_missing_entries_finish (frame, this);
} else {
if (afr_gfid_missing_count (this->name, sh->fresh_children,
@@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
afr_sh_set_error (sh, ENOENT);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_purge_entry (frame, this);
} else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
priv->child_count, local->loc.path,
@@ -1787,7 +1787,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
return;
fail:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1858,7 +1858,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
out:
afr_sh_set_error (sh, op_errno);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return;
}
@@ -1962,7 +1962,7 @@ afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
@@ -2047,8 +2047,9 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_gfid_or_missing_entry_sh_status;
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_self_heal_parent_entrylk (frame, this,
afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
@@ -2176,7 +2177,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
- if (is_self_heal_failed (sh) && !priv->shd.iamshd) {
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
loglevel = GF_LOG_ERROR;
} else {
loglevel = GF_LOG_DEBUG;
@@ -2191,7 +2192,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
orig_frame_sh = &orig_frame_local->self_heal;
orig_frame_sh->actual_sh_started = _gf_true;
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- is_self_heal_failed (sh));
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@@ -2305,6 +2306,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh->do_gfid_self_heal = _gf_false;
}
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
+
FRAME_SU_DO (sh_frame, afr_local_t);
if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
afr_self_heal_missing_entries (sh_frame, this);
@@ -2514,7 +2517,7 @@ out:
GF_FREE (erase_xattr);
if (ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
finish (frame, this);
}
@@ -2522,59 +2525,39 @@ out:
}
void
-afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (sh)
- sh->afr_all_sh_status.data_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-
-void
-afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (sh)
- sh->afr_all_sh_status.metadata_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-
-void
-afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
{
- xlator_t *this = NULL;
-
+ xlator_t *this = NULL;
+ afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
+ afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
this = THIS;
- if (sh)
- sh->afr_all_sh_status.entry_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-void
-afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
- afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
+ "Structure");
+ goto out;
+ }
- if (sh)
- sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ sh_status->gfid_or_missing_entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ sh_status->metadata_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ sh_status->data_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ sh_status->entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
+ "self heal type in action");
+ break;
+ }
+out:
+ return;
}
void
@@ -2585,22 +2568,58 @@ afr_set_local_for_unhealable (afr_local_t *local)
sh = &local->self_heal;
local->unhealable = 1;
- if (sh->afr_set_self_heal_status)
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
int
-is_self_heal_failed (afr_self_heal_t *sh)
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
{
- afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
+ afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
+ xlator_t *this = NULL;
+ int sh_failed = 0;
+
+ this = THIS;
- int sh_failed = 0;
- if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
- sh_failed = 1;
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+ "structure");
+ sh_failed = 1;
+ goto out;
+ }
+ if (type == AFR_CHECK_ALL) {
+ if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
+ sh_failed = 1;
+ } else if (type == AFR_CHECK_SPECIFIC) {
+ sh_type_in_action = sh->sh_type_in_action;
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ status = sh_status.gfid_or_missing_entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ status = sh_status.metadata_self_heal;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ status = sh_status.entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ status = sh_status.data_self_heal;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+ break;
+ }
+ if (status == AFR_SELF_HEAL_FAILED)
+ sh_failed = 1;
+
+ }
+
+out:
return sh_failed;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 329bb2f1..cc67e23f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -15,13 +15,6 @@
#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
-
-typedef enum {
AFR_LOOKUP_FAIL_CONFLICTS = 1,
AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
} afr_lookup_flags_t;
@@ -138,20 +131,10 @@ void
afr_set_local_for_unhealable (afr_local_t *local);
int
-is_self_heal_failed (afr_self_heal_t *sh);
-
-void
-afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status);
-
-void
-afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status staus);
-
-void
-afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status);
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
void
-afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
- afr_self_heal_status status);
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
void
afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index fc7f5e7a..9c2f3d53 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -335,7 +335,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"finishing failed data selfheal of %s", local->loc.path);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (sh->data_lock_held)
afr_sh_data_unlock (frame, this, afr_sh_data_close);
else
@@ -362,13 +362,13 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
"log failed on %s for subvol %s, reason: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
if (sh->old_loop_frame)
sh_loop_finish (sh->old_loop_frame, this);
sh->old_loop_frame = NULL;
@@ -418,7 +418,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index]->name, strerror (op_errno));
LOCK (&frame->lock);
{
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
if (sh->old_loop_frame)
@@ -428,7 +428,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh))
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
afr_sh_data_fail (frame, this);
else
afr_sh_data_erase_pending (frame, this);
@@ -604,7 +604,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"ftruncate of %s on subvolume %s completed",
@@ -617,7 +617,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh))
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
afr_sh_data_fail (frame, this);
else
afr_sh_data_sync_prepare (frame, this);
@@ -718,7 +718,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
if (sh->background && sh->unwind && !sh->unwound) {
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- is_self_heal_failed (sh));
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
sh->unwound = _gf_true;
}
@@ -1342,7 +1342,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"open of %s succeeded on child %s",
@@ -1355,7 +1355,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_data_fail (frame, this);
return 0;
}
@@ -1485,10 +1485,10 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_data_sh_status;
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
if (afr_can_start_data_self_heal (sh, priv)) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
if (IA_ISREG (sh->type)) {
afr_sh_data_open (frame, this);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 14ccca21..3598f79d 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -162,7 +162,7 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
if (sh->entries_skipped) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
goto out;
}
afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
@@ -799,7 +799,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_sink (frame, this, sh->active_source);
sh->active_source = active_src;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
goto out;
}
@@ -1946,7 +1946,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
local->loc.path,
priv->children[active_src]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"readdir of %s on subvolume %s complete",
@@ -2019,7 +2019,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_source (frame, this, sh->active_source);
sh->active_source = active_src;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2068,7 +2068,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
}
UNLOCK (&frame->lock);
@@ -2076,7 +2076,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -2231,7 +2231,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_entry_finish (frame, this);
goto out;
@@ -2294,7 +2294,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
"failed for %s.", local->loc.path);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_entry_done (frame, this);
} else {
@@ -2321,9 +2321,10 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_entry_sh_status;
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
+
if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_entrylk (frame, this, &local->loc, NULL,
afr_sh_post_nonblocking_entry_cbk);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ac2d7fcc..1f663b69 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -97,7 +97,7 @@ afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_metadata_finish (frame, this);
return 0;
}
@@ -461,7 +461,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
priv = this->private;
if (op_ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_metadata_finish (frame, this);
goto out;
@@ -618,10 +618,10 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_metadata_sh_status;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
if (afr_can_start_metadata_self_heal (sh, priv)) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index cbe6b339..9594a828 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -185,6 +185,18 @@ typedef struct {
afr_self_heal_status entry_self_heal;
} afr_sh_status_for_all_type;
+typedef enum {
+ AFR_SELF_HEAL_ENTRY,
+ AFR_SELF_HEAL_METADATA,
+ AFR_SELF_HEAL_DATA,
+ AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
+ AFR_SELF_HEAL_INVALID = -1,
+} afr_self_heal_type;
+
+typedef enum {
+ AFR_CHECK_ALL,
+ AFR_CHECK_SPECIFIC,
+} afr_sh_fail_check_type;
struct afr_self_heal_ {
/* External interface: These are variables (some optional) that
@@ -283,9 +295,8 @@ struct afr_self_heal_ {
afr_sh_algo_private_t *private;
afr_sh_status_for_all_type afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action;
- void (*afr_set_self_heal_status) (struct afr_self_heal_ *sh,
- afr_self_heal_status status);
struct afr_sh_algorithm *algo;
afr_lock_cbk_t data_lock_success_handler;
afr_lock_cbk_t data_lock_failure_handler;