summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
author: Pranith Kumar K <pkarampu@redhat.com> 2014-01-09 16:55:11 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2014-04-28 09:44:58 -0700
commit: 800258b54a4a776430410eb949cfded147c4ae8a (patch)
tree: 81bd10a6e873868822e4271c43ddfe90f19c6132 /xlators/cluster/afr/src
parent: 26843b00447cc14427b4f02ca136033b56ca093f (diff)
cluster/afr: Prevent heal info hang when data-self-heal in progress.
Problem: To determine whether data-self-heal is needed, afr takes blocking locks. So if self-heal is indeed in progress on the file, this leads to hangs. Heal info hung for almost 50 minutes while a 50G file was undergoing heal. Fix: When self-heal is in progress there is a live self-heal-domain lock. At this stage, if a non-blocking inodelk for the self-heal-domain lock is attempted, it will fail with EAGAIN. For heal info we can use this logic to determine that the file is possibly undergoing heal and inform the user of that instead of waiting for the self-heal to complete. Change-Id: I18527c59e429602bae49c98ff45502833ab8e1f0 BUG: 1039544 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/7482 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Ravishankar N <ravishankar@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c | 2
-rw-r--r-- xlators/cluster/afr/src/afr-lk-common.c | 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c | 1
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c | 21
-rw-r--r-- xlators/cluster/afr/src/afr.h | 2
5 files changed, 25 insertions, 3 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 1e57ebb9d7c..6a453060c9e 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1822,6 +1822,8 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
if (ret)
gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
"sh-failed to %d", local->loc.path, sh_failed);
+ ret = dict_set_int32 (xattr, "possibly-healing",
+ local->self_heal.possibly_healing);
} else {
ret = dict_set_int32 (xattr, "metadata-self-heal-pending",
local->self_heal.metadata_sh_pending);
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 060d78f3505..c492114c6f6 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1432,6 +1432,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret < 0) {
+ int_lock->lock_op_errno = op_errno;
if (op_errno == ENOSYS) {
/* return ENOTSUP */
gf_log (this->name, GF_LOG_ERROR,
@@ -1440,7 +1441,6 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"server");
local->op_ret = op_ret;
int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
local->op_errno = op_errno;
}
if (local->transaction.eager_lock)
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 4916bf45d68..8861870372d 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2360,6 +2360,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
orig_frame_sh->entry_sh_pending = sh->entry_sh_pending;
orig_frame_sh->data_sh_pending = sh->data_sh_pending;
orig_frame_sh->metadata_sh_pending = sh->metadata_sh_pending;
+ orig_frame_sh->possibly_healing = sh->possibly_healing;
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
is_self_heal_failed (sh, AFR_CHECK_ALL));
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 02c91c95710..e740ac9a308 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1418,6 +1418,22 @@ afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
}
int
+afr_sh_dom_lock_failure (call_frame_t *frame, xlator_t *this) /* handed to afr_sh_data_lock() next to afr_sh_dom_lock_success for the priv->sh_domain lock */
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ int_lock = &local->internal_lock;
+ if (EAGAIN == int_lock->lock_op_errno) /* a non-blocking self-heal-domain inodelk fails with EAGAIN while a self-heal is in progress */
+ sh->possibly_healing = _gf_true; /* flag that a heal is probably running; other errnos leave it untouched */
+ afr_sh_data_fail (frame, this); /* called unconditionally, whatever the lock errno was */
+ return 0; /* always 0 */
+}
+
+int
afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
@@ -1612,8 +1628,9 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"fd for %s opened, commencing sync",
local->loc.path);
- afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
- afr_sh_dom_lock_success, afr_sh_data_fail);
+ afr_sh_data_lock (frame, this, 0, 0, !sh->dry_run,
+ priv->sh_domain, afr_sh_dom_lock_success,
+ afr_sh_dom_lock_failure);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index ad8964ccbaa..49ca64c75c1 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -288,6 +288,8 @@ struct afr_self_heal_ {
int32_t dry_run;
gf_boolean_t metadata_sh_pending;
+ gf_boolean_t possibly_healing; //set when it is detected
+ //that a self-heal is in progress
gf_boolean_t data_sh_pending;
gf_boolean_t entry_sh_pending;