summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-common.c
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2016-01-10 09:19:34 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-03-01 03:23:20 -0800
commit8210ca1a5c0e78e91c6fab7df7e002e39660b706 (patch)
tree432a6836cc685760ee441f4b8e46221947247211 /xlators/cluster/afr/src/afr-common.c
parentea00992d3d52a51b7c8311ad9565bbbb6e395f9d (diff)
afr: Add throttled background client-side heals
If a heal is needed after inode refresh (lookup, read_txn), launch it in the background instead of blocking the fop (that triggered refresh) until the heal happens. afr_replies_interpret() is modified such that the heal is launched only if at least one sink brick is up. The maximum number of heals that can happen in parallel is configurable via the 'background-self-heal-count' volume option. Any heals beyond that number are put in a wait queue whose length is configurable via the 'heal-wait-queue-length' volume option. If the wait queue is also full, further heals will be ignored. Default values: background-self-heal-count=8, heal-wait-queue-length=128 Change-Id: I1d4a52814cdfd43d90591b6d2ad7b6219937ce70 BUG: 1297172 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/13207 Smoke: Gluster Build System <jenkins@build.gluster.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r--xlators/cluster/afr/src/afr-common.c89
1 files changed, 41 insertions, 48 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 457f7865cec..1f68b87ef94 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -663,7 +663,8 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
int
-afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -734,6 +735,13 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
}
}
+ for (i = 0; i < priv->child_count; i++) {
+ if (start_heal && priv->child_up[i] &&
+ (!data_readable[i] || !metadata_readable[i])) {
+ *start_heal = _gf_true;
+ break;
+ }
+ }
afr_inode_read_subvol_set (inode, this, data_readable,
metadata_readable, event_generation);
return ret;
@@ -772,36 +780,6 @@ ret:
return -err;
}
-
-int
-afr_refresh_selfheal_wrap (void *opaque)
-{
- call_frame_t *frame = opaque;
- afr_local_t *local = NULL;
- xlator_t *this = NULL;
- int err = 0;
-
- local = frame->local;
- this = frame->this;
-
- afr_selfheal (frame->this, local->refreshinode->gfid);
-
- afr_selfheal_unlocked_discover (frame, local->refreshinode,
- local->refreshinode->gfid,
- local->replies);
-
- afr_replies_interpret (frame, this, local->refreshinode);
-
- err = afr_inode_refresh_err (frame, this);
-
- afr_local_replies_wipe (local, this->private);
-
- local->refreshfn (frame, this, err);
-
- return 0;
-}
-
-
gf_boolean_t
afr_selfheal_enabled (xlator_t *this)
{
@@ -817,35 +795,43 @@ afr_selfheal_enabled (xlator_t *this)
return data || priv->metadata_self_heal || priv->entry_self_heal;
}
-
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *heal = NULL;
+ call_frame_t *heal_frame = NULL;
afr_local_t *local = NULL;
+ gf_boolean_t start_heal = _gf_false;
+ afr_local_t *heal_local = NULL;
+ int op_errno = ENOMEM;
int ret = 0;
int err = 0;
local = frame->local;
- ret = afr_replies_interpret (frame, this, local->refreshinode);
+ ret = afr_replies_interpret (frame, this, local->refreshinode,
+ &start_heal);
err = afr_inode_refresh_err (frame, this);
afr_local_replies_wipe (local, this->private);
- if (ret && afr_selfheal_enabled (this)) {
- heal = copy_frame (frame);
- if (heal)
- heal->root->pid = GF_CLIENT_PID_SELF_HEALD;
- ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
- afr_refresh_selfheal_done, heal, frame);
- if (ret)
- goto refresh_done;
- } else {
- refresh_done:
- local->refreshfn (frame, this, err);
- }
+ if (ret && afr_selfheal_enabled (this) && start_heal) {
+ heal_frame = copy_frame (frame);
+ if (!heal_frame)
+ goto refresh_done;
+ heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
+ if (!heal_local) {
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->refreshinode = inode_ref (local->refreshinode);
+ heal_local->heal_frame = heal_frame;
+ afr_throttled_selfheal (heal_frame, this);
+ }
+
+refresh_done:
+ local->refreshfn (frame, this, err);
return 0;
}
@@ -1758,7 +1744,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
*/
gf_uuid_copy (args.gfid, read_gfid);
args.ia_type = ia_type;
- if (afr_replies_interpret (frame, this, local->inode)) {
+ if (afr_replies_interpret (frame, this, local->inode, NULL)) {
read_subvol = afr_read_subvol_decide (local->inode,
this, &args);
afr_inode_read_subvol_reset (local->inode, this);
@@ -2214,7 +2200,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
goto unwind;
}
- afr_replies_interpret (frame, this, local->inode);
+ afr_replies_interpret (frame, this, local->inode, NULL);
read_subvol = afr_read_subvol_decide (local->inode, this, NULL);
if (read_subvol == -1) {
@@ -3863,6 +3849,12 @@ afr_priv_dump (xlator_t *this)
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads);
+ gf_proc_dump_write("heal-wait-queue-length", "%d",
+ priv->heal_wait_qlen);
+ gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
+ gf_proc_dump_write("background-self-heal-count", "%d",
+ priv->background_self_heal_count);
+ gf_proc_dump_write("healers", "%d", priv->healers);
return 0;
}
@@ -4169,6 +4161,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
goto out;
}
+ INIT_LIST_HEAD (&local->healer);
return 0;
out:
return -1;