From 4163a3f8fbcc5e0d84d955258161f3a6f7a86de8 Mon Sep 17 00:00:00 2001
From: Vikas Gorur
Date: Fri, 23 Oct 2009 10:11:47 +0000
Subject: cluster/afr: Do self-heal in the background.

This patch introduces a new option "background-self-heal-count",
with a default value of 16. This means that up to
{background-self-heal-count} number of files/directories
will be healed in the background at any given time. If
such number of self-heals are already in progress, further
self-heals take place in the foreground.

Signed-off-by: Anand V. Avati

BUG: 320 (Improve self-heal performance)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=320
---
 xlators/cluster/afr/src/afr-self-heal-common.c | 192 +++++++++++++++++++++++--
 xlators/cluster/afr/src/afr-self-heal-data.c   |   4 +
 xlators/cluster/afr/src/afr-self-heal-entry.c  |   4 +
 xlators/cluster/afr/src/afr.c                  |  17 +++
 xlators/cluster/afr/src/afr.h                  |  12 +++
 5 files changed, 218 insertions(+), 11 deletions(-)

(limited to 'xlators')

diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 42d57a1869a..6d4802fd6fe 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1368,6 +1368,133 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
 }
 
 
+/*
+ * Duplicate the afr_local_t of a lookup so that self-heal can run on a
+ * frame of its own while the original frame is unwound.
+ *
+ * The structure is first copied bytewise; loc, child_up, xattr_req and
+ * cont.lookup.xattr are then re-created through loc_copy(), memdup() and
+ * dict_copy_with_ref().
+ * NOTE(review): every other pointer member remains shared with 'l' --
+ * confirm that none of them is released by the cleanup of both locals.
+ *
+ * Returns the new local, or NULL if it could not be allocated.
+ */
+afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+{
+        afr_private_t *priv = NULL;
+        afr_local_t   *lc   = NULL;
+
+        priv = this->private;
+
+        lc = CALLOC (1, sizeof (afr_local_t));
+        if (!lc) {
+                return NULL;
+        }
+
+        memcpy (lc, l, sizeof (afr_local_t));
+
+        loc_copy (&lc->loc, &l->loc);
+
+        lc->child_up  = memdup (l->child_up, priv->child_count);
+        lc->xattr_req = dict_copy_with_ref (l->xattr_req, NULL);
+
+        lc->cont.lookup.inode = l->cont.lookup.inode;
+        lc->cont.lookup.xattr = dict_copy_with_ref (l->cont.lookup.xattr, NULL);
+
+        return lc;
+}
+
+
+/*
+ * Completion callback installed by afr_self_heal() on the self-heal frame.
+ *
+ * Calls afr_set_split_brain() with 1 or 0 depending on
+ * local->govinda_gOvinda, unwinds the original lookup frame if that has
+ * not been done yet, gives back the background slot when this self-heal
+ * had taken one, and destroys the self-heal frame.
+ */
+int
+afr_bgsh_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
+{
+        afr_private_t   *priv  = NULL;
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh    = NULL;
+
+        priv  = this->private;
+        local = bgsh_frame->local;
+        sh    = &local->self_heal;
+
+        if (local->govinda_gOvinda) {
+                afr_set_split_brain (this, local->cont.lookup.inode, 1);
+        } else {
+                afr_set_split_brain (this, local->cont.lookup.inode, 0);
+        }
+
+        gf_log (this->name, GF_LOG_TRACE,
+                "background self-heal completed");
+
+        if (!sh->unwound) {
+                AFR_STACK_UNWIND (lookup, sh->orig_frame,
+                                  local->op_ret, local->op_errno,
+                                  local->cont.lookup.inode,
+                                  &local->cont.lookup.buf,
+                                  local->cont.lookup.xattr,
+                                  NULL);
+        }
+
+        /* Only a self-heal that was counted in afr_self_heal() gives its
+           slot back; decrementing otherwise would wrap the unsigned
+           counter. */
+        if (sh->background) {
+                LOCK (&priv->lock);
+                {
+                        priv->background_self_heals_started--;
+                }
+                UNLOCK (&priv->lock);
+        }
+
+        AFR_STACK_DESTROY (bgsh_frame);
+
+        return 0;
+}
+
+
+/*
+ * Unwind the original lookup frame early while self-heal carries on using
+ * bgsh_frame. Sets sh->unwound so that afr_bgsh_completion_cbk() does not
+ * unwind the lookup a second time.
+ */
+int
+afr_bgsh_unwind (call_frame_t *bgsh_frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh    = NULL;
+
+        local = bgsh_frame->local;
+        sh    = &local->self_heal;
+
+        if (local->govinda_gOvinda) {
+                afr_set_split_brain (this, local->cont.lookup.inode, 1);
+        } else {
+                afr_set_split_brain (this, local->cont.lookup.inode, 0);
+        }
+
+        gf_log (this->name, GF_LOG_TRACE,
+                "unwinding lookup and continuing self-heal in the background");
+
+        sh->unwound = _gf_true;
+
+        AFR_STACK_UNWIND (lookup, sh->orig_frame,
+                          local->op_ret, local->op_errno,
+                          local->cont.lookup.inode,
+                          &local->cont.lookup.buf,
+                          local->cont.lookup.xattr,
+                          NULL);
+
+        return 0;
+}
+
+
 int
 afr_self_heal (call_frame_t *frame, xlator_t *this,
 	       int (*completion_cbk) (call_frame_t *, xlator_t *))
@@ -1376,20 +1503,62 @@ afr_self_heal (call_frame_t *frame, xlator_t *this,
 	afr_self_heal_t *sh = NULL;
 	afr_private_t *priv = NULL;
 	int i = 0;
 
+        int           background = 0;
+        call_frame_t *sh_frame   = NULL;
+        afr_local_t  *sh_local   = NULL;
 
 	local = frame->local;
-	sh = &local->self_heal;
-	priv = this->private;
+        priv = this->private;
 
-	gf_log (this->name, GF_LOG_TRACE,
-		"performing self heal on %s (metadata=%d data=%d entry=%d)",
-		local->loc.path,
-		local->need_metadata_self_heal,
-		local->need_data_self_heal,
-		local->need_entry_self_heal);
+        /* Set up the frame the self-heal runs on before a background slot
+           is taken, so that a failure here leaves nothing to roll back. */
+        sh_frame = copy_frame (frame);
+        if (sh_frame) {
+                sh_local = afr_local_copy (local, this);
+        }
+
+        if (!sh_local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory, cannot self-heal %s",
+                        local->loc.path);
+
+                if (sh_frame) {
+                        STACK_DESTROY (sh_frame->root);
+                }
+
+                AFR_STACK_UNWIND (lookup, frame, -1, ENOMEM,
+                                  NULL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        LOCK (&priv->lock);
+        {
+                if (priv->background_self_heals_started
+                    < priv->background_self_heal_count) {
+                        priv->background_self_heals_started++;
+                        background = 1;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        gf_log (this->name, GF_LOG_TRACE,
+                "performing self heal on %s (metadata=%d data=%d entry=%d)",
+                local->loc.path,
+                local->need_metadata_self_heal,
+                local->need_data_self_heal,
+                local->need_entry_self_heal);
 
-	sh->completion_cbk = completion_cbk;
+        sh_frame->local = sh_local;
+        sh = &sh_local->self_heal;
+
+        /* afr_bgsh_completion_cbk() gives a slot back only when
+           sh->background is set, so it must mirror what was counted. */
+        sh->background = background ? _gf_true : _gf_false;
+        sh->orig_frame = frame;
+        /* NOTE(review): the caller's completion_cbk is not used here. */
+        sh->completion_cbk = afr_bgsh_completion_cbk;
+        sh->unwind = afr_bgsh_unwind;
 
 	sh->buf = CALLOC (priv->child_count, sizeof (struct stat));
 	sh->child_errno = CALLOC (priv->child_count, sizeof (int));
@@ -1411,12 +1580,13 @@ afr_self_heal (call_frame_t *frame, xlator_t *this,
 	}
 
 	if (local->success_count && local->enoent_count) {
-		afr_self_heal_missing_entries (frame, this);
+                afr_self_heal_missing_entries (sh_frame, this);
 	} else {
 		gf_log (this->name, GF_LOG_TRACE,
 			"proceeding to metadata check on %s",
 			local->loc.path);
-		afr_sh_missing_entries_done (frame, this);
+
+                afr_sh_missing_entries_done (sh_frame, this);
 	}
 
 	return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index d2224ec9263..718c716f55a 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -768,6 +768,10 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
 		sh->sources[i] = 0;
 	}
 
+        if (sh->background) {
+                sh->unwind (frame, this);
+        }
+
 	afr_sh_data_sync_prepare (frame, this);
 
 	return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index cf7bf9a9c0d..1e36dee69c0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1977,6 +1977,10 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
 	source = afr_sh_select_source (sh->sources, priv->child_count);
 	sh->source = source;
 
+        if (sh->background) {
+                sh->unwind (frame, this);
+        }
+
 	afr_sh_entry_sync_prepare (frame, this);
 
 	return 0;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index bf023ce27ce..641fa1551ec 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -2337,6 +2337,7 @@ init (xlator_t *this)
 	char * algo = NULL;
 	char * change_log = NULL;
 
+        int32_t background_count = 0;
 	int32_t lock_server_count = 1;
 	int32_t window_size;
 
@@ -2366,6 +2367,18 @@ init (xlator_t *this)
 	fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
 	priv->favorite_child = -1;
 
+        priv->background_self_heal_count = 16;
+
+        dict_ret = dict_get_int32 (this->options, "background-self-heal-count",
+                                   &background_count);
+        if (dict_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Setting background self-heal count to %d.",
+                        background_count);
+
+                priv->background_self_heal_count = background_count;
+        }
+
 	/* Default values */
 
 	priv->data_self_heal = 1;
@@ -2673,6 +2686,10 @@ struct volume_options options[] = {
 	{ .key = {"favorite-child"},
 	  .type = GF_OPTION_TYPE_XLATOR
 	},
+        { .key = {"background-self-heal-count"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0
+        },
 	{ .key = {"data-self-heal"},
 	  .type = GF_OPTION_TYPE_BOOL
 	},
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index e3ab4ebe494..98584af7678 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -50,6 +50,8 @@ typedef struct _afr_private {
 	unsigned int data_self_heal_window_size; /* max number of pipelined
 	                                            read/writes */
 
+        unsigned int background_self_heal_count;    /* max self-heals in background */
+        unsigned int background_self_heals_started; /* changed under priv->lock */
 	gf_boolean_t metadata_self_heal; /* on/off */
 	gf_boolean_t entry_self_heal; /* on/off */
 
@@ -99,6 +101,11 @@ typedef struct {
 
 	loc_t parent_loc;
 
+        call_frame_t *orig_frame;   /* lookup frame to be unwound */
+        gf_boolean_t unwound;       /* has orig_frame been unwound? */
+        gf_boolean_t background;    /* is this self-heal in the background? */
+        int (*unwind) (call_frame_t *frame, xlator_t *this);
+
 	/* private data for the particular self-heal algorithm */
 	void *private;
 
@@ -530,6 +537,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this);
 int
 afr_frame_return (call_frame_t *frame);
 
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, int32_t split_brain);
+
+
+
 #define AFR_STACK_UNWIND(fop, frame, params ...) \
 	do { \
 		afr_local_t *__local = NULL; \
-- 
cgit