From f276480174b7c62fbcf914e330563de063463634 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 26 Jun 2012 21:06:39 -0700 Subject: cluster/afr: post-op-delay support post-op-delay introduces an artificial delay between the OP and POST-OP-CHANGELOG phases of a write transaction to increase the probability of changelog-piggyback and eager-locking to work more efficiently. Change-Id: I865ca4b68512c44818719c7e388952f15d53e6c2 BUG: 836033 Signed-off-by: Anand Avati Signed-off-by: Pranith Kumar K Reviewed-on: https://code.engineering.redhat.com/gerrit/1858 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 3 + xlators/cluster/afr/src/afr-lk-common.c | 2 + xlators/cluster/afr/src/afr-transaction.c | 147 +++++++++++++++++++++++++++++- xlators/cluster/afr/src/afr-transaction.h | 2 + xlators/cluster/afr/src/afr.c | 13 +++ xlators/cluster/afr/src/afr.h | 7 ++ 6 files changed, 173 insertions(+), 1 deletion(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index d3e51822fe3..98aba9fd492 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2278,6 +2278,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd) goto out; } + pthread_mutex_init (&fd_ctx->delay_lock, NULL); INIT_LIST_HEAD (&fd_ctx->paused_calls); INIT_LIST_HEAD (&fd_ctx->entries); @@ -2532,6 +2533,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) if (fd_ctx->lock_acquired) GF_FREE (fd_ctx->lock_acquired); + pthread_mutex_destroy (&fd_ctx->delay_lock); + GF_FREE (fd_ctx); } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 555a9b9faf6..5e61be4d4df 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) piggyback = 0; local->transaction.eager_lock[i] = 1; + afr_set_delayed_post_op (frame, this); + LOCK (&local->fd->lock); { if (fd_ctx->lock_acquired[i]) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 92f3dd65de8..65dea16df42 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -11,6 +11,7 @@ #include "dict.h" #include "byte-order.h" #include "common-utils.h" +#include "timer.h" #include "afr.h" #include "afr-transaction.h" @@ -492,7 +493,7 @@ afr_changelog_post_op_call_count (afr_transaction_type type, } int -afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) { afr_private_t * priv = this->private; afr_internal_lock_t *int_lock = NULL; @@ -862,6 +863,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) } UNLOCK (&local->fd->lock); + afr_set_delayed_post_op (frame, this); + if (piggyback) afr_changelog_pre_op_cbk (frame, (void *)(long)i, this, 1, 0, xattr[i], @@ -1227,16 +1230,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this) } +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + /* call this function from any of the related optimizations + which benefit from delaying post op are enabled, namely: + + - changelog piggybacking + - eager locking + */ + + priv = this->private; + if (!priv) + return; + + if (!priv->post_op_delay_secs) + return; + + local = frame->local; + if (!local) + return; + + if (!local->fd) + return; + + if (local->op == GF_FOP_WRITE) + local->delayed_post_op = _gf_true; +} + + +gf_boolean_t +is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + gf_boolean_t res = _gf_false; + + local = frame->local; + if (!local) + goto out; + + if (!local->delayed_post_op) + goto out; + + res = _gf_true; +out: + return res; +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd); + +void +afr_delayed_changelog_wake_up_cbk (void *data) +{ + fd_t *fd = NULL; + + fd = data; + + afr_delayed_changelog_wake_up (THIS, fd); +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd) +{ + afr_fd_ctx_t *fd_ctx = NULL; + call_frame_t *prev_frame = NULL; + struct timeval delta = {0, }; + afr_private_t *priv = NULL; + + priv = this->private; + + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) + return; + + delta.tv_sec = priv->post_op_delay_secs; + delta.tv_usec = 0; + + pthread_mutex_lock (&fd_ctx->delay_lock); + { + prev_frame = fd_ctx->delay_frame; + fd_ctx->delay_frame = NULL; + if (fd_ctx->delay_timer) + gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer); + fd_ctx->delay_timer = NULL; + if (!frame) + goto unlock; + fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta, + afr_delayed_changelog_wake_up_cbk, + fd); + fd_ctx->delay_frame = frame; + } +unlock: + pthread_mutex_unlock (&fd_ctx->delay_lock); + + if (prev_frame) { + afr_changelog_post_op_now (prev_frame, this); + } +} + + +void +afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (is_afr_delayed_changelog_post_op_needed (frame, this)) + afr_delayed_changelog_post_op (this, frame, local->fd); + else + afr_changelog_post_op_now (frame, this); +} + + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) +{ + afr_delayed_changelog_post_op (this, NULL, fd); +} + + int afr_transaction_resume (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; + fd_t *fd = NULL; local = frame->local; int_lock = &local->internal_lock; priv = this->private; + fd = local->fd; + + if (fd) + /* The wake up needs to happen independent of + what type of fop arrives here. If it was + a write, then it has already inherited the + lock and changelog. If it was not a write, + then the presumption of the optimization (of + optimizing for successive write operations) + fails. + */ + afr_delayed_changelog_wake_up (this, fd); afr_restore_lk_owner (frame); diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 965ee67374c..69a8c1b1659 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -25,4 +25,6 @@ afr_fd_ctx_t * afr_fd_ctx_get (fd_t *fd, xlator_t *this); int afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending); +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this); #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 3f102f80885..51c51e56a72 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -165,6 +165,8 @@ reconfigure (xlator_t *this, dict_t *options) uint32, out); fix_quorum_options(this,priv,qtype); + GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options, + uint32, out); ret = 0; out: return ret; @@ -291,6 +293,8 @@ init (xlator_t *this) GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out); fix_quorum_options(this,priv,qtype); + GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out); + priv->wait_count = 1; priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, @@ -596,5 +600,14 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_STR, .description = "Local glusterd uuid string", }, + { .key = {"post-op-delay-secs"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "1", + .description = "Time interval induced artificially before " + "post-operation phase of the transaction to " + "enhance overlap of adjacent write operations.", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 256e93ad213..78d108db6d8 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -149,6 +149,7 @@ typedef struct _afr_private { struct list_head saved_fds; /* list of fds on which locks have succeeded */ gf_boolean_t optimistic_change_log; gf_boolean_t eager_lock; + uint32_t post_op_delay_secs; unsigned int quorum_count; char vol_uuid[UUID_SIZE + 1]; @@ -408,6 +409,7 @@ typedef struct _afr_local { dict_t *dict; int optimistic_change_log; + gf_boolean_t delayed_post_op; gf_boolean_t fop_paused; int (*fop_call_continue) (call_frame_t *frame, xlator_t *this); @@ -733,6 +735,11 @@ typedef struct { unsigned char *locked_on; /* which subvolumes locks have been successful */ struct list_head paused_calls; /* queued calls while fix_open happens */ + + /* used for delayed-post-op optimization */ + pthread_mutex_t delay_lock; + gf_timer_t *delay_timer; + call_frame_t *delay_frame; } afr_fd_ctx_t; -- cgit