diff options
| author | Anand Avati <avati@redhat.com> | 2012-06-26 21:06:39 -0700 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2012-07-04 00:17:47 -0700 | 
| commit | 5a3d849b8563067d35c1422e43e605bd9533f3c2 (patch) | |
| tree | 3af92a855743679a9f693a5ea9c35df67ff9c5dd | |
| parent | d90596a15c03434f14258d754e37b84e3ec57310 (diff) | |
cluster/afr: post-op-delay support
post-op-delay introduces an artificial delay between the OP and
POST-OP-CHANGELOG phases of a write transaction to increase the
probability of changelog-piggyback and eager-locking to work
more efficiently.
Also enable eager-locking by default.
Change-Id: I865ca4b68512c44818719c7e388952f15d53e6c2
BUG: 836033
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.com/3621
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pranithk@gluster.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 147 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 16 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 7 | 
6 files changed, 175 insertions, 2 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 407e76c0e74..6787dfc27d5 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2400,6 +2400,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)                  goto out;          } +	pthread_mutex_init (&fd_ctx->delay_lock, NULL);          INIT_LIST_HEAD (&fd_ctx->paused_calls);          INIT_LIST_HEAD (&fd_ctx->entries); @@ -2654,6 +2655,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)                  if (fd_ctx->lock_acquired)                          GF_FREE (fd_ctx->lock_acquired); +		pthread_mutex_destroy (&fd_ctx->delay_lock); +                  GF_FREE (fd_ctx);          } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 555a9b9faf6..5e61be4d4df 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)                          piggyback = 0;                          local->transaction.eager_lock[i] = 1; +			afr_set_delayed_post_op (frame, this); +                          LOCK (&local->fd->lock);                          {                                  if (fd_ctx->lock_acquired[i]) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 0e9956d7bed..c4378d4451f 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -11,6 +11,7 @@  #include "dict.h"  #include "byte-order.h"  #include "common-utils.h" +#include "timer.h"  #include "afr.h"  #include "afr-transaction.h" @@ -523,7 +524,7 @@ afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,  }  int -afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)  {          afr_private_t * priv = this->private;          afr_internal_lock_t *int_lock = NULL; @@ -877,6 +878,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)                          }                          UNLOCK (&local->fd->lock); +			afr_set_delayed_post_op (frame, this); +                          if (piggyback)                                  afr_changelog_pre_op_cbk (frame, (void *)(long)i,                                                            this, 1, 0, xattr[i], @@ -1243,16 +1246,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)  } +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this) +{ +	afr_local_t    *local = NULL; +	afr_private_t  *priv = NULL; + +	/* call this function from any of the related optimizations +	   which benefit from delaying post op are enabled, namely: + +	   - changelog piggybacking +	   - eager locking +	*/ + +	priv = this->private; +	if (!priv) +		return; + +	if (!priv->post_op_delay_secs) +		return; + +	local = frame->local; +	if (!local) +		return; + +	if (!local->fd) +		return; + +	if (local->op == GF_FOP_WRITE) +		local->delayed_post_op = _gf_true; +} + + +gf_boolean_t +is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) +{ +	afr_local_t      *local = NULL; +	gf_boolean_t      res = _gf_false; + +	local = frame->local; +	if (!local) +		goto out; + +	if (!local->delayed_post_op) +		goto out; + +	res = _gf_true; +out: +	return res; +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd); + +void +afr_delayed_changelog_wake_up_cbk (void *data) +{ +	fd_t           *fd = NULL; + +	fd = data; + +	afr_delayed_changelog_wake_up (THIS, fd); +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd) +{ +	afr_fd_ctx_t      *fd_ctx = NULL; +	call_frame_t      *prev_frame = NULL; +	struct timeval     delta = {0, }; +	afr_private_t     *priv = NULL; + +	priv = this->private; + +	fd_ctx = afr_fd_ctx_get (fd, this); +	if (!fd_ctx) +		return; + +	delta.tv_sec = priv->post_op_delay_secs; +	delta.tv_usec = 0; + +	pthread_mutex_lock (&fd_ctx->delay_lock); +	{ +		prev_frame = fd_ctx->delay_frame; +		fd_ctx->delay_frame = NULL; +		if (fd_ctx->delay_timer) +			gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer); +		fd_ctx->delay_timer = NULL; +		if (!frame) +			goto unlock; +		fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta, +							   afr_delayed_changelog_wake_up_cbk, +							   fd); +		fd_ctx->delay_frame = frame; +	} +unlock: +	pthread_mutex_unlock (&fd_ctx->delay_lock); + +	if (prev_frame) { +		afr_changelog_post_op_now (prev_frame, this); +	} +} + + +void +afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +{ +	afr_local_t  *local = NULL; + +	local = frame->local; + +	if (is_afr_delayed_changelog_post_op_needed (frame, this)) +		afr_delayed_changelog_post_op (this, frame, local->fd); +	else +		afr_changelog_post_op_now (frame, this); +} + + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) +{ +	afr_delayed_changelog_post_op (this, NULL, fd); +} + +  int  afr_transaction_resume (call_frame_t *frame, xlator_t *this)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL;          afr_private_t       *priv     = NULL; +	fd_t                *fd = NULL;          local    = frame->local;          int_lock = &local->internal_lock;          priv     = this->private; +	fd       = local->fd; + +	if (fd) +		/* The wake up needs to happen independent of +		   what type of fop arrives here. If it was +		   a write, then it has already inherited the +		   lock and changelog. If it was not a write, +		   then the presumption of the optimization (of +		   optimizing for successive write operations) +		   fails. +		*/ +		afr_delayed_changelog_wake_up (this, fd);  	afr_restore_lk_owner (frame); diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index d19d16fb39e..102599633e3 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -31,4 +31,6 @@ afr_fd_ctx_get (fd_t *fd, xlator_t *this);  int  afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,                        int child, afr_xattrop_type_t op); +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);  #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index cd84c357da8..108c82778db 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -183,6 +183,9 @@ reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,                            int32, out); +	GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options, +			  uint32, out); +          /* Reset this so we re-discover in case the topology changed.  */          priv->did_discovery = _gf_false; @@ -324,6 +327,8 @@ init (xlator_t *this)          GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);          fix_quorum_options(this,priv,qtype); +	GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out); +          priv->wait_count = 1;          priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, @@ -617,7 +622,7 @@ struct volume_options options[] = {          },          { .key = {"eager-lock"},            .type = GF_OPTION_TYPE_BOOL, -          .default_value = "off", +          .default_value = "on",          },          { .key = {"self-heal-daemon"},            .type = GF_OPTION_TYPE_BOOL, @@ -657,5 +662,14 @@ struct volume_options options[] = {            .default_value = "600",            .description = "Poll timeout for checking the need to self-heal"          }, +        { .key  = {"post-op-delay-secs"}, +          .type = GF_OPTION_TYPE_INT, +          .min  = 0, +          .max  = INT_MAX, +          .default_value = "1", +          .description = "Time interval induced artificially before " +	                 "post-operation phase of the transaction to " +                         "enhance overlap of adjacent write operations.", +        },          { .key  = {NULL} },  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index e9867cb826d..abb95533449 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -151,6 +151,7 @@ typedef struct _afr_private {          struct list_head saved_fds;   /* list of fds on which locks have succeeded */          gf_boolean_t      optimistic_change_log;          gf_boolean_t      eager_lock; +	uint32_t          post_op_delay_secs;          unsigned int      quorum_count;          char                   vol_uuid[UUID_SIZE + 1]; @@ -413,6 +414,7 @@ typedef struct _afr_local {          dict_t  *dict;          int      optimistic_change_log; +	gf_boolean_t      delayed_post_op;          gf_boolean_t    fop_paused;          int (*fop_call_continue) (call_frame_t *frame, xlator_t *this); @@ -733,6 +735,11 @@ typedef struct {          unsigned char *locked_on; /* which subvolumes locks have been successful */  	struct list_head  paused_calls; /* queued calls while fix_open happens  */ + +	/* used for delayed-post-op optimization */ +	pthread_mutex_t    delay_lock; +	gf_timer_t        *delay_timer; +	call_frame_t      *delay_frame;  } afr_fd_ctx_t;  | 
