diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 147 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 13 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 7 | 
6 files changed, 173 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index d3e51822fe3..98aba9fd492 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2278,6 +2278,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)                  goto out;          } +	pthread_mutex_init (&fd_ctx->delay_lock, NULL);          INIT_LIST_HEAD (&fd_ctx->paused_calls);          INIT_LIST_HEAD (&fd_ctx->entries); @@ -2532,6 +2533,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)                  if (fd_ctx->lock_acquired)                          GF_FREE (fd_ctx->lock_acquired); +		pthread_mutex_destroy (&fd_ctx->delay_lock); +                  GF_FREE (fd_ctx);          } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 555a9b9faf6..5e61be4d4df 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)                          piggyback = 0;                          local->transaction.eager_lock[i] = 1; +			afr_set_delayed_post_op (frame, this); +                          LOCK (&local->fd->lock);                          {                                  if (fd_ctx->lock_acquired[i]) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 92f3dd65de8..65dea16df42 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -11,6 +11,7 @@  #include "dict.h"  #include "byte-order.h"  #include "common-utils.h" +#include "timer.h"  #include "afr.h"  #include "afr-transaction.h" @@ -492,7 +493,7 @@ afr_changelog_post_op_call_count (afr_transaction_type type,  }  int -afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)  {          afr_private_t * priv = this->private;          afr_internal_lock_t *int_lock = NULL; @@ -862,6 +863,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)                          }                          UNLOCK (&local->fd->lock); +			afr_set_delayed_post_op (frame, this); +                          if (piggyback)                                  afr_changelog_pre_op_cbk (frame, (void *)(long)i,                                                            this, 1, 0, xattr[i], @@ -1227,16 +1230,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)  } +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this) +{ +	afr_local_t    *local = NULL; +	afr_private_t  *priv = NULL; + +	/* call this function from any of the related optimizations +	   which benefit from delaying post op are enabled, namely: + +	   - changelog piggybacking +	   - eager locking +	*/ + +	priv = this->private; +	if (!priv) +		return; + +	if (!priv->post_op_delay_secs) +		return; + +	local = frame->local; +	if (!local) +		return; + +	if (!local->fd) +		return; + +	if (local->op == GF_FOP_WRITE) +		local->delayed_post_op = _gf_true; +} + + +gf_boolean_t +is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) +{ +	afr_local_t      *local = NULL; +	gf_boolean_t      res = _gf_false; + +	local = frame->local; +	if (!local) +		goto out; + +	if (!local->delayed_post_op) +		goto out; + +	res = _gf_true; +out: +	return res; +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd); + +void +afr_delayed_changelog_wake_up_cbk (void *data) +{ +	fd_t           *fd = NULL; + +	fd = data; + +	afr_delayed_changelog_wake_up (THIS, fd); +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd) +{ +	afr_fd_ctx_t      *fd_ctx = NULL; +	call_frame_t      *prev_frame = NULL; +	struct timeval     delta = {0, }; +	afr_private_t     *priv = NULL; + +	priv = this->private; + +	fd_ctx = afr_fd_ctx_get (fd, this); +	if (!fd_ctx) +		return; + +	delta.tv_sec = priv->post_op_delay_secs; +	delta.tv_usec = 0; + +	pthread_mutex_lock (&fd_ctx->delay_lock); +	{ +		prev_frame = fd_ctx->delay_frame; +		fd_ctx->delay_frame = NULL; +		if (fd_ctx->delay_timer) +			gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer); +		fd_ctx->delay_timer = NULL; +		if (!frame) +			goto unlock; +		fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta, +							   afr_delayed_changelog_wake_up_cbk, +							   fd); +		fd_ctx->delay_frame = frame; +	} +unlock: +	pthread_mutex_unlock (&fd_ctx->delay_lock); + +	if (prev_frame) { +		afr_changelog_post_op_now (prev_frame, this); +	} +} + + +void +afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +{ +	afr_local_t  *local = NULL; + +	local = frame->local; + +	if (is_afr_delayed_changelog_post_op_needed (frame, this)) +		afr_delayed_changelog_post_op (this, frame, local->fd); +	else +		afr_changelog_post_op_now (frame, this); +} + + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) +{ +	afr_delayed_changelog_post_op (this, NULL, fd); +} + +  int  afr_transaction_resume (call_frame_t *frame, xlator_t *this)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL;          afr_private_t       *priv     = NULL; +	fd_t                *fd = NULL;          local    = frame->local;          int_lock = &local->internal_lock;          priv     = this->private; +	fd       = local->fd; + +	if (fd) +		/* The wake up needs to happen independent of +		   what type of fop arrives here. If it was +		   a write, then it has already inherited the +		   lock and changelog. If it was not a write, +		   then the presumption of the optimization (of +		   optimizing for successive write operations) +		   fails. +		*/ +		afr_delayed_changelog_wake_up (this, fd);  	afr_restore_lk_owner (frame); diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 965ee67374c..69a8c1b1659 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -25,4 +25,6 @@ afr_fd_ctx_t *  afr_fd_ctx_get (fd_t *fd, xlator_t *this);  int  afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending); +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);  #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 3f102f80885..51c51e56a72 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -165,6 +165,8 @@ reconfigure (xlator_t *this, dict_t *options)                            uint32, out);          fix_quorum_options(this,priv,qtype); +	GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options, +			  uint32, out);          ret = 0;  out:          return ret; @@ -291,6 +293,8 @@ init (xlator_t *this)          GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);          fix_quorum_options(this,priv,qtype); +	GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out); +          priv->wait_count = 1;          priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, @@ -596,5 +600,14 @@ struct volume_options options[] = {            .type = GF_OPTION_TYPE_STR,            .description = "Local glusterd uuid string",          }, +        { .key  = {"post-op-delay-secs"}, +          .type = GF_OPTION_TYPE_INT, +          .min  = 0, +          .max  = INT_MAX, +          .default_value = "1", +          .description = "Time interval induced artificially before " +	                 "post-operation phase of the transaction to " +                         "enhance overlap of adjacent write operations.", +        },          { .key  = {NULL} },  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 256e93ad213..78d108db6d8 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -149,6 +149,7 @@ typedef struct _afr_private {          struct list_head saved_fds;   /* list of fds on which locks have succeeded */          gf_boolean_t      optimistic_change_log;          gf_boolean_t      eager_lock; +	uint32_t          post_op_delay_secs;          unsigned int      quorum_count;          char                   vol_uuid[UUID_SIZE + 1]; @@ -408,6 +409,7 @@ typedef struct _afr_local {          dict_t  *dict;          int      optimistic_change_log; +	gf_boolean_t      delayed_post_op;          gf_boolean_t    fop_paused;          int (*fop_call_continue) (call_frame_t *frame, xlator_t *this); @@ -733,6 +735,11 @@ typedef struct {          unsigned char *locked_on; /* which subvolumes locks have been successful */  	struct list_head  paused_calls; /* queued calls while fix_open happens  */ + +	/* used for delayed-post-op optimization */ +	pthread_mutex_t    delay_lock; +	gf_timer_t        *delay_timer; +	call_frame_t      *delay_frame;  } afr_fd_ctx_t;  | 
