summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnand Avati <avati@redhat.com>2012-06-26 21:06:39 -0700
committerAnand Avati <avati@redhat.com>2012-07-04 00:17:47 -0700
commit5a3d849b8563067d35c1422e43e605bd9533f3c2 (patch)
tree3af92a855743679a9f693a5ea9c35df67ff9c5dd
parentd90596a15c03434f14258d754e37b84e3ec57310 (diff)
cluster/afr: post-op-delay support
post-op-delay introduces an artificial delay between the OP and POST-OP-CHANGELOG phases of a write transaction to increase the probability of changelog-piggyback and eager-locking to work more efficiently. Also enable eager-locking by default. Change-Id: I865ca4b68512c44818719c7e388952f15d53e6c2 BUG: 836033 Signed-off-by: Anand Avati <avati@redhat.com> Reviewed-on: http://review.gluster.com/3621 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pranithk@gluster.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c3
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c147
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h2
-rw-r--r--xlators/cluster/afr/src/afr.c16
-rw-r--r--xlators/cluster/afr/src/afr.h7
6 files changed, 175 insertions, 2 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 407e76c0e74..6787dfc27d5 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2400,6 +2400,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
INIT_LIST_HEAD (&fd_ctx->paused_calls);
INIT_LIST_HEAD (&fd_ctx->entries);
@@ -2654,6 +2655,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
if (fd_ctx->lock_acquired)
GF_FREE (fd_ctx->lock_acquired);
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
+
GF_FREE (fd_ctx);
}
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 555a9b9faf6..5e61be4d4df 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
piggyback = 0;
local->transaction.eager_lock[i] = 1;
+ afr_set_delayed_post_op (frame, this);
+
LOCK (&local->fd->lock);
{
if (fd_ctx->lock_acquired[i]) {
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 0e9956d7bed..c4378d4451f 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -11,6 +11,7 @@
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
@@ -523,7 +524,7 @@ afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
}
int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = this->private;
afr_internal_lock_t *int_lock = NULL;
@@ -877,6 +878,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
}
UNLOCK (&local->fd->lock);
+ afr_set_delayed_post_op (frame, this);
+
if (piggyback)
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
this, 1, 0, xattr[i],
@@ -1243,16 +1246,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
}
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
+
+ - changelog piggybacking
+ - eager locking
+ */
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ if (!priv->post_op_delay_secs)
+ return;
+
+ local = frame->local;
+ if (!local)
+ return;
+
+ if (!local->fd)
+ return;
+
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
+
+
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (!local->delayed_post_op)
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+ fd_t *fd = NULL;
+
+ fd = data;
+
+ afr_delayed_changelog_wake_up (THIS, fd);
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timeval delta = {0, };
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_usec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
+ {
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
+ }
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
+
+ if (prev_frame) {
+ afr_changelog_post_op_now (prev_frame, this);
+ }
+}
+
+
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd);
+ else
+ afr_changelog_post_op_now (frame, this);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd);
+}
+
+
int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ fd = local->fd;
+
+ if (fd)
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ afr_delayed_changelog_wake_up (this, fd);
afr_restore_lk_owner (frame);
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index d19d16fb39e..102599633e3 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -31,4 +31,6 @@ afr_fd_ctx_get (fd_t *fd, xlator_t *this);
int
afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
int child, afr_xattrop_type_t op);
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index cd84c357da8..108c82778db 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -183,6 +183,9 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
int32, out);
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
/* Reset this so we re-discover in case the topology changed. */
priv->did_discovery = _gf_false;
@@ -324,6 +327,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
fix_quorum_options(this,priv,qtype);
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+
priv->wait_count = 1;
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
@@ -617,7 +622,7 @@ struct volume_options options[] = {
},
{ .key = {"eager-lock"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
+ .default_value = "on",
},
{ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
@@ -657,5 +662,14 @@ struct volume_options options[] = {
.default_value = "600",
.description = "Poll timeout for checking the need to self-heal"
},
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index e9867cb826d..abb95533449 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -151,6 +151,7 @@ typedef struct _afr_private {
struct list_head saved_fds; /* list of fds on which locks have succeeded */
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
+ uint32_t post_op_delay_secs;
unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
@@ -413,6 +414,7 @@ typedef struct _afr_local {
dict_t *dict;
int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
gf_boolean_t fop_paused;
int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
@@ -733,6 +735,11 @@ typedef struct {
unsigned char *locked_on; /* which subvolumes locks have been successful */
struct list_head paused_calls; /* queued calls while fix_open happens */
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
} afr_fd_ctx_t;