diff options
author | Ravishankar N <ravishankar@redhat.com> | 2016-04-05 15:16:52 +0530 |
---|---|---|
committer | Kaushal M <kaushal@redhat.com> | 2016-04-11 05:04:04 -0700 |
commit | c9c2c08d34003f49bc3a509757a135665fb20518 (patch) | |
tree | c66231eac65674b43b7866bde6e40351e334232c /xlators/cluster/afr | |
parent | 0a01154c68cb5eb884096fc67288a71c391d9160 (diff) |
arbiter: write performance improvement
Backport of: http://review.gluster.org/#/c/13906
Problem: The throughput for a 'dd' workload was much less for arbiter
configuration when compared to normal replica-3 volume. There were 2
issues:
i) arbiter_writev was using the request dict as the response dict while
unwinding, leading to incorrect GLUSTERFS_WRITE_IS_APPEND and
GLUSTERFS_OPEN_FD_COUNT values (=4). This caused immediate post-ops
because is_afr_delayed_changelog_post_op_needed() failed due to the
afr_are_multiple_fds_opened() check.
ii) The arbiter code in afr was setting local->transaction.{start and len} = 0
to take full file locks. This meant that even for simultaneous but
non-overlapping writevs, afr_transaction_eager_lock_init() was not
happening because afr_locals_overlap() was always true. Consequently
is_afr_delayed_changelog_post_op_needed() failed due to
local->delayed_post_op not being set.
Fix:
i) Send appropriate response dict values in arbiter_writev.
ii) Modify flock params instead of local->transaction.{start and len} to
take full file locks in the transaction.
Also changed _fill_writev_xdata() in posix to fill rsp_xdata for
whatever key is requested.
Change-Id: I1c5fc5e98aba49ade540bb441a022e65b753432a
BUG: 1324809
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Robert Rauch <robert.rauch@gns-systems.de>
Reported-by: Russel Purinton <russell.purinton@gmail.com>
Reviewed-on: http://review.gluster.org/13925
Smoke: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 16 |
2 files changed, 12 insertions, 12 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 4206ef2f111..084a78ecf47 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -417,7 +417,6 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) { call_frame_t *transaction_frame = NULL; afr_local_t *local = NULL; - afr_private_t *priv = NULL; int ret = -1; int op_errno = ENOMEM; @@ -426,7 +425,6 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) goto out; local = frame->local; - priv = this->private; transaction_frame->local = local; frame->local = NULL; @@ -456,12 +454,6 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) local->transaction.start = local->cont.writev.offset; local->transaction.len = iov_length (local->cont.writev.vector, local->cont.writev.count); - - /*Lock entire file to avoid network split brains.*/ - if (priv->arbiter_count == 1) { - local->transaction.start = 0; - local->transaction.len = 0; - } } ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 4c85a4b0d03..59d03e3ed9e 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1356,16 +1356,24 @@ afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this) int -afr_set_transaction_flock (afr_local_t *local) +afr_set_transaction_flock (xlator_t *this, afr_local_t *local) { afr_internal_lock_t *int_lock = NULL; afr_inodelk_t *inodelk = NULL; + afr_private_t *priv = NULL; int_lock = &local->internal_lock; inodelk = afr_get_inodelk (int_lock, int_lock->domain); + priv = this->private; - inodelk->flock.l_len = local->transaction.len; - inodelk->flock.l_start = local->transaction.start; + if (priv->arbiter_count) { + /*Lock entire file to avoid network split brains.*/ + inodelk->flock.l_len = 0; + inodelk->flock.l_start = 0; + } else { + inodelk->flock.l_len 
= local->transaction.len; + inodelk->flock.l_start = local->transaction.start; + } inodelk->flock.l_type = F_WRLCK; return 0; @@ -1386,7 +1394,7 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this) switch (local->transaction.type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - afr_set_transaction_flock (local); + afr_set_transaction_flock (this, local); int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk; |