summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2018-09-18 12:15:57 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2018-10-05 14:37:01 +0000
commit45b5b48bc1e21099ab04cf0b1d432c6cb015cc05 (patch)
tree0b31463a7a89a11c3612d919f23f147650d24995
parent320901d1dc9e1d4b2985e7277ca22816e7936f2f (diff)
cluster/afr: Make data eager-lock decision based on number of locks
For both Virt and block workloads the file is opened multiple times leading to dynamically setting eager-lock to off for the workload. Instead of depending on the number-of-open-fds, if we change the logic to depend on number of inodelks, then it will give better performance than the earlier logic. When there is an eager-lock and number of inodelks is more than 1 we know that there is a conflicting lock, so depend on that information to decide whether to keep the current transaction go through delayed-post-op or not. Locks xlator doesn't have implementation to query number of locks in fxattrop in releases older than 3.10 so to keep things backward compatible in 3.12, data transactions will use new logic where as fxattrop transactions will use old logic. I am planning to send one more patch which makes metadata domain locks also depend on inodelk-count Profile info for a dd of 500MB to a file with another fd opened on the file using exec 250>filename Without this patch: 0.14 67.41 us 16.72 us 3870.82 us 892 FINODELK 0.59 279.87 us 95.71 us 2085.89 us 898 FXATTROP 3.46 366.43 us 81.75 us 6952.79 us 4000 WRITE 95.79 148733.99 us 50568.12 us 919127.86 us 273 FSYNC With this patch: 0.00 51.01 us 38.07 us 80.16 us 4 FINODELK 0.00 235.43 us 235.43 us 235.43 us 1 TRUNCATE 0.00 125.07 us 56.80 us 193.33 us 2 GETXATTR 0.00 135.86 us 62.13 us 209.59 us 2 INODELK 0.00 197.88 us 155.39 us 253.90 us 4 FXATTROP 0.00 450.59 us 394.28 us 506.89 us 2 XATTROP 0.00 56.96 us 19.06 us 406.59 us 23 FLUSH 37.81 273648.93 us 48.43 us 6017657.05 us 44 LOOKUP 62.18 4951.86 us 93.80 us 1143154.75 us 3999 WRITE postgresql benchmark performance changed from ~1130 TPS to ~2300TPS randio fio job inside Ovirt based VM went from ~600IOPs to ~2000IOPS fixes bz#1635972 Change-Id: If7f7388d2f08cf7f17ca517a4ea222560661dc36 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c22
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c23
-rw-r--r--xlators/cluster/afr/src/afr.h8
3 files changed, 47 insertions, 6 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index f9e2b302f8d..df807342b99 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -284,6 +284,7 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
+ int32_t num_inodelks = 0;
LOCK(&frame->lock);
{
@@ -305,6 +306,15 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->open_fd_count = open_fd_count;
local->update_open_fd_count = _gf_true;
}
+
+ ret = dict_get_int32n(xdata, GLUSTERFS_INODELK_COUNT,
+ SLEN(GLUSTERFS_INODELK_COUNT), &num_inodelks);
+ if (ret < 0)
+ goto unlock;
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
+ }
}
unlock:
UNLOCK(&frame->lock);
@@ -314,6 +324,7 @@ void
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
local = frame->local;
@@ -332,6 +343,11 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
if (local->update_open_fd_count)
local->inode_ctx->open_fd_count = local->open_fd_count;
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
}
int
@@ -512,6 +528,12 @@ afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
goto out;
}
+ if (dict_set_strn(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ SLEN(GLUSTERFS_INODELK_DOM_COUNT), this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
op_errno = ENOMEM;
goto out;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index dae4c8234e9..cf153dea9cc 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1916,17 +1916,28 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
}
gf_boolean_t
-afr_are_multiple_fds_opened(afr_local_t *local, xlator_t *this)
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
{
+ afr_lock_t *lock = NULL;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
/* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
* is taken mount2 opened the same file, it won't be able to
- * perform any data operations until mount1 releases eager-lock.
+ * perform any {meta,}data operations until mount1 releases eager-lock.
* To avoid such scenario do not enable eager-lock for this transaction
- * if open-fd-count is > 1
+ * if open-fd-count is > 1 for metadata transactions and if num-inodelks > 1
+ * for data transactions
*/
- if (local->inode_ctx->open_fd_count > 1)
- return _gf_true;
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ if (local->inode_ctx->open_fd_count > 1) {
+ return _gf_true;
+ }
+ } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ if (lock->num_inodelks > 1) {
+ return _gf_true;
+ }
+ }
return _gf_false;
}
@@ -1947,7 +1958,7 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if (afr_are_multiple_fds_opened(local, this)) {
+ if (afr_are_conflicting_ops_waiting(local, this)) {
lock->release = _gf_true;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 3395507ab90..6252f91736a 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -342,6 +342,12 @@ typedef enum {
} afr_fop_lock_state_t;
typedef struct _afr_inode_lock_t {
+ /* @num_inodelks:
+ Number of inodelks queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ int32_t num_inodelks;
unsigned int event_generation;
gf_boolean_t release;
gf_boolean_t acquired;
@@ -393,6 +399,8 @@ typedef struct _afr_local {
uint32_t open_fd_count;
gf_boolean_t update_open_fd_count;
+ int32_t num_inodelks;
+ gf_boolean_t update_num_inodelks;
gf_lkowner_t saved_lk_owner;