From e9fa7aeb1a32e22ff0749d67995e689028ca5a19 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Tue, 8 Mar 2016 16:43:12 +0530 Subject: afr: misc performance improvements Backport of http://review.gluster.org/#/c/13595/ 1. In afr_getxattr_cbk, consider the errno value before blindly launching an inode refresh and a subsequent retry on other children. 2. We want to accuse small files only when we know for sure that there is no IO happening on that inode. Otherwise, the ia_sizes obtained in the post-inode-refresh replies may mismatch due to a race between inode-refresh and ongoing writes, causing spurious heal launches. Change-Id: I9858485d1061db67353ccf99c59530731649c847 BUG: 1309462 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/13644 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System Reviewed-by: Pranith Kumar Karampuri Reviewed-by: Krutika Dhananjay CentOS-regression: Gluster Build System --- xlators/cluster/afr/src/afr-common.c | 74 ++++++++++++++++++++------------ xlators/cluster/afr/src/afr-inode-read.c | 11 ++++- xlators/features/locks/src/posix.c | 18 ++++++++ 3 files changed, 74 insertions(+), 29 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index bfff6048799..b232a2ded20 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -67,6 +67,37 @@ afr_copy_frame (call_frame_t *base) return frame; } +/* Check if an entry or inode could be undergoing a transaction. */ +gf_boolean_t +afr_is_possibly_under_txn (afr_transaction_type type, afr_local_t *local, + xlator_t *this) +{ + int i = 0; + int tmp = 0; + afr_private_t *priv = NULL; + GF_UNUSED char *key = NULL; + + priv = this->private; + + if (type == AFR_ENTRY_TRANSACTION) + key = GLUSTERFS_PARENT_ENTRYLK; + else if (type == AFR_DATA_TRANSACTION) + /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once + * pl_inodelk_xattr_fill supports separate keys for different + * domains.*/ + key = GLUSTERFS_INODELK_COUNT; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].xdata) + continue; + if (dict_get_int32 (local->replies[i].xdata, key, &tmp) == 0) + if (tmp) + return _gf_true; + } + + return _gf_false; +} + int __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx) { @@ -633,7 +664,6 @@ afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused, return 0; } - int afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies, unsigned char *data_accused) @@ -666,7 +696,6 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies, return 0; } - int afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode) { @@ -729,7 +758,12 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode) } - if (inode->ia_type != IA_IFDIR) + if ((inode->ia_type != IA_IFDIR) && + /* We want to accuse small files only when we know for sure that + * there is no IO happening. Otherwise, the ia_sizes obtained in + * post-refresh replies may mismatch due to a race between inode- + * refresh and ongoing writes, causing spurious heal launches*/ + !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) afr_accuse_smallfiles (this, replies, data_accused); for (i = 0; i < priv->child_count; i++) { @@ -998,6 +1032,13 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) "Unable to set link-count in dict "); } + ret = dict_set_str (xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name); + if (ret) { + gf_msg_debug (this->name, -ret, + "Unable to set inodelk-dom-count in dict "); + + } + if (local->fd) { for (i = 0; i < priv->child_count; i++) { if (local->child_up[i] && @@ -1511,30 +1552,6 @@ afr_frame_return (call_frame_t *frame) return call_count; } - -gf_boolean_t -afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this) -{ - int i = 0; - int tmp = 0; - afr_private_t *priv = NULL; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].xdata) - continue; - if (dict_get_int32 (local->replies[i].xdata, - GLUSTERFS_PARENT_ENTRYLK, - &tmp) == 0) - if (tmp) - return _gf_true; - } - - return _gf_false; -} - - static char *afr_ignore_xattrs[] = { GLUSTERFS_OPEN_FD_COUNT, GLUSTERFS_PARENT_ENTRYLK, @@ -1678,7 +1695,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) replies = local->replies; parent = local->loc.parent; - locked_entry = afr_is_entry_possibly_under_txn (local, this); + locked_entry = afr_is_possibly_under_txn (AFR_ENTRY_TRANSACTION, local, + this); readable = alloca0 (priv->child_count); diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index e58780c44b0..a49d2568b82 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -477,7 +477,16 @@ afr_filter_xattrs (dict_t *dict) } } +static +gf_boolean_t +afr_getxattr_ignorable_errnos (int32_t op_errno) +{ + if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE || + op_errno == ENAMETOOLONG) + return _gf_true; + return _gf_false; +} int afr_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @@ -487,7 +496,7 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie, local = frame->local; - if (op_ret < 0) { + if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) { local->op_ret = op_ret; local->op_errno = op_errno; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 3d79c570bb0..76c7c4ee7ce 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -2189,6 +2189,23 @@ pl_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; } +int32_t +pl_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + PL_STACK_UNWIND (fstat, xdata, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +pl_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); + STACK_WIND (frame, pl_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; +} + int pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) @@ -2776,6 +2793,7 @@ pl_fentrylk (call_frame_t *frame, xlator_t *this, struct xlator_fops fops = { .lookup = pl_lookup, .create = pl_create, + .fstat = pl_fstat, .truncate = pl_truncate, .ftruncate = pl_ftruncate, .open = pl_open, -- cgit