diff options
author | Ravishankar N <ravishankar@redhat.com> | 2016-03-03 23:17:17 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-03-08 06:13:46 -0800 |
commit | d1d364634dce0c3dcfe9c2efc883c21af0494d0d (patch) | |
tree | 2af79ed651eb41643095b752844d4df9d169b02a /xlators/cluster | |
parent | eea4175addff08d010c2f7c6446397561b1cd919 (diff) |
afr: misc performance improvements
1. In afr_getxattr_cbk, check the errno value instead of blindly launching
an inode refresh and a subsequent retry on other children for every failure
(ENODATA, ENOTSUP, ERANGE and ENAMETOOLONG are now treated as ignorable).
2. We want to accuse small files only when we know for sure that there is no
IO happening on that inode. Otherwise, the ia_sizes obtained in the
post-inode-refresh replies may mismatch due to a race between
inode-refresh and ongoing writes, causing spurious heal launches.
Change-Id: Ife180f4fa5e584808c1077aacdc2423897675d33
BUG: 1309462
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/13595
Smoke: Gluster Build System <jenkins@build.gluster.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 74 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 11 |
2 files changed, 56 insertions, 29 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5e1acf2ebf4..7f6bf74abd9 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -62,6 +62,37 @@ afr_copy_frame (call_frame_t *base) return frame; } +/* Check if an entry or inode could be undergoing a transaction. */ +gf_boolean_t +afr_is_possibly_under_txn (afr_transaction_type type, afr_local_t *local, + xlator_t *this) +{ + int i = 0; + int tmp = 0; + afr_private_t *priv = NULL; + GF_UNUSED char *key = NULL; + + priv = this->private; + + if (type == AFR_ENTRY_TRANSACTION) + key = GLUSTERFS_PARENT_ENTRYLK; + else if (type == AFR_DATA_TRANSACTION) + /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once + * pl_inodelk_xattr_fill supports separate keys for different + * domains.*/ + key = GLUSTERFS_INODELK_COUNT; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].xdata) + continue; + if (dict_get_int32 (local->replies[i].xdata, key, &tmp) == 0) + if (tmp) + return _gf_true; + } + + return _gf_false; +} + int __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx) { @@ -628,7 +659,6 @@ afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused, return 0; } - int afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies, unsigned char *data_accused) @@ -661,7 +691,6 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies, return 0; } - int afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t *start_heal) @@ -725,7 +754,12 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, } - if (inode->ia_type != IA_IFDIR) + if ((inode->ia_type != IA_IFDIR) && + /* We want to accuse small files only when we know for sure that + * there is no IO happening. 
Otherwise, the ia_sizes obtained in + * post-refresh replies may mismatch due to a race between inode- + * refresh and ongoing writes, causing spurious heal launches*/ + !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) afr_accuse_smallfiles (this, replies, data_accused); for (i = 0; i < priv->child_count; i++) { @@ -979,6 +1013,13 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) "Unable to set link-count in dict "); } + ret = dict_set_str (xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name); + if (ret) { + gf_msg_debug (this->name, -ret, + "Unable to set inodelk-dom-count in dict "); + + } + if (local->fd) { for (i = 0; i < priv->child_count; i++) { if (local->child_up[i] && @@ -1492,30 +1533,6 @@ afr_frame_return (call_frame_t *frame) return call_count; } - -gf_boolean_t -afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this) -{ - int i = 0; - int tmp = 0; - afr_private_t *priv = NULL; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].xdata) - continue; - if (dict_get_int32 (local->replies[i].xdata, - GLUSTERFS_PARENT_ENTRYLK, - &tmp) == 0) - if (tmp) - return _gf_true; - } - - return _gf_false; -} - - static char *afr_ignore_xattrs[] = { GLUSTERFS_OPEN_FD_COUNT, GLUSTERFS_PARENT_ENTRYLK, @@ -1659,7 +1676,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) replies = local->replies; parent = local->loc.parent; - locked_entry = afr_is_entry_possibly_under_txn (local, this); + locked_entry = afr_is_possibly_under_txn (AFR_ENTRY_TRANSACTION, local, + this); readable = alloca0 (priv->child_count); diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index caa2a97d6d8..1690cb684dd 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -472,7 +472,16 @@ afr_filter_xattrs (dict_t *dict) } } +static +gf_boolean_t +afr_getxattr_ignorable_errnos (int32_t op_errno) +{ + if (op_errno == ENODATA 
|| op_errno == ENOTSUP || op_errno == ERANGE || + op_errno == ENAMETOOLONG) + return _gf_true; + return _gf_false; +} int afr_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @@ -482,7 +491,7 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie, local = frame->local; - if (op_ret < 0) { + if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) { local->op_ret = op_ret; local->op_errno = op_errno; |