From 077185afe35350aae03bac0d25fbebcd8a3e1c72 Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Tue, 17 Mar 2015 16:43:00 +0530 Subject: cluster/afr: Make read child match check in afr optional Backport of: http://review.gluster.org/#/c/9917 Having this particular check which was introduced by commit bb2df4e63fa8a5d65f18b4a5efc757e8d475fbff causes a drop in performance in readdirp. So the behavior is made configurable with this patch. Change-Id: I9012a6bb955229a0cbb48f06e4e2edc0782dfead BUG: 1202675 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/9924 Reviewed-by: Pranith Kumar Karampuri Reviewed-by: Atin Mukherjee Tested-by: Gluster Build System Reviewed-by: Niels de Vos --- libglusterfs/src/globals.h | 4 +++- xlators/cluster/afr/src/afr-dir-read.c | 6 ++++++ xlators/cluster/afr/src/afr.c | 14 ++++++++++++++ xlators/cluster/afr/src/afr.h | 1 + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 ++++++ 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index 3d2cdc306c1..3eceb4bb6df 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -34,10 +34,12 @@ */ #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly should not change */ -#define GD_OP_VERSION_MAX 30501 /* MAX VERSION is the maximum count in VME +#define GD_OP_VERSION_MAX 30504 /* MAX VERSION is the maximum count in VME table, should keep changing with introduction of newer versions */ +#define GD_OP_VERSION_3_5_4 30504 + #include "xlator.h" /* THIS */ diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index d94cb0ca699..2d368a49544 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -422,11 +422,14 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata) { + afr_private_t *priv = NULL; afr_local_t *local = NULL; gf_dirent_t *entry = NULL; int par_read_child = (long) cookie; int32_t read_child = -1; + priv = this->private; + if (op_ret == -1) goto out; @@ -434,6 +437,9 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_readdir_filter_trash_dir (entries, local->fd); + if (!priv->consistent_metadata) + goto out; + list_for_each_entry (entry, &entries->list, list) { if (entry->inode) { read_child = -1; diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index c020cbccac5..0739d0de93b 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -193,6 +193,8 @@ reconfigure (xlator_t *this, dict_t *options) bool, out); GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options, bool, out); + GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata, + options, bool, out); priv->did_discovery = _gf_false; ret = 0; @@ -339,6 +341,8 @@ init (xlator_t *this) GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out); GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool, out); + GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool, + out); priv->wait_count = 1; @@ -789,5 +793,15 @@ struct volume_options options[] = { "written to the disk", .default_value = "on", }, + { .key = {"consistent-metadata"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .description = "If this option is enabled, readdirp will force " + "lookups on those entries read whose read child is " + "not the same as that of the parent. This will " + "guarantee that all read operations on a file serve " + "attributes from the same subvol as long as it holds " + " a good copy of the file/dir.", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index e704dc2e929..990279d160b 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -180,6 +180,7 @@ typedef struct _afr_private { uint64_t sh_readdir_size; gf_boolean_t ensure_durability; char *sh_domain; + gf_boolean_t consistent_metadata; } afr_private_t; typedef enum { diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index b8bad7622dc..869d11357dd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -554,6 +554,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = 3, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "cluster.consistent-metadata", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_5_4, + .flags = OPT_FLAG_CLIENT_OPT + }, /* Stripe xlator options */ { .key = "cluster.stripe-block-size", -- cgit