From 502c95bdfb35640fb424b37b080664e9c1639a86 Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Mon, 13 Aug 2012 11:20:17 +0530 Subject: cluster/dht: Optimize readdirp calls in DHT Bring in option which is supported by posix xlator to filter out directory's entries from being returned. DHT would now request non-first subvols to filter out directory entries. dht xlator-option readdir-optimize will enable this optimization Change-Id: Ibf99f1bef501f285ff44a1cecfbebee9e16063b6 BUG: 838199 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.com/3806 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-common.c | 24 ++++++++++++++++++++ xlators/cluster/dht/src/dht-common.h | 4 ++++ xlators/cluster/dht/src/dht.c | 8 ++++++- xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 + xlators/storage/posix/src/posix.c | 34 +++++++++++++++++++++++++++-- 5 files changed, 68 insertions(+), 3 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5da0c10bb..ade05f38d 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2766,6 +2766,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dht_layout_t *layout = 0; dht_conf_t *conf = NULL; xlator_t *subvol = 0; + int ret = 0; INIT_LIST_HEAD (&entries.list); prev = cookie; @@ -2843,6 +2844,16 @@ done: goto unwind; } + if (conf->readdir_optimize == _gf_true) { + if (next_subvol != dht_first_up_subvol (this)) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "dict set failed"); + } + } + STACK_WIND (frame, dht_readdirp_cbk, next_subvol, next_subvol->fops->readdirp, local->fd, local->size, next_offset, @@ -2968,11 +2979,14 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, xlator_t *xvol = NULL; off_t xoff = 0; int ret = 0; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + conf = this->private; + local = dht_local_init (frame, NULL, NULL, whichop); if (!local) { op_errno = ENOMEM; @@ -3000,6 +3014,16 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, gf_log (this->name, GF_LOG_WARNING, "failed to set 'glusterfs.dht.linkto'" " key"); + if (conf->readdir_optimize == _gf_true) { + if (xvol != dht_first_up_subvol (this)) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, + GF_LOG_ERROR, + "Dict set failed"); + } + } } STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 9f7723fdb..da83967e7 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -246,6 +246,10 @@ struct dht_conf { /* defrag related */ gf_defrag_info_t *defrag; + + /* Request to filter directory entries in readdir request */ + + gf_boolean_t readdir_optimize; }; typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index e3930e2af..68a8efcde 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -331,7 +331,8 @@ reconfigure (xlator_t *this, dict_t *options) percent, out); GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); - + GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options, + bool, out); if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { ret = dht_parse_decommissioned_bricks (this, conf, temp_str); if (ret == -1) @@ -431,6 +432,7 @@ init (xlator_t *this) GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down, bool, err); + GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err); ret = dht_init_subvolumes (this, conf); if (ret == -1) { @@ -593,6 +595,10 @@ struct volume_options options[] = { { .key = {"node-uuid"}, .type = GF_OPTION_TYPE_STR, }, + { .key = {"readdir-optimize"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 7cd97ed5d..198d91ad9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -117,6 +117,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.lookup-unhashed", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.min-free-disk", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.min-free-inodes", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, + {"cluster.readdir-optimize", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.read-subvolume", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 63000a351..cedf9538b 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3547,7 +3547,7 @@ posix_fentrylk (call_frame_t *frame, xlator_t *this, int posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, - gf_dirent_t *entries) + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { off_t in_case = -1; size_t filled = 0; @@ -3557,6 +3557,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, int32_t this_size = -1; gf_dirent_t *this_entry = NULL; uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; + + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + } if (!off) { rewinddir (dir); @@ -3611,6 +3623,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, continue; } + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } + } + this_size = max (sizeof (gf_dirent_t), sizeof (gfs3_dirplist)) + strlen (entry->d_name) + 1; @@ -3731,6 +3754,7 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; gf_dirent_t entries; + int32_t skip_dirs = 0; VALIDATE_OR_GOTO (frame, out); @@ -3756,7 +3780,13 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, goto out; } - count = posix_fill_readdir (fd, dir, off, size, &entries); + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); /* pick ENOENT to indicate EOF */ op_errno = errno; -- cgit