From 502c95bdfb35640fb424b37b080664e9c1639a86 Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Mon, 13 Aug 2012 11:20:17 +0530 Subject: cluster/dht: Optimize readdirp calls in DHT Bring in option which is supported by posix xlator to filter out directory's entries from being returned. DHT would now request non-first subvols to filter out directory entries. dht xlator-option readdir-optimize will enable this optimization Change-Id: Ibf99f1bef501f285ff44a1cecfbebee9e16063b6 BUG: 838199 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.com/3806 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- libglusterfs/src/gf-dirent.h | 1 + libglusterfs/src/glusterfs.h | 2 ++ xlators/cluster/dht/src/dht-common.c | 24 ++++++++++++++++++++ xlators/cluster/dht/src/dht-common.h | 4 ++++ xlators/cluster/dht/src/dht.c | 8 ++++++- xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 + xlators/storage/posix/src/posix.c | 34 +++++++++++++++++++++++++++-- 7 files changed, 71 insertions(+), 3 deletions(-) diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h index 36a5a629cf2..26cb5a66872 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/gf-dirent.h @@ -48,6 +48,7 @@ struct _gf_dirent_t { char d_name[0]; }; +#define DT_ISDIR(mode) (mode == DT_DIR) gf_dirent_t *gf_dirent_for_name (const char *name); void gf_dirent_free (gf_dirent_t *entries); diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 5ce2fe227f4..8d6abdb0454 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -81,6 +81,8 @@ #define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid" #define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id" +#define GF_READDIR_SKIP_DIRS "readdir-filter-directories" + #define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \ strlen (GF_XATTR_PATHINFO_KEY)) == 0) #define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5da0c10bba2..ade05f38d3c 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2766,6 +2766,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dht_layout_t *layout = 0; dht_conf_t *conf = NULL; xlator_t *subvol = 0; + int ret = 0; INIT_LIST_HEAD (&entries.list); prev = cookie; @@ -2843,6 +2844,16 @@ done: goto unwind; } + if (conf->readdir_optimize == _gf_true) { + if (next_subvol != dht_first_up_subvol (this)) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "dict set failed"); + } + } + STACK_WIND (frame, dht_readdirp_cbk, next_subvol, next_subvol->fops->readdirp, local->fd, local->size, next_offset, @@ -2968,11 +2979,14 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, xlator_t *xvol = NULL; off_t xoff = 0; int ret = 0; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + conf = this->private; + local = dht_local_init (frame, NULL, NULL, whichop); if (!local) { op_errno = ENOMEM; @@ -3000,6 +3014,16 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, gf_log (this->name, GF_LOG_WARNING, "failed to set 'glusterfs.dht.linkto'" " key"); + if (conf->readdir_optimize == _gf_true) { + if (xvol != dht_first_up_subvol (this)) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, + GF_LOG_ERROR, + "Dict set failed"); + } + } } STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 9f7723fdbde..da83967e76d 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -246,6 +246,10 @@ struct dht_conf { /* defrag related */ gf_defrag_info_t *defrag; + + /* Request to filter directory entries in readdir request */ + + gf_boolean_t readdir_optimize; }; typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index e3930e2af3f..68a8efcde88 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -331,7 +331,8 @@ reconfigure (xlator_t *this, dict_t *options) percent, out); GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); - + GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options, + bool, out); if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { ret = dht_parse_decommissioned_bricks (this, conf, temp_str); if (ret == -1) @@ -431,6 +432,7 @@ init (xlator_t *this) GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down, bool, err); + GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err); ret = dht_init_subvolumes (this, conf); if (ret == -1) { @@ -593,6 +595,10 @@ struct volume_options options[] = { { .key = {"node-uuid"}, .type = GF_OPTION_TYPE_STR, }, + { .key = {"readdir-optimize"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 7cd97ed5de3..198d91ad95e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -117,6 +117,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.lookup-unhashed", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.min-free-disk", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.min-free-inodes", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, + {"cluster.readdir-optimize", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.read-subvolume", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 63000a35120..cedf9538b25 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3547,7 +3547,7 @@ posix_fentrylk (call_frame_t *frame, xlator_t *this, int posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, - gf_dirent_t *entries) + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { off_t in_case = -1; size_t filled = 0; @@ -3557,6 +3557,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, int32_t this_size = -1; gf_dirent_t *this_entry = NULL; uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; + + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + } if (!off) { rewinddir (dir); @@ -3611,6 +3623,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, continue; } + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } + } + this_size = max (sizeof (gf_dirent_t), sizeof (gfs3_dirplist)) + strlen (entry->d_name) + 1; @@ -3731,6 +3754,7 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; gf_dirent_t entries; + int32_t skip_dirs = 0; VALIDATE_OR_GOTO (frame, out); @@ -3756,7 +3780,13 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, goto out; } - count = posix_fill_readdir (fd, dir, off, size, &entries); + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); /* pick ENOENT to indicate EOF */ op_errno = errno; -- cgit