summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshishir gowda <sgowda@redhat.com>2012-08-13 11:20:17 +0530
committerVijay Bellur <vbellur@redhat.com>2012-08-16 22:25:17 -0700
commit502c95bdfb35640fb424b37b080664e9c1639a86 (patch)
tree0f3f2224e3bbd51b3cacd08cc7ef4f3712c9eeaa
parent28328417bff73b13392e95a07c3d359c881bebc8 (diff)
cluster/dht: Optimize readdirp calls in DHT
Bring in option which is supported by posix xlator to filter out directory's entries from being returned. DHT would now request non-first subvols to filter out directory entries. dht xlator-option readdir-optimize will enable this optimization Change-Id: Ibf99f1bef501f285ff44a1cecfbebee9e16063b6 BUG: 838199 Signed-off-by: shishir gowda <sgowda@redhat.com> Reviewed-on: http://review.gluster.com/3806 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--libglusterfs/src/gf-dirent.h1
-rw-r--r--libglusterfs/src/glusterfs.h2
-rw-r--r--xlators/cluster/dht/src/dht-common.c24
-rw-r--r--xlators/cluster/dht/src/dht-common.h4
-rw-r--r--xlators/cluster/dht/src/dht.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1
-rw-r--r--xlators/storage/posix/src/posix.c34
7 files changed, 71 insertions, 3 deletions
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
index 36a5a62..26cb5a6 100644
--- a/libglusterfs/src/gf-dirent.h
+++ b/libglusterfs/src/gf-dirent.h
@@ -48,6 +48,7 @@ struct _gf_dirent_t {
char d_name[0];
};
+#define DT_ISDIR(mode) (mode == DT_DIR)
gf_dirent_t *gf_dirent_for_name (const char *name);
void gf_dirent_free (gf_dirent_t *entries);
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 5ce2fe2..8d6abdb 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -81,6 +81,8 @@
#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
+#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
+
#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \
strlen (GF_XATTR_PATHINFO_KEY)) == 0)
#define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 5da0c10..ade05f3 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -2766,6 +2766,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dht_layout_t *layout = 0;
dht_conf_t *conf = NULL;
xlator_t *subvol = 0;
+ int ret = 0;
INIT_LIST_HEAD (&entries.list);
prev = cookie;
@@ -2843,6 +2844,16 @@ done:
goto unwind;
}
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != dht_first_up_subvol (this)) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict set failed");
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk,
next_subvol, next_subvol->fops->readdirp,
local->fd, local->size, next_offset,
@@ -2968,11 +2979,14 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
xlator_t *xvol = NULL;
off_t xoff = 0;
int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ conf = this->private;
+
local = dht_local_init (frame, NULL, NULL, whichop);
if (!local) {
op_errno = ENOMEM;
@@ -3000,6 +3014,16 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
gf_log (this->name, GF_LOG_WARNING,
"failed to set 'glusterfs.dht.linkto'"
" key");
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != dht_first_up_subvol (this)) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Dict set failed");
+ }
+ }
}
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 9f7723f..da83967 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -246,6 +246,10 @@ struct dht_conf {
/* defrag related */
gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
};
typedef struct dht_conf dht_conf_t;
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index e3930e2..68a8efc 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -331,7 +331,8 @@ reconfigure (xlator_t *this, dict_t *options)
percent, out);
GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
options, uint32, out);
-
+ GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+ bool, out);
if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
if (ret == -1)
@@ -431,6 +432,7 @@ init (xlator_t *this)
GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
bool, err);
+ GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
ret = dht_init_subvolumes (this, conf);
if (ret == -1) {
@@ -593,6 +595,10 @@ struct volume_options options[] = {
{ .key = {"node-uuid"},
.type = GF_OPTION_TYPE_STR,
},
+ { .key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 7cd97ed..198d91a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -117,6 +117,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"cluster.lookup-unhashed", "cluster/distribute", NULL, NULL, NO_DOC, 0 },
{"cluster.min-free-disk", "cluster/distribute", NULL, NULL, NO_DOC, 0 },
{"cluster.min-free-inodes", "cluster/distribute", NULL, NULL, NO_DOC, 0 },
+ {"cluster.readdir-optimize", "cluster/distribute", NULL, NULL, NO_DOC, 0 },
{"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 },
{"cluster.read-subvolume", "cluster/replicate", NULL, NULL, NO_DOC, 0 },
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 63000a3..cedf953 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3547,7 +3547,7 @@ posix_fentrylk (call_frame_t *frame, xlator_t *this,
int
posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
{
off_t in_case = -1;
size_t filled = 0;
@@ -3557,6 +3557,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
int32_t this_size = -1;
gf_dirent_t *this_entry = NULL;
uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+ struct stat stbuf = {0,};
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
+
+ if (skip_dirs) {
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ hpath = alloca (len + 256); /* NAME_MAX */
+ posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
+ len = strlen (hpath);
+ hpath[len] = '/';
+ }
if (!off) {
rewinddir (dir);
@@ -3611,6 +3623,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
continue;
}
+ if (skip_dirs) {
+ if (DT_ISDIR (entry->d_type)) {
+ continue;
+ } else if (hpath) {
+ strcpy (&hpath[len+1],entry->d_name);
+ ret = lstat (hpath, &stbuf);
+ if (!ret && S_ISDIR (stbuf.st_mode))
+ continue;
+ }
+ }
+
this_size = max (sizeof (gf_dirent_t),
sizeof (gfs3_dirplist))
+ strlen (entry->d_name) + 1;
@@ -3731,6 +3754,7 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
gf_dirent_t entries;
+ int32_t skip_dirs = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -3756,7 +3780,13 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
goto out;
}
- count = posix_fill_readdir (fd, dir, off, size, &entries);
+ /* When READDIR_FILTER option is set to on, we can filter out
+ * directory's entry from the entry->list.
+ */
+ ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+ count = posix_fill_readdir (fd, dir, off, size, &entries, this,
+ skip_dirs);
/* pick ENOENT to indicate EOF */
op_errno = errno;