summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSakshi <sabansal@redhat.com>2017-01-23 12:11:49 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-06-06 13:10:15 +0000
commit4757416dd2cd2110342f2e25e8e786d6c4b4abb2 (patch)
treedc6d92dcac25c02d97d15f8d3b0c55731332622f
parent8f79df30c034b303ca86aa4233fd8e899f1da888 (diff)
cluster/dht: Make optimal usage of buffer provided with readdir(p)
dht_readdirp must unwind with list of entries only after the entire buffer requested by kernel is filled to avoid extra syscalls occurring when returning partially filled buffer. Also wind readdir call to next subvol on reaching EOD for directory on that subvol to avoid extra network call. >Change-Id: If2e1a2722f813d95457c7542bff25fef56c7a041 >BUG: 1356453 >Signed-off-by: Sakshi <sabansal@redhat.com> >Signed-off-by: Raghavendra G <rgowdapp@redhat.com> >Reviewed-on: https://review.gluster.org/12271 >Smoke: Gluster Build System <jenkins@build.gluster.org> >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> >CentOS-regression: Gluster Build System <jenkins@build.gluster.org> >Reviewed-by: Amar Tumballi <amarts@redhat.com> >Reviewed-by: Susant Palai <spalai@redhat.com> (cherry picked from commit b9406e210717621bc672a63c1cbd1b0183834056) Change-Id: If2e1a2722f813d95457c7542bff25fef56c7a041 BUG: 1457339 Signed-off-by: Sakshi <sabansal@redhat.com> Signed-off-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-on: https://review.gluster.org/17429 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
-rw-r--r--xlators/cluster/dht/src/dht-common.c105
-rw-r--r--xlators/cluster/dht/src/dht-common.h4
-rw-r--r--xlators/cluster/dht/src/dht-helper.c3
-rw-r--r--xlators/cluster/dht/src/dht-lock.c4
4 files changed, 62 insertions, 54 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 8b4fd5cf37b..41a71116bf8 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -4999,7 +4999,6 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
dht_local_t *local = NULL;
- gf_dirent_t entries;
gf_dirent_t *orig_entry = NULL;
gf_dirent_t *entry = NULL;
xlator_t *prev = NULL;
@@ -5016,7 +5015,6 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
inode_table_t *itable = NULL;
inode_t *inode = NULL;
- INIT_LIST_HEAD (&entries.list);
prev = cookie;
local = frame->local;
itable = local->fd ? local->fd->inode->table : NULL;
@@ -5026,9 +5024,14 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
methods = &(conf->methods);
+ local->op_errno = op_errno;
+
if (op_ret < 0)
goto done;
+ if (local->op_ret < 0)
+ local->op_ret = 0;
+
if (!local->layout)
local->layout = dht_layout_get (this, local->fd->inode);
@@ -5043,11 +5046,10 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto done;
if (conf->readdir_optimize == _gf_true)
- readdir_optimize = 1;
+ readdir_optimize = 1;
list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
-
if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
/*stat failed somewhere- ignore this entry*/
gf_msg_debug (this->name, EINVAL,
@@ -5080,8 +5082,8 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (prev == hashed_subvol)
goto list;
if ((hashed_subvol
- && dht_subvol_status (conf, hashed_subvol))
- || (prev != local->first_up_subvol))
+ && dht_subvol_status (conf, hashed_subvol))
+ || (prev != local->first_up_subvol))
continue;
goto list;
@@ -5092,10 +5094,10 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
conf->link_xattr_name)) {
continue;
}
+
list:
entry = gf_dirent_for_name (orig_entry->d_name);
if (!entry) {
-
goto unwind;
}
@@ -5167,26 +5169,17 @@ list:
}
}
}
- list_add_tail (&entry->list, &entries.list);
+
+ list_add_tail (&entry->list, &local->entries.list);
+ local->filled += gf_dirent_size (entry->d_name);
count++;
+ local->op_ret++;
}
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev != dht_last_up_subvol (this))
- op_errno = 0;
done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
+ if ((count == 0) || (local && (local->filled < local->size))) {
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
next_subvol = dht_subvol_next (this, prev);
} else {
next_subvol = prev;
@@ -5220,25 +5213,30 @@ done:
}
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if ((local->op_ret >= 0) && (prev != dht_last_up_subvol (this)))
+ local->op_errno = 0;
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
- gf_dirent_free (&entries);
+ DHT_STACK_UNWIND (readdirp, frame, local->op_ret, local->op_errno,
+ &local->entries, NULL);
return 0;
}
-
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, gf_dirent_t *orig_entries,
dict_t *xdata)
{
dht_local_t *local = NULL;
- gf_dirent_t entries;
gf_dirent_t *orig_entry = NULL;
gf_dirent_t *entry = NULL;
xlator_t *prev = NULL;
@@ -5250,7 +5248,6 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_conf_t *conf = NULL;
dht_methods_t *methods = NULL;
- INIT_LIST_HEAD (&entries.list);
prev = cookie;
local = frame->local;
@@ -5259,8 +5256,14 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
methods = &(conf->methods);
- if (op_ret < 0)
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
goto done;
+ }
+
+ if (local->op_ret < 0)
+ local->op_ret = 0;
if (!local->layout)
local->layout = dht_layout_get (this, local->fd->inode);
@@ -5287,27 +5290,16 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
- list_add_tail (&entry->list, &entries.list);
+ list_add_tail (&entry->list, &local->entries.list);
count++;
+ local->filled += gf_dirent_size (entry->d_name);
+ local->op_ret++;
}
}
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev != dht_last_up_subvol (this))
- op_errno = 0;
done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
+ if ((count == 0) || (local && (local->filled < local->size))) {
+ if ((op_ret <= 0) || (op_errno == ENOENT)) {
next_subvol = dht_subvol_next (this, prev);
} else {
next_subvol = prev;
@@ -5324,12 +5316,19 @@ done:
}
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if ((local->op_ret >= 0) && (prev != dht_last_up_subvol (this)))
+ local->op_errno = 0;
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
- gf_dirent_free (&entries);
+ DHT_STACK_UNWIND (readdir, frame, local->op_ret, local->op_errno,
+ &local->entries, NULL);
return 0;
}
@@ -5362,6 +5361,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->size = size;
local->xattr_req = (dict)? dict_ref (dict) : NULL;
local->first_up_subvol = dht_first_up_subvol (this);
+ local->op_ret = -1;
dht_deitransform (this, yoff, &xvol);
@@ -8369,7 +8369,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
goto err;
}
- lookup_local = mem_get0 (this->local_pool);
+ lookup_local = dht_local_init (lookup_frame, NULL, NULL,
+ GF_FOP_LOOKUP);
if (!lookup_local) {
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index f982bf6ac1a..e5076146c22 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -346,6 +346,10 @@ struct dht_local {
call_stub_t *stub;
int32_t parent_disk_layout[4];
+ /* To hold dentries of readdir spanning across subvols */
+ gf_dirent_t entries;
+ size_t filled;
+
/* rename rollback */
int *ret_cache ;
};
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 38965298325..08825cf0b93 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -484,6 +484,8 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
if (local->ret_cache)
GF_FREE (local->ret_cache);
+ gf_dirent_free (&local->entries);
+
mem_put (local);
}
@@ -523,6 +525,7 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
inode);
}
+ INIT_LIST_HEAD (&local->entries.list);
frame->local = local;
out:
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
index 0a198a17db4..45ebeec99e8 100644
--- a/xlators/cluster/dht/src/dht-lock.c
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -455,7 +455,7 @@ dht_unlock_entrylk_wrapper (call_frame_t *frame, dht_elock_wrap_t *entrylk)
goto done;
}
- lock_local = mem_get0 (THIS->local_pool);
+ lock_local = dht_local_init (lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
DHT_MSG_PARENT_LAYOUT_CHANGED,
@@ -848,7 +848,7 @@ dht_unlock_inodelk_wrapper (call_frame_t *frame, dht_ilock_wrap_t *inodelk)
goto done;
}
- lock_local = mem_get0 (THIS->local_pool);
+ lock_local = dht_local_init (lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM,
DHT_MSG_PARENT_LAYOUT_CHANGED,