summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShehjar Tikoo <shehjart@gluster.com>2009-05-28 04:42:58 +0000
committerAnand V. Avati <avati@dev.gluster.com>2009-06-03 00:30:54 -0700
commitb6434aadbe3e862815f4237fdf4c97284680a134 (patch)
tree7b8a87771b9efcf9ea4c85fd7bdbdf90b6a6ef5c
parent1fea167f86ed4501ed01b5c678cddc7c815f1a5b (diff)
libglusterfsclient: Add dirent pre-fetching and caching
The fop interface allows more than one dirent to be extracted by a single readdir fop. This commit enables libglusterfsclient to read multiple entries on a glusterfs_readdir call. Once these have been pre-fetched, they are cached until glusterfs_closedir, glusterfs_rewinddir or glusterfs_seekdir is called. The current implementation is beneficial for sequential directory reading and probably makes no difference to applications that do a lot of seekdir and rewinddir after opening the directory, because both of these calls result in dirent cache invalidation. Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient-internals.h30
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.c249
2 files changed, 215 insertions, 64 deletions
diff --git a/libglusterfsclient/src/libglusterfsclient-internals.h b/libglusterfsclient/src/libglusterfsclient-internals.h
index 42ce5d4cac0..fc79a539d69 100755
--- a/libglusterfsclient/src/libglusterfsclient-internals.h
+++ b/libglusterfsclient/src/libglusterfsclient-internals.h
@@ -79,6 +79,34 @@ typedef struct {
struct stat stbuf;
} libglusterfs_client_inode_ctx_t;
+/* Our dirent cache is very simplistic when it comes to directory
+ * reading workloads. It assumes that all directory traversal operations happen
+ * sequentially and that readdir callers don't go jumping around the directory
+ * using seekdir, rewinddir. That's why you'll notice that seekdir, rewinddir
+ * API in libglusterfsclient only set the offset. The consequence is that when
+ * libgf_dcache_readdir finds that the offset presented to it is not
+ * the same as the offset of the previous dirent returned by dcache (..stored
+ * in struct direntcache->prev_off..), it realises that a non-sequential
+ * directory read is in progress and returns 0 to signify that the cache is
+ * not valid.
+ * This could be made a bit more intelligent by using a data structure like
+ * a hash-table or a balanced binary tree that allows us to search for the
+ * existence of particular offsets in the cache without performing a list or
+ * array traversal.
+ * Don't use a simple binary search tree because
+ * there is no guarantee that offsets in a sequential reading of the directory
+ * will be just random integers. If for some reason they are sequential, a BST
+ * will end up becoming a list.
+ */
+struct direntcache {
+ gf_dirent_t entries; /* Head of list of cached dirents. */
+ gf_dirent_t *next; /* Pointer to the next entry that
+ * should be sent by readdir */
+ uint64_t prev_off; /* d_off of the dirent most recently
+ * returned from the cache; reset to 0
+ * when the cache is invalidated. */
+};
+
typedef struct {
pthread_mutex_t lock;
off_t offset;
@@ -88,6 +116,8 @@ typedef struct {
* handle.
*/
struct dirent dirp;
+ struct direntcache *dcache;
+
} libglusterfs_client_fd_ctx_t;
typedef struct libglusterfs_client_async_local {
diff --git a/libglusterfsclient/src/libglusterfsclient.c b/libglusterfsclient/src/libglusterfsclient.c
index 89e64b2e7e1..807fafaf016 100755
--- a/libglusterfsclient/src/libglusterfsclient.c
+++ b/libglusterfsclient/src/libglusterfsclient.c
@@ -186,8 +186,18 @@ libgf_alloc_fd_ctx (libglusterfs_client_ctx_t *ctx, fd_t *fd)
fdctx->ctx = ctx;
ctxaddr = (uint64_t) (long)fdctx;
+ if (fd->inode) {
+ if (S_ISDIR (fd->inode->st_mode)) {
+ fdctx->dcache = CALLOC (1, sizeof (struct direntcache));
+ if (fdctx->dcache)
+ INIT_LIST_HEAD (&fdctx->dcache->entries.list);
+ /* If the calloc fails we still continue. NOTE(review):
+ * libgf_client_readdir only returns dirents through the
+ * dcache, so reads on such an fd would yield no entries.
+ */
+ }
+ }
fd_ctx_set (fd, libgf_inode_to_xlator (fd->inode), ctxaddr);
-
out:
return fdctx;
}
@@ -209,12 +219,166 @@ out:
return ctx;
}
+void
+libgf_dcache_invalidate (fd_t *fd)
+{
+ libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
+ /* Empties fd's dirent cache; a NULL fd or absent dcache is a no-op. */
+ if (!fd)
+ return;
+
+ fd_ctx = libgf_get_fd_ctx (fd);
+ if (!fd_ctx) {
+ errno = EBADF;
+ return;
+ }
+ /* No dcache attached to this fd: nothing to invalidate. */
+ if (!fd_ctx->dcache)
+ return;
+
+ if (!list_empty (&fd_ctx->dcache->entries.list))
+ gf_dirent_free (&fd_ctx->dcache->entries);
+ /* Re-initialise the head so it is a valid empty list again. */
+ INIT_LIST_HEAD (&fd_ctx->dcache->entries.list);
+
+ fd_ctx->dcache->next = NULL;
+ fd_ctx->dcache->prev_off = 0;
+
+ return;
+}
+
+/* Replace fd's dirent cache with the list headed by 'entries' (a placeholder
+ * head; real dirents begin at entries->next). Returns 0 on success or when
+ * the fd has no dcache, and -1 with errno set to EINVAL/EBADF on bad input. */
+int
+libgf_dcache_update (libglusterfs_client_ctx_t *ctx, fd_t *fd,
+ gf_dirent_t *entries)
+{
+ libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
+ int op_ret = -1;
+
+ if ((!ctx) || (!fd) || (!entries)) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ fd_ctx = libgf_get_fd_ctx (fd);
+ if (!fd_ctx) {
+ errno = EBADF;
+ goto out;
+ }
+
+ /* dcache is not enabled. */
+ if (!fd_ctx->dcache) {
+ op_ret = 0;
+ goto out;
+ }
+
+ /* If we're updating, we must begin with invalidating any previous
+ * entries.
+ */
+ libgf_dcache_invalidate (fd);
+
+ fd_ctx->dcache->next = entries->next;
+ /* The cache keeps its own list head so that invalidation can
+ * free the dirents starting from that head.
+ *
+ * The entries are delinked from the list handed to us by the
+ * underlying translator, because most translators free that
+ * list after this call and the dirents must outlive it to stay
+ * cached. NOTE(review): 'next' was captured above, so this
+ * presumably relies on 'entries' being non-empty - confirm.
+ */
+ list_splice_init (&entries->list, &fd_ctx->dcache->entries.list);
+ op_ret = 0;
+out:
+ return op_ret;
+}
+
+int
+libgf_dcache_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
+ struct dirent *dirp, off_t *offset)
+{
+ libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
+ int cachevalid = 0;
+ /* Serves one cached dirent: returns 1 and fills dirp/offset, else 0. */
+ if ((!ctx) || (!fd) || (!dirp) || (!offset))
+ return 0;
+
+ fd_ctx = libgf_get_fd_ctx (fd);
+ if (!fd_ctx) {
+ errno = EBADF;
+ goto out;
+ }
+
+ if (!fd_ctx->dcache)
+ goto out;
+
+ /* We've either run out of entries in the cache
+ * or the cache is empty.
+ */
+ if (!fd_ctx->dcache->next)
+ goto out;
+
+ /* The dirent list is created as a circular linked list
+ * so this check is needed to ensure we don't start
+ * reading old entries again.
+ * If we've reached this situation, the cache is exhausted
+ * and we'll need to pre-fetch more entries to continue serving.
+ */
+ if (fd_ctx->dcache->next == &fd_ctx->dcache->entries)
+ goto out;
+
+ /* During sequential reading we generally expect that the offset
+ * requested is the same as the offset we served in the previous call
+ * to readdir. But, seekdir, rewinddir and libgf_dcache_invalidate
+ * require special handling because seekdir/rewinddir change the offset
+ * in the fd_ctx and libgf_dcache_invalidate changes the prev_off.
+ */
+ if (*offset != fd_ctx->dcache->prev_off) {
+ /* For all cases of the if branch above, we know that the
+ * cache is now invalid except for the case below. It handles
+ * the case where the two offset values above are different
+ * but different because the previous readdir block was
+ * exhausted, resulting in a prev_off being set to 0 in
+ * libgf_dcache_invalidate, while the requested offset is non
+ * zero because that is what we returned for the last dirent
+ * of the previous readdir block.
+ */
+ if ((*offset != 0) && (fd_ctx->dcache->prev_off == 0))
+ cachevalid = 1;
+ } else
+ cachevalid = 1;
+
+ if (!cachevalid)
+ goto out;
+ /* strncpy of d_len bytes may copy no NUL, so terminate explicitly. */
+ dirp->d_ino = fd_ctx->dcache->next->d_ino;
+ strncpy (dirp->d_name, fd_ctx->dcache->next->d_name,
+ fd_ctx->dcache->next->d_len);
+ dirp->d_name[fd_ctx->dcache->next->d_len] = '\0';
+ *offset = fd_ctx->dcache->next->d_off;
+ dirp->d_off = *offset;
+ fd_ctx->dcache->prev_off = fd_ctx->dcache->next->d_off;
+ fd_ctx->dcache->next = fd_ctx->dcache->next->next;
+
+out:
+ return cachevalid;
+}
+
+
int32_t
libgf_client_release (xlator_t *this,
fd_t *fd)
{
libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_del_fd_ctx (fd);
+ fd_ctx = libgf_get_fd_ctx (fd);
+ if (S_ISDIR (fd->inode->st_mode)) {
+ libgf_dcache_invalidate (fd);
+ FREE (fd_ctx->dcache);
+ }
+
+ libgf_del_fd_ctx (fd);
if (fd_ctx != NULL) {
pthread_mutex_destroy (&fd_ctx->lock);
FREE (fd_ctx);
@@ -382,7 +546,13 @@ libgf_client_releasedir (xlator_t *this,
fd_t *fd)
{
libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_del_fd_ctx (fd);
+ fd_ctx = libgf_get_fd_ctx (fd);
+ if (S_ISDIR (fd->inode->st_mode)) {
+ libgf_dcache_invalidate (fd);
+ FREE (fd_ctx->dcache);
+ }
+
+ libgf_del_fd_ctx (fd);
if (fd_ctx != NULL) {
pthread_mutex_destroy (&fd_ctx->lock);
FREE (fd_ctx);
@@ -391,7 +561,6 @@ libgf_client_releasedir (xlator_t *this,
return 0;
}
-
void *poll_proc (void *ptr)
{
glusterfs_ctx_t *ctx = ptr;
@@ -3328,72 +3497,26 @@ libgf_client_readdir_cbk (call_frame_t *frame,
}
int
-libgf_client_readdir (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct dirent *dirp,
- size_t size,
- off_t *offset,
- int32_t num_entries)
+libgf_client_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
+ struct dirent *dirp, off_t *offset)
{
call_stub_t *stub = NULL;
int op_ret = -1;
libgf_client_local_t *local = NULL;
- gf_dirent_t *entry = NULL;
- int32_t count = 0;
- size_t entry_size = 0;
- LIBGF_CLIENT_FOP (ctx, stub, readdir, local, fd, size, *offset);
+ if (libgf_dcache_readdir (ctx, fd, dirp, offset))
+ return 1;
+
+ LIBGF_CLIENT_FOP (ctx, stub, readdir, local, fd,
+ LIBGF_READDIR_BLOCK, *offset);
op_ret = stub->args.readdir_cbk.op_ret;
errno = stub->args.readdir_cbk.op_errno;
- /* Someday we'll support caching the multiple entries returned
- * from the server, till then, the logic below only extracts
- * one entry depending on the previous offset given above.
- */
- if (op_ret > 0) {
- list_for_each_entry (entry,
- &stub->args.readdir_cbk.entries.list,
- list) {
- entry_size = offsetof (struct dirent, d_name)
- + strlen (entry->d_name) + 1;
-
- /* If the offset requested matches the offset of the current
- * entry, it means that we need to search
- * further for entry with the required offset.
- */
- if (*offset == entry->d_off)
- continue;
-
- /* If we cannot fit more data into the given buffer, or
- * if we've extracted the requested number of entries, well,
- * break. */
- if ((size < entry_size) || (count == num_entries))
- break;
-
- size -= entry_size;
-
- dirp->d_ino = entry->d_ino;
- /*
- #ifdef GF_DARWIN_HOST_OS
- dirp->d_off = entry->d_seekoff;
- #endif
- #ifdef GF_LINUX_HOST_OS
- dirp->d_off = entry->d_off;
- #endif
- */
-
- /* dirp->d_type = entry->d_type; */
- dirp->d_reclen = entry->d_len;
- strncpy (dirp->d_name, entry->d_name, dirp->d_reclen);
- dirp->d_name[dirp->d_reclen] = '\0';
-
- dirp = (struct dirent *) (((char *) dirp) + entry_size);
- *offset = entry->d_off;
- count++;
- }
- }
+ if (op_ret > 0)
+ libgf_dcache_update (ctx, fd, &stub->args.readdir_cbk.entries);
+ op_ret = libgf_dcache_readdir (ctx, fd, dirp, offset);
call_stub_destroy (stub);
return op_ret;
}
@@ -3422,8 +3545,7 @@ glusterfs_readdir (glusterfs_dir_t dirfd)
pthread_mutex_unlock (&fd_ctx->lock);
memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp,
- LIBGF_READDIR_BLOCK, &offset, 1);
+ op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, &offset);
if (op_ret <= 0) {
dirp = NULL;
@@ -3463,8 +3585,7 @@ glusterfs_getdents (glusterfs_file_t fd, struct dirent *dirp,
}
pthread_mutex_unlock (&fd_ctx->lock);
- op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, count, &offset,
- -1);
+ op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, &offset);
if (op_ret > 0) {
pthread_mutex_lock (&fd_ctx->lock);