From 1a0cf7dc85967387fe2c458914409b888fc86235 Mon Sep 17 00:00:00 2001 From: Shehjar Tikoo Date: Thu, 28 May 2009 04:42:58 +0000 Subject: libglusterfsclient: Add dirent pre-fetching and caching The fop interface is such that we're able to extract more than 1 dirent in a readdir fop. This commit now enables libglusterfsclient to read multiple entries on a glusterfs_readdir call. Once these have been pre-fetched, they're cached till either glusterfs_closedir ,glusterfs_rewinddir or glusterfs_seekdir are called. The current implementation is beneficial for sequential directory reading and probably indifferent to applications that do a lot of seekdir and rewinddir after opening the directory. This is because both these calls result in dirent cache invalidation. Signed-off-by: Anand V. Avati --- .../src/libglusterfsclient-internals.h | 30 +++ libglusterfsclient/src/libglusterfsclient.c | 249 +++++++++++++++------ 2 files changed, 215 insertions(+), 64 deletions(-) (limited to 'libglusterfsclient') diff --git a/libglusterfsclient/src/libglusterfsclient-internals.h b/libglusterfsclient/src/libglusterfsclient-internals.h index 42ce5d4cac0..fc79a539d69 100755 --- a/libglusterfsclient/src/libglusterfsclient-internals.h +++ b/libglusterfsclient/src/libglusterfsclient-internals.h @@ -79,6 +79,34 @@ typedef struct { struct stat stbuf; } libglusterfs_client_inode_ctx_t; +/* Our dirent cache is very simplistic when it comes to directory + * reading workloads. It assumes that all directory traversal operations happen + * sequentially and that readdir callers dont go jumping around the directory + * using seekdir, rewinddir. Thats why you'll notice that seekdir, rewinddir + * API in libglusterfsclient only set the offset. The consequence is that when + * libgf_dcache_readdir finds that the offset presented to it, is not + * the same as the offset of the previous dirent returned by dcache (..stored + * in struct direntcache->prev_off..), it realises that a non-sequential + * directory read is in progress and returns 0 to signify that the cache is + * not valid. + * This could be made a bit more intelligent by using a data structure like + * a hash-table or a balanced binary tree that allows us to search for the + * existence of particular offsets in the cache without performing a list or + * array traversal. + * Dont use a simple binary search tree because + * there is no guarantee that offsets in a sequential reading of the directory + * will be just random integers. If for some reason they are sequential, a BST + * will end up becoming a list. + */ +struct direntcache { + gf_dirent_t entries; /* Head of list of cached dirents. */ + gf_dirent_t *next; /* Pointer to the next entry that + * should be sent by readdir */ + uint64_t prev_off; /* Offset where the next read will + * happen. + */ +}; + typedef struct { pthread_mutex_t lock; off_t offset; @@ -88,6 +116,8 @@ typedef struct { * handle. */ struct dirent dirp; + struct direntcache *dcache; + } libglusterfs_client_fd_ctx_t; typedef struct libglusterfs_client_async_local { diff --git a/libglusterfsclient/src/libglusterfsclient.c b/libglusterfsclient/src/libglusterfsclient.c index 89e64b2e7e1..807fafaf016 100755 --- a/libglusterfsclient/src/libglusterfsclient.c +++ b/libglusterfsclient/src/libglusterfsclient.c @@ -186,8 +186,18 @@ libgf_alloc_fd_ctx (libglusterfs_client_ctx_t *ctx, fd_t *fd) fdctx->ctx = ctx; ctxaddr = (uint64_t) (long)fdctx; + if (fd->inode) { + if (S_ISDIR (fd->inode->st_mode)) { + fdctx->dcache = CALLOC (1, sizeof (struct direntcache)); + if (fdctx->dcache) + INIT_LIST_HEAD (&fdctx->dcache->entries.list); + /* If the calloc fails, we can still continue + * working as the dcache is not required for correct + * operation. + */ + } + } fd_ctx_set (fd, libgf_inode_to_xlator (fd->inode), ctxaddr); - out: return fdctx; } @@ -209,12 +219,166 @@ out: return ctx; } +void +libgf_dcache_invalidate (fd_t *fd) +{ + libglusterfs_client_fd_ctx_t *fd_ctx = NULL; + + if (!fd) + return; + + fd_ctx = libgf_get_fd_ctx (fd); + if (!fd_ctx) { + errno = EBADF; + return; + } + + if (!fd_ctx->dcache) + return; + + if (!list_empty (&fd_ctx->dcache->entries.list)) + gf_dirent_free (&fd_ctx->dcache->entries); + + INIT_LIST_HEAD (&fd_ctx->dcache->entries.list); + + fd_ctx->dcache->next = NULL; + fd_ctx->dcache->prev_off = 0; + + return; +} + +/* The first entry in the entries is always a placeholder + * or the list head. The real entries begin from entries->next. + */ +int +libgf_dcache_update (libglusterfs_client_ctx_t *ctx, fd_t *fd, + gf_dirent_t *entries) +{ + libglusterfs_client_fd_ctx_t *fd_ctx = NULL; + int op_ret = -1; + + if ((!ctx) || (!fd) || (!entries)) { + errno = EINVAL; + goto out; + } + + fd_ctx = libgf_get_fd_ctx (fd); + if (!fd_ctx) { + errno = EBADF; + goto out; + } + + /* dcache is not enabled. */ + if (!fd_ctx->dcache) { + op_ret = 0; + goto out; + } + + /* If we're updating, we must begin with invalidating any previous + * entries. + */ + libgf_dcache_invalidate (fd); + + fd_ctx->dcache->next = entries->next; + /* We still need to store a pointer to the head + * so we start free'ing from the head when invalidation + * is required. + * + * Need to delink the entries from the list + * given to us by an underlying translators. Most translators will + * free this list after this call so we must preserve the dirents in + * order to cache them. + */ + list_splice_init (&entries->list, &fd_ctx->dcache->entries.list); + op_ret = 0; +out: + return op_ret; +} + +int +libgf_dcache_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd, + struct dirent *dirp, off_t *offset) +{ + libglusterfs_client_fd_ctx_t *fd_ctx = NULL; + int cachevalid = 0; + + if ((!ctx) || (!fd) || (!dirp) || (!offset)) + return 0; + + fd_ctx = libgf_get_fd_ctx (fd); + if (!fd_ctx) { + errno = EBADF; + goto out; + } + + if (!fd_ctx->dcache) + goto out; + + /* We've either run out of entries in the cache + * or the cache is empty. + */ + if (!fd_ctx->dcache->next) + goto out; + + /* The dirent list is created as a circular linked list + * so this check is needed to ensure, we dont start + * reading old entries again. + * If we're reached this situation, the cache is exhausted + * and we'll need to pre-fetch more entries to continue serving. + */ + if (fd_ctx->dcache->next == &fd_ctx->dcache->entries) + goto out; + + /* During sequential reading we generally expect that the offset + * requested is the same as the offset we served in the previous call + * to readdir. But, seekdir, rewinddir and libgf_dcache_invalidate + * require special handling because seekdir/rewinddir change the offset + * in the fd_ctx and libgf_dcache_invalidate changes the prev_off. + */ + if (*offset != fd_ctx->dcache->prev_off) { + /* For all cases of the if branch above, we know that the + * cache is now invalid except for the case below. It handles + * the case where the two offset values above are different + * but different because the previous readdir block was + * exhausted, resulting in a prev_off being set to 0 in + * libgf_dcache_invalidate, while the requested offset is non + * zero because that is what we returned for the last dirent + * of the previous readdir block. + */ + if ((*offset != 0) && (fd_ctx->dcache->prev_off == 0)) + cachevalid = 1; + } else + cachevalid = 1; + + if (!cachevalid) + goto out; + + dirp->d_ino = fd_ctx->dcache->next->d_ino; + strncpy (dirp->d_name, fd_ctx->dcache->next->d_name, + fd_ctx->dcache->next->d_len); + + *offset = fd_ctx->dcache->next->d_off; + dirp->d_off = *offset; + fd_ctx->dcache->prev_off = fd_ctx->dcache->next->d_off; + fd_ctx->dcache->next = fd_ctx->dcache->next->next; + +out: + return cachevalid; +} + + int32_t libgf_client_release (xlator_t *this, fd_t *fd) { libglusterfs_client_fd_ctx_t *fd_ctx = NULL; - fd_ctx = libgf_del_fd_ctx (fd); + fd_ctx = libgf_get_fd_ctx (fd); + if (S_ISDIR (fd->inode->st_mode)) { + libgf_dcache_invalidate (fd); + FREE (fd_ctx->dcache); + } + + libgf_del_fd_ctx (fd); if (fd_ctx != NULL) { pthread_mutex_destroy (&fd_ctx->lock); FREE (fd_ctx); @@ -382,7 +546,13 @@ libgf_client_releasedir (xlator_t *this, fd_t *fd) { libglusterfs_client_fd_ctx_t *fd_ctx = NULL; - fd_ctx = libgf_del_fd_ctx (fd); + fd_ctx = libgf_get_fd_ctx (fd); + if (S_ISDIR (fd->inode->st_mode)) { + libgf_dcache_invalidate (fd); + FREE (fd_ctx->dcache); + } + + libgf_del_fd_ctx (fd); if (fd_ctx != NULL) { pthread_mutex_destroy (&fd_ctx->lock); FREE (fd_ctx); @@ -391,7 +561,6 @@ libgf_client_releasedir (xlator_t *this, return 0; } - void *poll_proc (void *ptr) { glusterfs_ctx_t *ctx = ptr; @@ -3328,72 +3497,26 @@ libgf_client_readdir_cbk (call_frame_t *frame, } int -libgf_client_readdir (libglusterfs_client_ctx_t *ctx, - fd_t *fd, - struct dirent *dirp, - size_t size, - off_t *offset, - int32_t num_entries) +libgf_client_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd, + struct dirent *dirp, off_t *offset) { call_stub_t *stub = NULL; int op_ret = -1; libgf_client_local_t *local = NULL; - gf_dirent_t *entry = NULL; - int32_t count = 0; - size_t entry_size = 0; - LIBGF_CLIENT_FOP (ctx, stub, readdir, local, fd, size, *offset); + if (libgf_dcache_readdir (ctx, fd, dirp, offset)) + return 1; + + LIBGF_CLIENT_FOP (ctx, stub, readdir, local, fd, + LIBGF_READDIR_BLOCK, *offset); op_ret = stub->args.readdir_cbk.op_ret; errno = stub->args.readdir_cbk.op_errno; - /* Someday we'll support caching the multiple entries returned - * from the server, till then, the logic below only extracts - * one entry depending on the previous offset given above. - */ - if (op_ret > 0) { - list_for_each_entry (entry, - &stub->args.readdir_cbk.entries.list, - list) { - entry_size = offsetof (struct dirent, d_name) - + strlen (entry->d_name) + 1; - - /* If the offset requested matches the offset of the current - * entry, it means that we need to search - * further for entry with the required offset. - */ - if (*offset == entry->d_off) - continue; - - /* If we cannot fit more data into the given buffer, or - * if we've extracted the requested number of entries, well, - * break. */ - if ((size < entry_size) || (count == num_entries)) - break; - - size -= entry_size; - - dirp->d_ino = entry->d_ino; - /* - #ifdef GF_DARWIN_HOST_OS - dirp->d_off = entry->d_seekoff; - #endif - #ifdef GF_LINUX_HOST_OS - dirp->d_off = entry->d_off; - #endif - */ - - /* dirp->d_type = entry->d_type; */ - dirp->d_reclen = entry->d_len; - strncpy (dirp->d_name, entry->d_name, dirp->d_reclen); - dirp->d_name[dirp->d_reclen] = '\0'; - - dirp = (struct dirent *) (((char *) dirp) + entry_size); - *offset = entry->d_off; - count++; - } - } + if (op_ret > 0) + libgf_dcache_update (ctx, fd, &stub->args.readdir_cbk.entries); + op_ret = libgf_dcache_readdir (ctx, fd, dirp, offset); call_stub_destroy (stub); return op_ret; } @@ -3422,8 +3545,7 @@ glusterfs_readdir (glusterfs_dir_t dirfd) pthread_mutex_unlock (&fd_ctx->lock); memset (dirp, 0, sizeof (struct dirent)); - op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, - LIBGF_READDIR_BLOCK, &offset, 1); + op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, &offset); if (op_ret <= 0) { dirp = NULL; @@ -3463,8 +3585,7 @@ glusterfs_getdents (glusterfs_file_t fd, struct dirent *dirp, } pthread_mutex_unlock (&fd_ctx->lock); - op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, count, &offset, - -1); + op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, &offset); if (op_ret > 0) { pthread_mutex_lock (&fd_ctx->lock); -- cgit