diff options
Diffstat (limited to 'xlators/performance/readdir-ahead/src')
5 files changed, 1313 insertions, 413 deletions
diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am index 539d6ede434..3d6b6ae951f 100644 --- a/xlators/performance/readdir-ahead/src/Makefile.am +++ b/xlators/performance/readdir-ahead/src/Makefile.am @@ -1,14 +1,17 @@ xlator_LTLIBRARIES = readdir-ahead.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -readdir_ahead_la_LDFLAGS = -module -avoid-version +readdir_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) readdir_ahead_la_SOURCES = readdir-ahead.c readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h +noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h \ + readdir-ahead-messages.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h index 39e2c536975..498ffae7f64 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h @@ -8,17 +8,17 @@ cases as published by the Free Software Foundation. 
*/ - #ifndef __RDA_MEM_TYPES_H__ #define __RDA_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_rda_mem_types_ { - gf_rda_mt_rda_local = gf_common_mt_end + 1, - gf_rda_mt_rda_fd_ctx, - gf_rda_mt_rda_priv, - gf_rda_mt_end + gf_rda_mt_rda_local = gf_common_mt_end + 1, + gf_rda_mt_rda_fd_ctx, + gf_rda_mt_rda_priv, + gf_rda_mt_inode_ctx_t, + gf_rda_mt_end }; #endif diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h new file mode 100644 index 00000000000..28ec14dd845 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h @@ -0,0 +1,30 @@ +/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _READDIR_AHEAD_MESSAGES_H_ +#define _READDIR_AHEAD_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(READDIR_AHEAD, READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED, + READDIR_AHEAD_MSG_VOL_MISCONFIGURED, READDIR_AHEAD_MSG_NO_MEMORY, + READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB, + READDIR_AHEAD_MSG_OUT_OF_SEQUENCE, READDIR_AHEAD_MSG_DICT_OP_FAILED); + +#endif /* _READDIR_AHEAD_MESSAGES_H_ */ diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c index 1a567a03ea8..4ba7ee7077a 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.c +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -23,68 +23,242 @@ * preloads on the directory. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "xlator.h" -#include "call-stub.h" +#include <math.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> #include "readdir-ahead.h" #include "readdir-ahead-mem-types.h" -#include "defaults.h" +#include <glusterfs/defaults.h> +#include "readdir-ahead-messages.h" +static int +rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); -static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); +static void +rda_local_wipe(struct rda_local *local) +{ + if (local->fd) + fd_unref(local->fd); + if (local->xattrs) + dict_unref(local->xattrs); + if (local->inode) + inode_unref(local->inode); +} /* * Get (or create) the fd context for storing prepopulated directory * entries. 
*/ -static struct -rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this) -{ - uint64_t val; - struct rda_fd_ctx *ctx; - - LOCK(&fd->lock); - - if (__fd_ctx_get(fd, this, &val) < 0) { - ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), - gf_rda_mt_rda_fd_ctx); - if (!ctx) - goto out; - - LOCK_INIT(&ctx->lock); - INIT_LIST_HEAD(&ctx->entries.list); - ctx->state = RDA_FD_NEW; - /* ctx offset values initialized to 0 */ - - if (__fd_ctx_set(fd, this, (uint64_t) ctx) < 0) { - GF_FREE(ctx); - ctx = NULL; - goto out; - } - } else { - ctx = (struct rda_fd_ctx *) val; - } +static struct rda_fd_ctx * +get_rda_fd_ctx(fd_t *fd, xlator_t *this) +{ + uint64_t val; + struct rda_fd_ctx *ctx; + + LOCK(&fd->lock); + + if (__fd_ctx_get(fd, this, &val) < 0) { + ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), gf_rda_mt_rda_fd_ctx); + if (!ctx) + goto out; + + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->entries.list); + ctx->state = RDA_FD_NEW; + /* ctx offset values initialized to 0 */ + ctx->xattrs = NULL; + + if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx) < 0) { + GF_FREE(ctx); + ctx = NULL; + goto out; + } + } else { + ctx = (struct rda_fd_ctx *)(uintptr_t)val; + } +out: + UNLOCK(&fd->lock); + return ctx; +} + +static rda_inode_ctx_t * +__rda_inode_ctx_get(inode_t *inode, xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + rda_inode_ctx_t *ctx_p = NULL; + + ret = __inode_ctx_get1(inode, this, &ctx_uint); + if (ret == 0) + return (rda_inode_ctx_t *)(uintptr_t)ctx_uint; + + ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_rda_mt_inode_ctx_t); + if (!ctx_p) + return NULL; + + GF_ATOMIC_INIT(ctx_p->generation, 0); + + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set1(inode, this, &ctx_uint); + if (ret < 0) { + GF_FREE(ctx_p); + return NULL; + } + + return ctx_p; +} + +static int +__rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this, + struct iatt *stbuf_in, struct iatt *stbuf_out, + uint64_t generation) +{ + rda_inode_ctx_t *ctx_p = NULL; + struct iatt tmp_stat = { 
+ 0, + }; + + ctx_p = __rda_inode_ctx_get(inode, this); + if (!ctx_p) + return -1; + + if ((!stbuf_in) || (stbuf_in->ia_ctime == 0)) { + /* A fop modified a file but valid stbuf is not provided. + * Can't update iatt to reflect results of fop and hence + * invalidate the iatt stored in dentry. + * + * An example of this case can be response of write request + * that is cached in write-behind. + */ + if (stbuf_in) + tmp_stat = *stbuf_in; + else + tmp_stat = ctx_p->statbuf; + memset(&ctx_p->statbuf, 0, sizeof(ctx_p->statbuf)); + gf_uuid_copy(ctx_p->statbuf.ia_gfid, tmp_stat.ia_gfid); + ctx_p->statbuf.ia_type = tmp_stat.ia_type; + GF_ATOMIC_INC(ctx_p->generation); + } else { + if (ctx_p->statbuf.ia_ctime) { + if (stbuf_in->ia_ctime < ctx_p->statbuf.ia_ctime) { + goto out; + } + + if ((stbuf_in->ia_ctime == ctx_p->statbuf.ia_ctime) && + (stbuf_in->ia_ctime_nsec < ctx_p->statbuf.ia_ctime_nsec)) { + goto out; + } + } else { + if ((generation != -1) && + (generation != GF_ATOMIC_GET(ctx_p->generation))) + goto out; + } + + ctx_p->statbuf = *stbuf_in; + } + out: - UNLOCK(&fd->lock); - return ctx; + if (stbuf_out) + *stbuf_out = ctx_p->statbuf; + + return 0; +} + +static int +rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this, + struct iatt *stbuf_in, struct iatt *stbuf_out, + uint64_t generation) +{ + int ret = -1; + + LOCK(&inode->lock); + { + ret = __rda_inode_ctx_update_iatts(inode, this, stbuf_in, stbuf_out, + generation); + } + UNLOCK(&inode->lock); + + return ret; } /* * Reset the tracking state of the context. 
*/ static void -rda_reset_ctx(struct rda_fd_ctx *ctx) +rda_reset_ctx(xlator_t *this, struct rda_fd_ctx *ctx) { - ctx->state = RDA_FD_NEW; - ctx->cur_offset = 0; - ctx->cur_size = 0; - ctx->next_offset = 0; - ctx->op_errno = 0; - gf_dirent_free(&ctx->entries); + struct rda_priv *priv = NULL; + + priv = this->private; + + ctx->state = RDA_FD_NEW; + ctx->cur_offset = 0; + ctx->next_offset = 0; + ctx->op_errno = 0; + + gf_dirent_free(&ctx->entries); + GF_ATOMIC_SUB(priv->rda_cache_size, ctx->cur_size); + ctx->cur_size = 0; + + if (ctx->xattrs) { + dict_unref(ctx->xattrs); + ctx->xattrs = NULL; + } +} + +static void +rda_mark_inode_dirty(xlator_t *this, inode_t *inode) +{ + inode_t *parent = NULL; + fd_t *fd = NULL; + uint64_t val = 0; + int32_t ret = 0; + struct rda_fd_ctx *fd_ctx = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + parent = inode_parent(inode, NULL, NULL); + if (parent) { + LOCK(&parent->lock); + { + list_for_each_entry(fd, &parent->fd_list, inode_list) + { + val = 0; + fd_ctx_get(fd, this, &val); + if (val == 0) + continue; + + fd_ctx = (void *)(uintptr_t)val; + uuid_utoa_r(inode->gfid, gfid); + if (!GF_ATOMIC_GET(fd_ctx->prefetching)) + continue; + + LOCK(&fd_ctx->lock); + { + if (GF_ATOMIC_GET(fd_ctx->prefetching)) { + if (fd_ctx->writes_during_prefetch == NULL) + fd_ctx->writes_during_prefetch = dict_new(); + + ret = dict_set_int8(fd_ctx->writes_during_prefetch, + gfid, 1); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "marking to invalidate stats of %s from an " + "in progress " + "prefetching has failed, might result in " + "stale stat to " + "application", + gfid); + } + } + } + UNLOCK(&fd_ctx->lock); + } + } + UNLOCK(&parent->lock); + inode_unref(parent); + } + + return; } /* @@ -95,12 +269,33 @@ rda_reset_ctx(struct rda_fd_ctx *ctx) static gf_boolean_t rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) { - if ((ctx->state & RDA_FD_EOD) || - (ctx->state & RDA_FD_ERROR) || - (!(ctx->state & RDA_FD_PLUGGED) && 
(ctx->cur_size > 0))) - return _gf_true; + if ((ctx->state & RDA_FD_EOD) || (ctx->state & RDA_FD_ERROR) || + (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) || + (request_size && ctx->cur_size >= request_size)) + return _gf_true; + + return _gf_false; +} + +void +rda_inode_ctx_get_iatt(inode_t *inode, xlator_t *this, struct iatt *attr) +{ + rda_inode_ctx_t *ctx_p = NULL; + + if (!inode || !this || !attr) + goto out; + + LOCK(&inode->lock); + { + ctx_p = __rda_inode_ctx_get(inode, this); + if (ctx_p) { + *attr = ctx_p->statbuf; + } + } + UNLOCK(&inode->lock); - return _gf_false; +out: + return; } /* @@ -108,224 +303,337 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) * buffer. ctx must be locked. */ static int32_t -__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size, - struct rda_fd_ctx *ctx) +__rda_fill_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size, + struct rda_fd_ctx *ctx) { - gf_dirent_t *dirent, *tmp; - size_t dirent_size, size = 0; - int32_t count = 0; - struct rda_priv *priv = this->private; + gf_dirent_t *dirent, *tmp; + size_t dirent_size, size = 0; + int32_t count = 0; + struct rda_priv *priv = NULL; + struct iatt tmp_stat = { + 0, + }; + + priv = this->private; + + list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) + { + dirent_size = gf_dirent_size(dirent->d_name); + if (size + dirent_size > request_size) + break; + + memset(&tmp_stat, 0, sizeof(tmp_stat)); + + if (dirent->inode && (!((strcmp(dirent->d_name, ".") == 0) || + (strcmp(dirent->d_name, "..") == 0)))) { + rda_inode_ctx_get_iatt(dirent->inode, this, &tmp_stat); + dirent->d_stat = tmp_stat; + } - list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) { - dirent_size = gf_dirent_size(dirent->d_name); - if (size + dirent_size > request_size) - break; + size += dirent_size; + list_del_init(&dirent->list); + ctx->cur_size -= dirent_size; - size += dirent_size; - list_del_init(&dirent->list); - 
ctx->cur_size -= dirent_size; + GF_ATOMIC_SUB(priv->rda_cache_size, dirent_size); - list_add_tail(&dirent->list, &entries->list); - ctx->cur_offset = dirent->d_off; - count++; - } + list_add_tail(&dirent->list, &entries->list); + ctx->cur_offset = dirent->d_off; + count++; + } - if (ctx->cur_size <= priv->rda_low_wmark) - ctx->state |= RDA_FD_PLUGGED; + if (ctx->cur_size <= priv->rda_low_wmark) + ctx->state |= RDA_FD_PLUGGED; - return count; + return count; } static int32_t -rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) +__rda_serve_readdirp(xlator_t *this, struct rda_fd_ctx *ctx, size_t size, + gf_dirent_t *entries, int *op_errno) { - gf_dirent_t entries; - int32_t ret; - struct rda_fd_ctx *ctx; - int op_errno = 0; - - ctx = get_rda_fd_ctx(fd, this); - INIT_LIST_HEAD(&entries.list); - ret = __rda_serve_readdirp(this, &entries, size, ctx); + int32_t ret = 0; - if (!ret && (ctx->state & RDA_FD_ERROR)) { - ret = -1; - ctx->state &= ~RDA_FD_ERROR; + ret = __rda_fill_readdirp(this, entries, size, ctx); - /* - * the preload has stopped running in the event of an error, so - * pass all future requests along - */ - ctx->state |= RDA_FD_BYPASS; - } + if (!ret && (ctx->state & RDA_FD_ERROR)) { + ret = -1; + ctx->state &= ~RDA_FD_ERROR; /* - * Use the op_errno sent by lower layers as xlators above will check - * the op_errno for identifying whether readdir is completed or not. + * the preload has stopped running in the event of an error, so + * pass all future requests along */ - op_errno = ctx->op_errno; - - STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata); - gf_dirent_free(&entries); - - return 0; + ctx->state |= RDA_FD_BYPASS; + } + /* + * Use the op_errno sent by lower layers as xlators above will check + * the op_errno for identifying whether readdir is completed or not. 
+ */ + *op_errno = ctx->op_errno; + + return ret; } static int32_t rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *xdata) -{ - struct rda_fd_ctx *ctx; - call_stub_t *stub; - int fill = 0; - - ctx = get_rda_fd_ctx(fd, this); - if (!ctx) - goto err; - - if (ctx->state & RDA_FD_BYPASS) - goto bypass; - - LOCK(&ctx->lock); - - /* recheck now that we have the lock */ - if (ctx->state & RDA_FD_BYPASS) { - UNLOCK(&ctx->lock); - goto bypass; - } - - /* - * If a new read comes in at offset 0 and the buffer has been - * completed, reset the context and kickstart the filler again. - */ - if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) { - rda_reset_ctx(ctx); - fill = 1; - } - - /* - * If a readdir occurs at an unexpected offset or we already have a - * request pending, admit defeat and just get out of the way. - */ - if (off != ctx->cur_offset || ctx->stub) { - ctx->state |= RDA_FD_BYPASS; - UNLOCK(&ctx->lock); - goto bypass; - } - - stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata); - if (!stub) { - UNLOCK(&ctx->lock); - goto err; - } - - /* - * If we haven't bypassed the preload, this means we can either serve - * the request out of the preload or the request that enables us to do - * so is in flight... 
- */ - if (rda_can_serve_readdirp(ctx, size)) - call_resume(stub); - else - ctx->stub = stub; - - UNLOCK(&ctx->lock); - - if (fill) - rda_fill_fd(frame, this, fd); - - return 0; + off_t off, dict_t *xdata) +{ + struct rda_fd_ctx *ctx = NULL; + int fill = 0; + gf_dirent_t entries; + int ret = 0; + int op_errno = 0; + gf_boolean_t serve = _gf_false; + + ctx = get_rda_fd_ctx(fd, this); + if (!ctx) + goto err; + + if (ctx->state & RDA_FD_BYPASS) + goto bypass; + + INIT_LIST_HEAD(&entries.list); + LOCK(&ctx->lock); + + /* recheck now that we have the lock */ + if (ctx->state & RDA_FD_BYPASS) { + UNLOCK(&ctx->lock); + goto bypass; + } + + /* + * If a new read comes in at offset 0 and the buffer has been + * completed, reset the context and kickstart the filler again. + */ + if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) { + rda_reset_ctx(this, ctx); + /* + * Unref and discard the 'list of xattrs to be fetched' + * stored during opendir call. This is done above - inside + * rda_reset_ctx(). + * Now, ref the xdata passed by md-cache in actual readdirp() + * call and use that for all subsequent internal readdirp() + * requests issued by this xlator. + */ + ctx->xattrs = dict_ref(xdata); + fill = 1; + } + + /* + * If a readdir occurs at an unexpected offset or we already have a + * request pending, admit defeat and just get out of the way. + */ + if (off != ctx->cur_offset || ctx->stub) { + ctx->state |= RDA_FD_BYPASS; + UNLOCK(&ctx->lock); + goto bypass; + } + + /* + * If we haven't bypassed the preload, this means we can either serve + * the request out of the preload or the request that enables us to do + * so is in flight... 
+ */ + if (rda_can_serve_readdirp(ctx, size)) { + ret = __rda_serve_readdirp(this, ctx, size, &entries, &op_errno); + serve = _gf_true; + + if (op_errno == ENOENT && + !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0))) + op_errno = 0; + } else { + ctx->stub = fop_readdirp_stub(frame, NULL, fd, size, off, xdata); + if (!ctx->stub) { + UNLOCK(&ctx->lock); + goto err; + } + + if (!(ctx->state & RDA_FD_RUNNING)) { + fill = 1; + if (!ctx->xattrs) + ctx->xattrs = dict_ref(xdata); + ctx->state |= RDA_FD_RUNNING; + } + } + + UNLOCK(&ctx->lock); + + if (serve) { + STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata); + gf_dirent_free(&entries); + } + + if (fill) + rda_fill_fd(frame, this, fd); + + return 0; bypass: - STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); - return 0; + STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; err: - STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); - return 0; + STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; } static int32_t rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - gf_dirent_t *dirent, *tmp; - struct rda_local *local = frame->local; - struct rda_fd_ctx *ctx = local->ctx; - struct rda_priv *priv = this->private; - int fill = 1; - - LOCK(&ctx->lock); - - /* Verify that the preload buffer is still pending on this data. 
*/ - if (ctx->next_offset != local->offset) { - gf_log(this->name, GF_LOG_ERROR, - "Out of sequence directory preload."); - ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR); - ctx->op_errno = EUCLEAN; - - goto out; - } - - if (entries) { - list_for_each_entry_safe(dirent, tmp, &entries->list, list) { - list_del_init(&dirent->list); - /* must preserve entry order */ - list_add_tail(&dirent->list, &ctx->entries.list); - - ctx->cur_size += gf_dirent_size(dirent->d_name); - ctx->next_offset = dirent->d_off; - } - } - - if (ctx->cur_size >= priv->rda_high_wmark) - ctx->state &= ~RDA_FD_PLUGGED; - - if (!op_ret) { - /* we've hit eod */ - ctx->state &= ~RDA_FD_RUNNING; - ctx->state |= RDA_FD_EOD; - ctx->op_errno = op_errno; - } else if (op_ret == -1) { - /* kill the preload and pend the error */ - ctx->state &= ~RDA_FD_RUNNING; - ctx->state |= RDA_FD_ERROR; - ctx->op_errno = op_errno; - } - - /* - * NOTE: The strict bypass logic in readdirp() means a pending request - * is always based on ctx->cur_offset. - */ - if (ctx->stub && - rda_can_serve_readdirp(ctx, ctx->stub->args.size)) { - call_resume(ctx->stub); - ctx->stub = NULL; - } + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *dirent = NULL; + gf_dirent_t *tmp = NULL; + gf_dirent_t serve_entries; + struct rda_local *local = frame->local; + struct rda_fd_ctx *ctx = local->ctx; + struct rda_priv *priv = this->private; + int fill = 1; + size_t dirent_size = 0; + int ret = 0; + gf_boolean_t serve = _gf_false; + call_stub_t *stub = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; + uint64_t generation = 0; + call_frame_t *fill_frame = NULL; + + INIT_LIST_HEAD(&serve_entries.list); + LOCK(&ctx->lock); + + /* Verify that the preload buffer is still pending on this data. 
*/ + if (ctx->next_offset != local->offset) { + gf_msg(this->name, GF_LOG_ERROR, 0, READDIR_AHEAD_MSG_OUT_OF_SEQUENCE, + "Out of sequence directory preload."); + ctx->state |= (RDA_FD_BYPASS | RDA_FD_ERROR); + ctx->op_errno = EUCLEAN; + + goto out; + } + + if (entries) { + list_for_each_entry_safe(dirent, tmp, &entries->list, list) + { + list_del_init(&dirent->list); + + /* must preserve entry order */ + list_add_tail(&dirent->list, &ctx->entries.list); + if (dirent->inode) { + /* If ctxp->stat is invalidated, don't update it + * with dirent->d_stat as we don't have + * generation number of the inode when readdirp + * request was initiated. So, we pass 0 for + * generation number + */ + + generation = -1; + if (ctx->writes_during_prefetch) { + memset(gfid, 0, sizeof(gfid)); + uuid_utoa_r(dirent->inode->gfid, gfid); + if (dict_get(ctx->writes_during_prefetch, gfid)) + generation = 0; + } + + if (!((strcmp(dirent->d_name, ".") == 0) || + (strcmp(dirent->d_name, "..") == 0))) { + rda_inode_ctx_update_iatts(dirent->inode, this, + &dirent->d_stat, &dirent->d_stat, + generation); + } + } + + dirent_size = gf_dirent_size(dirent->d_name); + + ctx->cur_size += dirent_size; + + GF_ATOMIC_ADD(priv->rda_cache_size, dirent_size); + + ctx->next_offset = dirent->d_off; + } + } + + if (ctx->writes_during_prefetch) { + dict_unref(ctx->writes_during_prefetch); + ctx->writes_during_prefetch = NULL; + } + + GF_ATOMIC_DEC(ctx->prefetching); + + if (ctx->cur_size >= priv->rda_high_wmark) + ctx->state &= ~RDA_FD_PLUGGED; + + if (!op_ret || op_errno == ENOENT) { + /* we've hit eod */ + ctx->state &= ~RDA_FD_RUNNING; + ctx->state |= RDA_FD_EOD; + ctx->op_errno = op_errno; + } else if (op_ret == -1) { + /* kill the preload and pend the error */ + ctx->state &= ~RDA_FD_RUNNING; + ctx->state |= RDA_FD_ERROR; + ctx->op_errno = op_errno; + } + + /* + * NOTE: The strict bypass logic in readdirp() means a pending request + * is always based on ctx->cur_offset. 
+ */ + if (ctx->stub && rda_can_serve_readdirp(ctx, ctx->stub->args.size)) { + ret = __rda_serve_readdirp(this, ctx, ctx->stub->args.size, + &serve_entries, &op_errno); + serve = _gf_true; + stub = ctx->stub; + ctx->stub = NULL; + } out: - /* - * If we have been marked for bypass and have no pending stub, clear the - * run state so we stop preloading the context with entries. - */ - if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub) - ctx->state &= ~RDA_FD_RUNNING; + /* + * If we have been marked for bypass and have no pending stub, clear the + * run state so we stop preloading the context with entries. + */ + if (!ctx->stub && + ((ctx->state & RDA_FD_BYPASS) || + GF_ATOMIC_GET(priv->rda_cache_size) > priv->rda_cache_limit)) + ctx->state &= ~RDA_FD_RUNNING; + + if (!(ctx->state & RDA_FD_RUNNING)) { + fill = 0; + if (ctx->xattrs) { + /* + * fill = 0 and hence rda_fill_fd() won't be invoked. + * unref for ref taken in rda_fill_fd() + */ + dict_unref(ctx->xattrs); + ctx->xattrs = NULL; + } - if (!(ctx->state & RDA_FD_RUNNING)) { - fill = 0; - STACK_DESTROY(ctx->fill_frame->root); - ctx->fill_frame = NULL; - } + fill_frame = ctx->fill_frame; + ctx->fill_frame = NULL; + } - UNLOCK(&ctx->lock); + if (op_errno == ENOENT && + !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0))) + op_errno = 0; - if (fill) - rda_fill_fd(frame, this, local->fd); + UNLOCK(&ctx->lock); + if (fill_frame) { + rda_local_wipe(fill_frame->local); + STACK_DESTROY(fill_frame->root); + } - return 0; + if (serve) { + STACK_UNWIND_STRICT(readdirp, stub->frame, ret, op_errno, + &serve_entries, xdata); + gf_dirent_free(&serve_entries); + call_stub_destroy(stub); + } + + if (fill) + rda_fill_fd(frame, this, local->fd); + + return 0; } /* @@ -334,234 +642,741 @@ out: static int rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) { - call_frame_t *nframe = NULL; - struct rda_local *local = NULL; - struct rda_fd_ctx *ctx; - off_t offset; - struct rda_priv *priv = this->private; + call_frame_t *nframe = 
NULL; + struct rda_local *local = NULL; + struct rda_local *orig_local = frame->local; + struct rda_fd_ctx *ctx; + off_t offset; + struct rda_priv *priv = this->private; + + ctx = get_rda_fd_ctx(fd, this); + if (!ctx) + goto err; + + LOCK(&ctx->lock); + + if (ctx->state & RDA_FD_NEW) { + ctx->state &= ~RDA_FD_NEW; + ctx->state |= RDA_FD_RUNNING; + if (priv->rda_low_wmark) + ctx->state |= RDA_FD_PLUGGED; + } + + offset = ctx->next_offset; + + if (!ctx->fill_frame) { + nframe = copy_frame(frame); + if (!nframe) { + UNLOCK(&ctx->lock); + goto err; + } + + local = mem_get0(this->local_pool); + if (!local) { + UNLOCK(&ctx->lock); + goto err; + } + + local->ctx = ctx; + local->fd = fd_ref(fd); + nframe->local = local; - ctx = get_rda_fd_ctx(fd, this); - if (!ctx) - goto err; + ctx->fill_frame = nframe; - LOCK(&ctx->lock); + if (!ctx->xattrs && orig_local && orig_local->xattrs) { + /* when this function is invoked by rda_opendir_cbk */ + ctx->xattrs = dict_ref(orig_local->xattrs); + } + } else { + nframe = ctx->fill_frame; + local = nframe->local; + } - if (ctx->state & RDA_FD_NEW) { - ctx->state &= ~RDA_FD_NEW; - ctx->state |= RDA_FD_RUNNING; - if (priv->rda_low_wmark) - ctx->state |= RDA_FD_PLUGGED; - } + local->offset = offset; + GF_ATOMIC_INC(ctx->prefetching); - offset = ctx->next_offset; + UNLOCK(&ctx->lock); - if (!ctx->fill_frame) { - nframe = copy_frame(frame); - if (!nframe) { - UNLOCK(&ctx->lock); - goto err; - } + STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size, + offset, ctx->xattrs); - local = mem_get0(this->local_pool); - if (!local) { - UNLOCK(&ctx->lock); - goto err; - } + return 0; - local->ctx = ctx; - local->fd = fd; - nframe->local = local; +err: + if (nframe) { + rda_local_wipe(nframe->local); + FRAME_DESTROY(nframe); + } - ctx->fill_frame = nframe; - } else { - nframe = ctx->fill_frame; - local = nframe->local; - } + return -1; +} - local->offset = offset; +static int32_t 
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + if (!op_ret) + rda_fill_fd(frame, this, fd); - UNLOCK(&ctx->lock); + RDA_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata); + return 0; +} - STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size, - offset, NULL); +static int32_t +rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + int op_errno = 0; + struct rda_local *local = NULL; + + if (xdata) { + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto unwind; + } - return 0; + /* + * Retrieve list of keys set by md-cache xlator and store it + * in local to be consumed in rda_opendir_cbk + */ + local->xattrs = dict_copy_with_ref(xdata, NULL); + frame->local = local; + } -err: - if (nframe) - FRAME_DESTROY(nframe); + STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; - return -1; +unwind: + STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, fd, xdata); + return 0; } static int32_t -rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +rda_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - if (!op_ret) - rda_fill_fd(frame, this, fd); + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + + rda_mark_inode_dirty(this, local->inode); + + rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, + local->generation); - STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata); - return 0; +unwind: + RDA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, &postbuf_out, + xdata); + return 0; } static int32_t 
-rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - dict_t *xdata) +rda_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(writev, frame, this, fd->inode, xdata, fd, + vector, count, off, flags, iobref); + return 0; +} + +static int32_t +rda_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, + local->generation); + +unwind: + RDA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(fallocate, frame, this, fd->inode, xdata, fd, + keep_size, offset, len); + return 0; +} + +static int32_t +rda_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, + local->generation); + +unwind: + RDA_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(zerofill, frame, this, fd->inode, xdata, fd, + offset, 
len); + return 0; +} + +static int32_t +rda_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, + local->generation); + +unwind: + RDA_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(discard, frame, this, fd->inode, xdata, fd, + offset, len); + return 0; +} + +static int32_t +rda_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, + local->generation); + +unwind: + RDA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(ftruncate, frame, this, fd->inode, xdata, fd, + offset); + return 0; +} + +static int32_t +rda_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + 
rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out, + local->generation); + +unwind: + /* Unwind as truncate, the fop that rda_truncate() wound to this callback. */ + RDA_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(truncate, frame, this, loc->inode, xdata, loc, + offset); + return 0; +} + +static int32_t +rda_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(setxattr, frame, this, loc->inode, xdata, loc, + dict, flags); + return 0; +} + +static int32_t +rda_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(fsetxattr, frame, this, fd->inode, xdata, fd, + dict, flags); + return 0; +} + +static int32_t +rda_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + struct 
rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out, + local->generation); + +unwind: + RDA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(setattr, frame, this, loc->inode, xdata, loc, + stbuf, valid); + return 0; +} + +static int32_t +rda_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = { + 0, + }; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out, + local->generation); + +unwind: + RDA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, &postbuf_out, + xdata); + return 0; +} + +static int32_t +rda_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(fsetattr, frame, this, fd->inode, xdata, fd, + stbuf, valid); + return 0; +} + +static int32_t +rda_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, 
dict_t *xdata) { - STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - return 0; + RDA_COMMON_MODIFICATION_FOP(removexattr, frame, this, loc->inode, xdata, + loc, name); + return 0; +} + +static int32_t +rda_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_mark_inode_dirty(this, local->inode); + rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP(fremovexattr, frame, this, fd->inode, xdata, fd, + name); + return 0; } static int32_t rda_releasedir(xlator_t *this, fd_t *fd) { - uint64_t val; - struct rda_fd_ctx *ctx; + uint64_t val; + struct rda_fd_ctx *ctx; - if (fd_ctx_del(fd, this, &val) < 0) - return -1; + if (fd_ctx_del(fd, this, &val) < 0) + return -1; - ctx = (struct rda_fd_ctx *) val; - if (!ctx) - return 0; + ctx = (struct rda_fd_ctx *)(uintptr_t)val; + if (!ctx) + return 0; - rda_reset_ctx(ctx); + rda_reset_ctx(this, ctx); - if (ctx->fill_frame) - STACK_DESTROY(ctx->fill_frame->root); + if (ctx->fill_frame) + STACK_DESTROY(ctx->fill_frame->root); - if (ctx->stub) - gf_log(this->name, GF_LOG_ERROR, - "released a directory with a pending stub"); + if (ctx->stub) + gf_msg(this->name, GF_LOG_ERROR, 0, + READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB, + "released a directory with a pending stub"); - GF_FREE(ctx); - return 0; + GF_FREE(ctx); + return 0; +} + +static int +rda_forget(xlator_t *this, inode_t *inode) +{ + uint64_t ctx_uint = 0; + rda_inode_ctx_t *ctx = NULL; + + inode_ctx_del1(inode, this, &ctx_uint); + if (!ctx_uint) + return 0; + + ctx = (rda_inode_ctx_t 
*)(uintptr_t)ctx_uint; + + GF_FREE(ctx); + + return 0; } +/* Registers this xlator's memory-accounting types. Returns the result of xlator_mem_acct_init(), or -1 when this is NULL. */ int32_t mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - if (!this) - goto out; + if (!this) + goto out; - ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1); - if (ret != 0) - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); + if (ret != 0) + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READDIR_AHEAD_MSG_NO_MEMORY, + /* adjacent literals are concatenated: the first one carries the separating space */ + "Memory accounting init " + "failed"); out: - return ret; + return ret; } int reconfigure(xlator_t *this, dict_t *options) { - struct rda_priv *priv = this->private; - - GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, - uint32, err); - GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64, - err); - GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64, - err); - - return 0; + struct rda_priv *priv = this->private; + + GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, + size_uint64, err); + GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64, + err); + GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, + size_uint64, err); + GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options, + size_uint64, err); + GF_OPTION_RECONF("parallel-readdir", priv->parallel_readdir, options, bool, + err); + GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, err); + + return 0; err: - return -1; + return -1; } int init(xlator_t *this) { - struct rda_priv *priv = NULL; + struct rda_priv *priv = NULL; - GF_VALIDATE_OR_GOTO("readdir-ahead", this, err); + GF_VALIDATE_OR_GOTO("readdir-ahead", this, err); - if (!this->children || this->children->next) { - gf_log(this->name, GF_LOG_ERROR, - "FATAL: readdir-ahead not configured with exactly one" - " child"); - goto err; - } + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, + 
READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED, + "FATAL: readdir-ahead not configured with exactly one" + " child"); + goto err; + } - if (!this->parents) { - gf_log(this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, 0, + READDIR_AHEAD_MSG_VOL_MISCONFIGURED, + "dangling volume. check volfile "); + } + + priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv); + if (!priv) + goto err; + this->private = priv; - priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv); - if (!priv) - goto err; - this->private = priv; + GF_ATOMIC_INIT(priv->rda_cache_size, 0); - this->local_pool = mem_pool_new(struct rda_local, 32); - if (!this->local_pool) - goto err; + this->local_pool = mem_pool_new(struct rda_local, 32); + if (!this->local_pool) + goto err; - GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err); - GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err); - GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err); + GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64, err); + GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err); + GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err); + GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64, err); + GF_OPTION_INIT("parallel-readdir", priv->parallel_readdir, bool, err); + GF_OPTION_INIT("pass-through", this->pass_through, bool, err); - return 0; + return 0; err: - if (this->local_pool) - mem_pool_destroy(this->local_pool); - if (priv) - GF_FREE(priv); + /* GF_VALIDATE_OR_GOTO lands here with this == NULL: nothing to release. */ + if (!this) + return -1; + + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + if (priv) { + GF_FREE(priv); + /* this->private aliases priv; clear it so no later cleanup frees it again. */ + this->private = NULL; + } - return -1; + return -1; } - void fini(xlator_t *this) { - GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out); + GF_VALIDATE_OR_GOTO("readdir-ahead", this, out); - GF_FREE(this->private); + GF_FREE(this->private); out: - return; + return; } struct xlator_fops fops = { - .opendir 
= rda_opendir, - .readdirp = rda_readdirp, + .opendir = rda_opendir, + .readdirp = rda_readdirp, + /* inode write */ + /* TODO: invalidate a dentry's stats if it points to a directory + * when entry operations happen in that directory + */ + .writev = rda_writev, + .truncate = rda_truncate, + .ftruncate = rda_ftruncate, + .fallocate = rda_fallocate, + .discard = rda_discard, + .zerofill = rda_zerofill, + /* metadata write */ + .setxattr = rda_setxattr, + .fsetxattr = rda_fsetxattr, + .setattr = rda_setattr, + .fsetattr = rda_fsetattr, + .removexattr = rda_removexattr, + .fremovexattr = rda_fremovexattr, }; struct xlator_cbks cbks = { - .releasedir = rda_releasedir, + .releasedir = rda_releasedir, + .forget = rda_forget, }; struct volume_options options[] = { - { .key = {"rda-request-size"}, - .type = GF_OPTION_TYPE_INT, - .min = 4096, - .max = 131072, - .default_value = "131072", - .description = "readdir-ahead request size", - }, - { .key = {"rda-low-wmark"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 0, - .max = 10 * GF_UNIT_MB, - .default_value = "4096", - .description = "the value under which we plug", - }, - { .key = {"rda-high-wmark"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 0, - .max = 100 * GF_UNIT_MB, - .default_value = "131072", - .description = "the value over which we unplug", - }, - { .key = {NULL} }, + { + .key = {"readdir-ahead"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable readdir-ahead", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { + .key = {"rda-request-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 4096, + .max = 131072, + .default_value = "131072", + .description = "size of buffer in readdirp calls initiated by " + "readdir-ahead ", + }, + { + .key = {"rda-low-wmark"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 10 * GF_UNIT_MB, + .default_value = "4096", + .description = "the value under which readdir-ahead plugs", + }, + { + .key = {"rda-high-wmark"}, + 
.type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 100 * GF_UNIT_MB, + .default_value = "128KB", + .description = "the value over which readdir-ahead unplugs", + }, + { + .key = {"rda-cache-limit"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = INFINITY, + .default_value = "10MB", + .description = "maximum size of cache consumed by readdir-ahead " + "xlator. This value is global and total memory " + "consumption by readdir-ahead is capped by this " + "value, irrespective of the number/size of " + "directories cached", + }, + {.key = {"parallel-readdir"}, + .type = GF_OPTION_TYPE_BOOL, + .op_version = {GD_OP_VERSION_3_10_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .default_value = "off", + .description = "If this option is enabled, the readdir operation " + "is performed in parallel on all the bricks, thus " + "improving the performance of readdir. Note that " + "the performance improvement is higher in large " + "clusters"}, + {.key = {"pass-through"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_4_1_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT, + .tags = {"readdir-ahead"}, + .description = "Enable/Disable readdir ahead translator"}, + {.key = {NULL}}, }; +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "readdir-ahead", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h index e48786daeca..619c41059ff 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -12,35 +12,87 @@ #define __READDIR_AHEAD_H /* state flags */ -#define RDA_FD_NEW (1 << 0) -#define RDA_FD_RUNNING (1 << 1) 
-#define RDA_FD_EOD (1 << 2) -#define RDA_FD_ERROR (1 << 3) -#define RDA_FD_BYPASS (1 << 4) -#define RDA_FD_PLUGGED (1 << 5) +#define RDA_FD_NEW (1 << 0) +#define RDA_FD_RUNNING (1 << 1) +#define RDA_FD_EOD (1 << 2) +#define RDA_FD_ERROR (1 << 3) +#define RDA_FD_BYPASS (1 << 4) +#define RDA_FD_PLUGGED (1 << 5) + +#define RDA_COMMON_MODIFICATION_FOP(name, frame, this, __inode, __xdata, \ + args...) \ + do { \ + struct rda_local *__local = NULL; \ + rda_inode_ctx_t *ctx_p = NULL; \ + \ + __local = mem_get0(this->local_pool); \ + __local->inode = inode_ref(__inode); \ + LOCK(&__inode->lock); \ + { \ + ctx_p = __rda_inode_ctx_get(__inode, this); \ + } \ + UNLOCK(&__inode->lock); \ + __local->generation = GF_ATOMIC_GET(ctx_p->generation); \ + \ + frame->local = __local; \ + if (__xdata) \ + __local->xattrs = dict_ref(__xdata); \ + \ + STACK_WIND(frame, rda_##name##_cbk, FIRST_CHILD(this), \ + FIRST_CHILD(this)->fops->name, args, __xdata); \ + } while (0) + +#define RDA_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + struct rda_local *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (__local) { \ + rda_local_wipe(__local); \ + mem_put(__local); \ + } \ + } while (0) struct rda_fd_ctx { - off_t cur_offset; /* current head of the ctx */ - size_t cur_size; /* current size of the preload */ - off_t next_offset; /* tail of the ctx */ - uint32_t state; - gf_lock_t lock; - gf_dirent_t entries; - call_frame_t *fill_frame; - call_stub_t *stub; - int op_errno; + off_t cur_offset; /* current head of the ctx */ + size_t cur_size; /* current size of the preload */ + off_t next_offset; /* tail of the ctx */ + uint32_t state; + gf_lock_t lock; + gf_dirent_t entries; + call_frame_t *fill_frame; + call_stub_t *stub; + int op_errno; + dict_t *xattrs; /* md-cache keys to be sent in readdirp() */ + dict_t *writes_during_prefetch; + gf_atomic_t prefetching; }; struct rda_local { - struct rda_fd_ctx *ctx; - fd_t *fd; - off_t offset; + struct rda_fd_ctx *ctx; + fd_t *fd; + dict_t *xattrs; /* md-cache keys to be sent in readdirp() */ + inode_t *inode; + off_t offset; + uint64_t generation; + int32_t skip_dir; }; struct rda_priv { - uint32_t rda_req_size; - uint64_t rda_low_wmark; - uint64_t rda_high_wmark; + uint64_t rda_req_size; + uint64_t rda_low_wmark; + uint64_t rda_high_wmark; + uint64_t rda_cache_limit; + gf_atomic_t rda_cache_size; + gf_boolean_t parallel_readdir; }; +typedef struct rda_inode_ctx { + struct iatt statbuf; + gf_atomic_t generation; +} rda_inode_ctx_t; + #endif /* __READDIR_AHEAD_H */ |
