Diffstat (limited to 'xlators/performance')
-rw-r--r--  xlators/performance/Makefile.am | 3
-rw-r--r--  xlators/performance/io-cache/src/Makefile.am | 5
-rw-r--r--  xlators/performance/io-cache/src/io-cache-messages.h | 69
-rw-r--r--  xlators/performance/io-cache/src/io-cache.c | 3071
-rw-r--r--  xlators/performance/io-cache/src/io-cache.h | 406
-rw-r--r--  xlators/performance/io-cache/src/ioc-inode.c | 287
-rw-r--r--  xlators/performance/io-cache/src/ioc-mem-types.h | 24
-rw-r--r--  xlators/performance/io-cache/src/page.c | 1448
-rw-r--r--  xlators/performance/io-threads/src/Makefile.am | 7
-rw-r--r--  xlators/performance/io-threads/src/io-threads-messages.h | 41
-rw-r--r--  xlators/performance/io-threads/src/io-threads.c | 3561
-rw-r--r--  xlators/performance/io-threads/src/io-threads.h | 120
-rw-r--r--  xlators/performance/io-threads/src/iot-mem-types.h | 9
-rw-r--r--  xlators/performance/md-cache/src/Makefile.am | 8
-rw-r--r--  xlators/performance/md-cache/src/md-cache-mem-types.h | 13
-rw-r--r--  xlators/performance/md-cache/src/md-cache-messages.h | 29
-rw-r--r--  xlators/performance/md-cache/src/md-cache.c | 4598
-rw-r--r--  xlators/performance/nl-cache/Makefile.am (renamed from xlators/performance/symlink-cache/Makefile.am) | 2
-rw-r--r--  xlators/performance/nl-cache/src/Makefile.am | 12
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-helper.c | 1201
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-mem-types.h | 27
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-messages.h | 29
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache.c | 840
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache.h | 175
-rw-r--r--  xlators/performance/open-behind/src/Makefile.am | 7
-rw-r--r--  xlators/performance/open-behind/src/open-behind-mem-types.h | 9
-rw-r--r--  xlators/performance/open-behind/src/open-behind-messages.h | 32
-rw-r--r--  xlators/performance/open-behind/src/open-behind.c | 1575
-rw-r--r--  xlators/performance/quick-read/src/Makefile.am | 7
-rw-r--r--  xlators/performance/quick-read/src/quick-read-mem-types.h | 16
-rw-r--r--  xlators/performance/quick-read/src/quick-read-messages.h | 31
-rw-r--r--  xlators/performance/quick-read/src/quick-read.c | 2125
-rw-r--r--  xlators/performance/quick-read/src/quick-read.h | 88
-rw-r--r--  xlators/performance/read-ahead/src/Makefile.am | 7
-rw-r--r--  xlators/performance/read-ahead/src/page.c | 846
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead-mem-types.h | 17
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead-messages.h | 31
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead.c | 1791
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead.h | 183
-rw-r--r--  xlators/performance/readdir-ahead/Makefile.am | 3
-rw-r--r--  xlators/performance/readdir-ahead/src/Makefile.am | 18
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h | 24
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead-messages.h | 30
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead.c | 1382
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead.h | 98
-rw-r--r--  xlators/performance/symlink-cache/src/Makefile.am | 13
-rw-r--r--  xlators/performance/symlink-cache/src/symlink-cache.c | 399
-rw-r--r--  xlators/performance/write-behind/src/Makefile.am | 7
-rw-r--r--  xlators/performance/write-behind/src/write-behind-mem-types.h | 16
-rw-r--r--  xlators/performance/write-behind/src/write-behind-messages.h | 31
-rw-r--r--  xlators/performance/write-behind/src/write-behind.c | 4137
51 files changed, 17544 insertions(+), 11364 deletions(-)
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am
index f99e11829db..e95725acb8c 100644
--- a/xlators/performance/Makefile.am
+++ b/xlators/performance/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind
+SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache \
+ quick-read md-cache open-behind nl-cache
CLEANFILES =
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am
index 155be9988c9..bfa34ce5502 100644
--- a/xlators/performance/io-cache/src/Makefile.am
+++ b/xlators/performance/io-cache/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = io-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_cache_la_LDFLAGS = -module -avoid-version
+io_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c
io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-cache.h ioc-mem-types.h
+noinst_HEADERS = io-cache.h ioc-mem-types.h io-cache-messages.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
-I$(CONTRIBDIR)/rbtree
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/io-cache/src/io-cache-messages.h b/xlators/performance/io-cache/src/io-cache-messages.h
new file mode 100644
index 00000000000..38ad0b14d0e
--- /dev/null
+++ b/xlators/performance/io-cache/src/io-cache-messages.h
@@ -0,0 +1,69 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IO_CACHE_MESSAGES_H_
+#define _IO_CACHE_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(IO_CACHE, IO_CACHE_MSG_ENFORCEMENT_FAILED,
+ IO_CACHE_MSG_INVALID_ARGUMENT,
+ IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, IO_CACHE_MSG_NO_MEMORY,
+ IO_CACHE_MSG_VOL_MISCONFIGURED, IO_CACHE_MSG_INODE_NULL,
+ IO_CACHE_MSG_PAGE_WAIT_VALIDATE, IO_CACHE_MSG_STR_COVERSION_FAILED,
+ IO_CACHE_MSG_WASTED_COPY, IO_CACHE_MSG_SET_FD_FAILED,
+ IO_CACHE_MSG_TABLE_NULL, IO_CACHE_MSG_MEMORY_INIT_FAILED,
+ IO_CACHE_MSG_NO_CACHE_SIZE_OPT, IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE,
+ IO_CACHE_MSG_CREATE_MEM_POOL_FAILED,
+ IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, IO_CACHE_MSG_NULL_PAGE_WAIT,
+ IO_CACHE_MSG_FRAME_NULL, IO_CACHE_MSG_PAGE_FAULT,
+ IO_CACHE_MSG_SERVE_READ_REQUEST, IO_CACHE_MSG_LOCAL_NULL,
+ IO_CACHE_MSG_DEFAULTING_TO_OLD);
+
+#define IO_CACHE_MSG_NO_MEMORY_STR "out of memory"
+#define IO_CACHE_MSG_ENFORCEMENT_FAILED_STR "inode context is NULL"
+#define IO_CACHE_MSG_SET_FD_FAILED_STR "failed to set fd ctx"
+#define IO_CACHE_MSG_TABLE_NULL_STR "table is NULL"
+#define IO_CACHE_MSG_MEMORY_INIT_FAILED_STR "Memory accounting init failed"
+#define IO_CACHE_MSG_NO_CACHE_SIZE_OPT_STR "could not get cache-size option"
+#define IO_CACHE_MSG_INVALID_ARGUMENT_STR \
+ "file size is greater than the max size"
+#define IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE_STR "Not reconfiguring cache-size"
+#define IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED_STR \
+ "FATAL: io-cache not configured with exactly one child"
+#define IO_CACHE_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile"
+#define IO_CACHE_MSG_CREATE_MEM_POOL_FAILED_STR \
+ "failed to create local_t's memory pool"
+#define IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED_STR "Unable to allocate mem_pool"
+#define IO_CACHE_MSG_STR_COVERSION_FAILED_STR \
+ "asprintf failed while converting prt to str"
+#define IO_CACHE_MSG_INODE_NULL_STR "ioc_inode is NULL"
+#define IO_CACHE_MSG_PAGE_WAIT_VALIDATE_STR \
+ "cache validate called without any page waiting to be validated"
+#define IO_CACHE_MSG_NULL_PAGE_WAIT_STR "asked to wait on a NULL page"
+#define IO_CACHE_MSG_WASTED_COPY_STR "wasted copy"
+#define IO_CACHE_MSG_FRAME_NULL_STR "frame>root>rsp_refs is null"
+#define IO_CACHE_MSG_PAGE_FAULT_STR "page fault on a NULL frame"
+#define IO_CACHE_MSG_SERVE_READ_REQUEST_STR \
+ "NULL page has been provided to serve read request"
+#define IO_CACHE_MSG_LOCAL_NULL_STR "local is NULL"
+#define IO_CACHE_MSG_DEFAULTING_TO_OLD_STR \
+ "minimum size of file that can be cached is greater than maximum size. " \
+ "Hence Defaulting to old value"
+#endif /* _IO_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index f56a574564a..9375d29c17f 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -8,40 +8,38 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <math.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "io-cache.h"
#include "ioc-mem-types.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include <assert.h>
#include <sys/time.h>
-
+#include "io-cache-messages.h"
int ioc_log2_page_size;
uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path);
+ioc_get_priority(ioc_table_t *table, const char *path);
struct volume_options options[];
-
-static inline uint32_t
-ioc_hashfn (void *data, int len)
+static uint32_t
+ioc_hashfn(void *data, int len)
{
- off_t offset;
+ off_t offset;
- offset = *(off_t *) data;
+ offset = *(off_t *)data;
- return (offset >> ioc_log2_page_size);
+ return (offset >> ioc_log2_page_size);
}
-static inline ioc_inode_t *
+/* TODO: This function is not used, uncomment when we find a
+ usage for this function.
+
+static ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
ioc_table_t *table = NULL;
@@ -54,7 +52,8 @@ ioc_inode_reupdate (ioc_inode_t *ioc_inode)
return ioc_inode;
}
-static inline ioc_inode_t *
+
+static ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
ioc_inode_t *ioc_inode = NULL;
@@ -77,22 +76,74 @@ ioc_get_inode (dict_t *dict, char *name)
return ioc_inode;
}
+*/
-int32_t
-ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
+int
+ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iovec *vector, int32_t count, int op_ret, off_t offset)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {0,};
- ioc_table_t *table = NULL;
+ size_t size = 0;
+ off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0,
+ write_offset = 0;
+ off_t page_offset = 0, page_end = 0;
+ ioc_page_t *trav = NULL;
- table = ioc_inode->table;
+ size = iov_length(vector, count);
+ size = min(size, op_ret);
- gettimeofday (&tv, NULL);
+ rounded_offset = gf_floor(offset, ioc_inode->table->page_size);
+ rounded_end = gf_roof(offset + size, ioc_inode->table->page_size);
- if (time_elapsed (&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
- need_revalidate = 1;
+ trav_offset = rounded_offset;
+ ioc_inode_lock(ioc_inode);
+ {
+ while (trav_offset < rounded_end) {
+ trav = __ioc_page_get(ioc_inode, trav_offset);
+ if (trav && trav->ready) {
+ if (trav_offset == rounded_offset)
+ page_offset = offset - rounded_offset;
+ else
+ page_offset = 0;
+
+ if ((trav_offset + ioc_inode->table->page_size) >=
+ rounded_end) {
+ page_end = trav->size - (rounded_end - (offset + size));
+ } else {
+ page_end = trav->size;
+ }
+
+ iov_range_copy(trav->vector, trav->count, page_offset, vector,
+ count, write_offset, page_end - page_offset);
+ } else if (trav) {
+ if (!trav->waitq)
+ ioc_inode->table->cache_used -= __ioc_page_destroy(trav);
+ }
+
+ if (trav_offset == rounded_offset)
+ write_offset += (ioc_inode->table->page_size -
+ (offset - rounded_offset));
+ else
+ write_offset += ioc_inode->table->page_size;
+
+ trav_offset += ioc_inode->table->page_size;
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
- return need_revalidate;
+ return 0;
+}
+
+static gf_boolean_t
+ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)
+{
+ ioc_table_t *table = NULL;
+
+ GF_ASSERT(ioc_inode);
+ table = ioc_inode->table;
+ GF_ASSERT(table);
+
+ return (gf_time() - ioc_inode->cache.last_revalidate >=
+ table->cache_timeout);
}
/*
@@ -103,193 +154,201 @@ ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
* assumes lock is held
*/
int64_t
-__ioc_inode_flush (ioc_inode_t *ioc_inode)
+__ioc_inode_flush(ioc_inode_t *ioc_inode)
{
- ioc_page_t *curr = NULL, *next = NULL;
- int64_t destroy_size = 0;
- int64_t ret = 0;
+ ioc_page_t *curr = NULL, *next = NULL;
+ int64_t destroy_size = 0;
+ int64_t ret = 0;
- list_for_each_entry_safe (curr, next, &ioc_inode->cache.page_lru,
- page_lru) {
- ret = __ioc_page_destroy (curr);
+ list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru)
+ {
+ ret = __ioc_page_destroy(curr);
- if (ret != -1)
- destroy_size += ret;
- }
+ if (ret != -1)
+ destroy_size += ret;
+ }
- return destroy_size;
+ return destroy_size;
}
void
-ioc_inode_flush (ioc_inode_t *ioc_inode)
+ioc_inode_flush(ioc_inode_t *ioc_inode)
{
- int64_t destroy_size = 0;
+ int64_t destroy_size = 0;
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_lock(ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush(ioc_inode);
+ }
+ ioc_inode_unlock(ioc_inode);
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
+ if (destroy_size) {
+ ioc_table_lock(ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
}
+ ioc_table_unlock(ioc_inode->table);
+ }
- return;
+ return;
}
int32_t
-ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop,
- xdata);
- return 0;
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
int32_t
-ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ inode_ctx_get(loc->inode, this, &ioc_inode);
- if (ioc_inode
- && ((valid & GF_SET_ATTR_ATIME)
- || (valid & GF_SET_ATTR_MTIME)))
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode &&
+ ((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME)))
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+ STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ return 0;
}
int32_t
-ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
+ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf)
{
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = NULL;
- uint8_t cache_still_valid = 0;
- uint64_t tmp_ioc_inode = 0;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
- ioc_local_t *local = NULL;
-
- if (op_ret != 0)
- goto out;
+ ioc_table_t *table = NULL;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ gf_boolean_t cache_still_valid = _gf_false;
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (!this || !inode)
+ goto out;
- if (!this || !this->private) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ table = this->private;
+
+ LOCK(&inode->lock);
+ {
+ (void)__inode_ctx_get(inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+ if (!ioc_inode) {
+ weight = ioc_get_priority(table, path);
+
+ ioc_inode = ioc_inode_create(table, inode, weight);
+
+ (void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
}
+ }
+ UNLOCK(&inode->lock);
- table = this->private;
+ ioc_inode_lock(ioc_inode);
+ {
+ if (ioc_inode->cache.mtime == 0) {
+ ioc_inode->cache.mtime = iabuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec;
+ }
- path = local->file_loc.path;
+ ioc_inode->ia_size = iabuf->ia_size;
+ }
+ ioc_inode_unlock(ioc_inode);
- LOCK (&inode->lock);
- {
- __inode_ctx_get (inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf);
- if (!ioc_inode) {
- weight = ioc_get_priority (table, path);
+ if (!cache_still_valid) {
+ ioc_inode_flush(ioc_inode);
+ }
- ioc_inode = ioc_inode_update (table, inode,
- weight);
+ ioc_table_lock(ioc_inode->table);
+ {
+ list_move_tail(&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock(ioc_inode->table);
- __inode_ctx_put (inode, this,
- (uint64_t)(long)ioc_inode);
- }
- }
- UNLOCK (&inode->lock);
+out:
+ return 0;
+}
- ioc_inode_lock (ioc_inode);
- {
- if (ioc_inode->cache.mtime == 0) {
- ioc_inode->cache.mtime = stbuf->ia_mtime;
- ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
- }
+int32_t
+ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
+{
+ ioc_local_t *local = NULL;
- ioc_inode->ia_size = stbuf->ia_size;
- }
- ioc_inode_unlock (ioc_inode);
+ if (op_ret != 0)
+ goto out;
- cache_still_valid = ioc_cache_still_valid (ioc_inode,
- stbuf);
+ local = frame->local;
+ if (local == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- if (!cache_still_valid) {
- ioc_inode_flush (ioc_inode);
- }
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf);
out:
- if (frame->local != NULL) {
- local = frame->local;
- loc_wipe (&local->file_loc);
- }
+ if (frame->local != NULL) {
+ local = frame->local;
+ loc_wipe(&local->file_loc);
+ }
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf,
- xdata, postparent);
- return 0;
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
+ postparent);
+ return 0;
}
int32_t
-ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
+ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- int32_t op_errno = -1, ret = -1;
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
- ret = loc_copy (&local->file_loc, loc);
- if (ret != 0) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ ret = loc_copy(&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xdata);
+ STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ if (local != NULL) {
+ loc_wipe(&local->file_loc);
+ mem_put(local);
+ }
- return 0;
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ return 0;
}
/*
@@ -301,29 +360,29 @@ unwind:
*
*/
int32_t
-ioc_forget (xlator_t *this, inode_t *inode)
+ioc_forget(xlator_t *this, inode_t *inode)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (inode, this, &ioc_inode);
+ inode_ctx_get(inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode)
+ ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode);
- return 0;
+ return 0;
}
static int32_t
ioc_invalidate(xlator_t *this, inode_t *inode)
{
- ioc_inode_t *ioc_inode = NULL;
+ uint64_t ioc_inode = 0;
- inode_ctx_get(inode, this, (uint64_t *) &ioc_inode);
+ inode_ctx_get(inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush(ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode);
- return 0;
+ return 0;
}
/*
@@ -338,104 +397,103 @@ ioc_invalidate(xlator_t *this, inode_t *inode)
*
*/
int32_t
-ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- dict_t *xdata)
+ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_inode_t *ioc_inode = NULL;
- size_t destroy_size = 0;
- struct iatt *local_stbuf = NULL;
-
- local = frame->local;
- ioc_inode = local->inode;
- local_stbuf = stbuf;
-
- if ((op_ret == -1) ||
- ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
- "cache for inode(%p) is invalid. flushing all pages",
- ioc_inode);
- /* NOTE: only pages with no waiting frames are flushed by
- * ioc_inode_flush. page_fault will be generated for all
- * the pages which have waiting frames by ioc_inode_wakeup()
- */
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- if (op_ret >= 0) {
- ioc_inode->cache.mtime = stbuf->ia_mtime;
- ioc_inode->cache.mtime_nsec
- = stbuf->ia_mtime_nsec;
- }
- }
- ioc_inode_unlock (ioc_inode);
- local_stbuf = NULL;
- }
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
+ ioc_local_t *local = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ size_t destroy_size = 0;
+ struct iatt *local_stbuf = NULL;
+
+ local = frame->local;
+ ioc_inode = local->inode;
+ local_stbuf = stbuf;
+
+ if ((op_ret == -1) ||
+ ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_msg_debug(ioc_inode->table->xl->name, 0,
+ "cache for inode(%p) is invalid. flushing all pages",
+ ioc_inode);
+ /* NOTE: only pages with no waiting frames are flushed by
+ * ioc_inode_flush. page_fault will be generated for all
+ * the pages which have waiting frames by ioc_inode_wakeup()
+ */
+ ioc_inode_lock(ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush(ioc_inode);
+ if (op_ret >= 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+ local_stbuf = NULL;
+ }
+
+ if (destroy_size) {
+ ioc_table_lock(ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
}
+ ioc_table_unlock(ioc_inode->table);
+ }
- if (op_ret < 0)
- local_stbuf = NULL;
+ if (op_ret < 0)
+ local_stbuf = NULL;
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->cache.tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.last_revalidate = gf_time();
+ }
+ ioc_inode_unlock(ioc_inode);
- ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
+ ioc_inode_wakeup(frame, ioc_inode, local_stbuf);
- /* any page-fault initiated by ioc_inode_wakeup() will have its own
- * fd_ref on fd, safe to unref validate frame's private copy
- */
- fd_unref (local->fd);
+ /* any page-fault initiated by ioc_inode_wakeup() will have its own
+ * fd_ref on fd, safe to unref validate frame's private copy
+ */
+ fd_unref(local->fd);
+ dict_unref(local->xattr_req);
- STACK_DESTROY (frame->root);
+ STACK_DESTROY(frame->root);
- return 0;
+ return 0;
}
int32_t
-ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
+ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page)
{
- ioc_waitq_t *waiter = NULL, *trav = NULL;
- uint32_t page_found = 0;
- int32_t ret = 0;
+ ioc_waitq_t *waiter = NULL, *trav = NULL;
+ uint32_t page_found = 0;
+ int32_t ret = 0;
- trav = ioc_inode->waitq;
+ trav = ioc_inode->waitq;
- while (trav) {
- if (trav->data == page) {
- page_found = 1;
- break;
- }
- trav = trav->next;
+ while (trav) {
+ if (trav->data == page) {
+ page_found = 1;
+ break;
}
+ trav = trav->next;
+ }
- if (!page_found) {
- waiter = GF_CALLOC (1, sizeof (ioc_waitq_t),
- gf_ioc_mt_ioc_waitq_t);
- if (waiter == NULL) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- ret = -ENOMEM;
- goto out;
- }
-
- waiter->data = page;
- waiter->next = ioc_inode->waitq;
- ioc_inode->waitq = waiter;
+ if (!page_found) {
+ waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t);
+ if (waiter == NULL) {
+ gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ ret = -ENOMEM;
+ goto out;
}
+ waiter->data = page;
+ waiter->next = ioc_inode->waitq;
+ ioc_inode->waitq = waiter;
+ }
+
out:
- return ret;
+ return ret;
}
/*
@@ -447,74 +505,77 @@ out:
*
*/
int32_t
-ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- ioc_page_t *page)
+ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
+ ioc_page_t *page)
{
- call_frame_t *validate_frame = NULL;
- ioc_local_t *validate_local = NULL;
- ioc_local_t *local = NULL;
- int32_t ret = 0;
-
- local = frame->local;
- validate_local = mem_get0 (THIS->local_pool);
- if (validate_local == NULL) {
- ret = -1;
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- validate_frame = copy_frame (frame);
- if (validate_frame == NULL) {
- ret = -1;
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- mem_put (validate_local);
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- validate_local->fd = fd_ref (fd);
- validate_local->inode = ioc_inode;
- validate_frame->local = validate_local;
-
- STACK_WIND (validate_frame, ioc_cache_validate_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->fstat, fd, NULL);
+ call_frame_t *validate_frame = NULL;
+ ioc_local_t *validate_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t ret = 0;
+
+ local = frame->local;
+ validate_local = mem_get0(THIS->local_pool);
+ if (validate_local == NULL) {
+ ret = -1;
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ validate_frame = copy_frame(frame);
+ if (validate_frame == NULL) {
+ ret = -1;
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ mem_put(validate_local);
+ gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ validate_local->fd = fd_ref(fd);
+ validate_local->inode = ioc_inode;
+ if (local && local->xattr_req)
+ validate_local->xattr_req = dict_ref(local->xattr_req);
+ validate_frame->local = validate_local;
+
+ STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->fstat, fd,
+ validate_local->xattr_req);
out:
- return ret;
+ return ret;
}
-static inline uint32_t
-is_match (const char *path, const char *pattern)
+static uint32_t
+is_match(const char *path, const char *pattern)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
+ ret = fnmatch(pattern, path, FNM_NOESCAPE);
- return (ret == 0);
+ return (ret == 0);
}
uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path)
+ioc_get_priority(ioc_table_t *table, const char *path)
{
- uint32_t priority = 1;
- struct ioc_priority *curr = NULL;
+ uint32_t priority = 1;
+ struct ioc_priority *curr = NULL;
- if (list_empty(&table->priority_list))
- return priority;
+ if (list_empty(&table->priority_list) || !path)
+ return priority;
- priority = 0;
- list_for_each_entry (curr, &table->priority_list, list) {
- if (is_match (path, curr->pattern))
- priority = curr->priority;
- }
+ priority = 0;
+ list_for_each_entry(curr, &table->priority_list, list)
+ {
+ if (is_match(path, curr->pattern))
+ priority = curr->priority;
+ }
- return priority;
+ return priority;
}
/*
@@ -529,75 +590,68 @@ ioc_get_priority (ioc_table_t *table, const char *path)
*
*/
int32_t
-ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
+ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+
+ if (op_ret != -1) {
+ inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- local = frame->local;
- if (!this || !this->private) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ // TODO: see why inode context is NULL and handle it.
+ if (!ioc_inode) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
}
- table = this->private;
-
- if (op_ret != -1) {
- inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
- //TODO: see why inode context is NULL and handle it.
- if (!ioc_inode) {
- gf_log (this->name, GF_LOG_ERROR, "inode context is "
- "NULL (%s)", uuid_utoa (fd->inode->gfid));
- goto out;
- }
-
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ ioc_table_lock(ioc_inode->table);
+ {
+ list_move_tail(&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock(ioc_inode->table);
- ioc_inode_lock (ioc_inode);
- {
- if ((table->min_file_size > ioc_inode->ia_size)
- || ((table->max_file_size > 0)
- && (table->max_file_size < ioc_inode->ia_size))) {
- fd_ctx_set (fd, this, 1);
- }
- }
- ioc_inode_unlock (ioc_inode);
-
- /* If O_DIRECT open, we disable caching on it */
- if ((local->flags & O_DIRECT)){
- /* O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
+ ioc_inode_lock(ioc_inode);
+ {
+ if ((table->min_file_size > ioc_inode->ia_size) ||
+ ((table->max_file_size > 0) &&
+ (table->max_file_size < ioc_inode->ia_size))) {
+ fd_ctx_set(fd, this, 1);
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
+ /* If O_DIRECT open, we disable caching on it */
+ if ((local->flags & O_DIRECT)) {
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set(fd, this, 1);
}
+ }
out:
- mem_put (local);
- frame->local = NULL;
+ mem_put(local);
+ frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
/*
@@ -614,184 +668,175 @@ out:
*
*/
int32_t
-ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
- int ret = -1;
-
- local = frame->local;
- if (!this || !this->private) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- table = this->private;
- path = local->file_loc.path;
-
- if (op_ret != -1) {
- /* assign weight */
- weight = ioc_get_priority (table, path);
-
- ioc_inode = ioc_inode_update (table, inode, weight);
-
- ioc_inode_lock (ioc_inode);
- {
- ioc_inode->cache.mtime = buf->ia_mtime;
- ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
- ioc_inode->ia_size = buf->ia_size;
-
- if ((table->min_file_size > ioc_inode->ia_size)
- || ((table->max_file_size > 0)
- && (table->max_file_size < ioc_inode->ia_size))) {
- ret = fd_ctx_set (fd, this, 1);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set fd ctx",
- local->file_loc.path);
- }
- }
- ioc_inode_unlock (ioc_inode);
-
- inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
-
- /* If O_DIRECT open, we disable caching on it */
- if (local->flags & O_DIRECT) {
- /*
- * O_DIRECT is only for one fd, not the inode
- * as a whole */
- ret = fd_ctx_set (fd, this, 1);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set fd ctx",
- local->file_loc.path);
- }
-
- /* if weight == 0, we disable caching on it */
- if (!weight) {
- /* we allow a pattern-matched cache disable this way */
- ret = fd_ctx_set (fd, this, 1);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set fd ctx",
- local->file_loc.path);
- }
-
- }
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+ path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority(table, path);
+
+ ioc_inode = ioc_inode_create(table, inode, weight);
+
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+
+ if ((table->min_file_size > ioc_inode->ia_size) ||
+ ((table->max_file_size > 0) &&
+ (table->max_file_size < ioc_inode->ia_size))) {
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
+ local->file_loc.path, NULL);
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if (local->flags & O_DIRECT) {
+ /*
+ * O_DIRECT is only for one fd, not the inode
+ * as a whole */
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
+ local->file_loc.path, NULL);
+ }
+
+ /* if weight == 0, we disable caching on it */
+ if (!weight) {
+ /* we allow a pattern-matched cache disable this way */
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
+ local->file_loc.path, NULL);
+ }
+ }
out:
- frame->local = NULL;
- mem_put (local);
+ frame->local = NULL;
+ mem_put(local);
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
-
int32_t
-ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
- local = frame->local;
- if (!this || !this->private) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- table = this->private;
- path = local->file_loc.path;
+ table = this->private;
+ path = local->file_loc.path;
- if (op_ret != -1) {
- /* assign weight */
- weight = ioc_get_priority (table, path);
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority(table, path);
- ioc_inode = ioc_inode_update (table, inode, weight);
+ ioc_inode = ioc_inode_create(table, inode, weight);
- ioc_inode_lock (ioc_inode);
- {
- ioc_inode->cache.mtime = buf->ia_mtime;
- ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
- ioc_inode->ia_size = buf->ia_size;
- }
- ioc_inode_unlock (ioc_inode);
-
- inode_ctx_put (inode, this,
- (uint64_t)(long)ioc_inode);
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
}
+ ioc_inode_unlock(ioc_inode);
+
+ inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
+ }
out:
- frame->local = NULL;
+ frame->local = NULL;
- loc_wipe (&local->file_loc);
- mem_put (local);
+ loc_wipe(&local->file_loc);
+ mem_put(local);
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- int32_t op_errno = -1, ret = -1;
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
- ret = loc_copy (&local->file_loc, loc);
- if (ret != 0) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ ret = loc_copy(&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, ioc_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
+ STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
unwind:
- if (local != NULL) {
- loc_wipe (&local->file_loc);
- mem_put (local);
- }
+ if (local != NULL) {
+ loc_wipe(&local->file_loc);
+ mem_put(local);
+ }
- STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
-
/*
* ioc_open - open fop for io cache
* @frame:
@@ -801,30 +846,28 @@ unwind:
*
*/
int32_t
-ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
+ ioc_local_t *local = NULL;
- ioc_local_t *local = NULL;
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
- local->flags = flags;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ local->file_loc.inode = loc->inode;
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd,
- xdata);
+ STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
+ return 0;
}
/*
@@ -838,32 +881,29 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*
*/
int32_t
-ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- ioc_local_t *local = NULL;
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
- }
-
- local->flags = flags;
- local->file_loc.path = loc->path;
- frame->local = local;
-
- STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode,
- umask, fd, xdata);
+ ioc_local_t *local = NULL;
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
-}
+ }
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ frame->local = local;
+ STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
/*
* ioc_release - release fop for io cache
@@ -874,49 +914,26 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*
*/
int32_t
-ioc_release (xlator_t *this, fd_t *fd)
+ioc_release(xlator_t *this, fd_t *fd)
{
- return 0;
+ return 0;
}
-/*
- * ioc_readv_disabled_cbk
- * @frame:
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @vector:
- * @count:
- *
- */
int32_t
-ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+ioc_need_prune(ioc_table_t *table)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
- return 0;
-}
-
+ int64_t cache_difference = 0;
-int32_t
-ioc_need_prune (ioc_table_t *table)
-{
- int64_t cache_difference = 0;
-
- ioc_table_lock (table);
- {
- cache_difference = table->cache_used - table->cache_size;
- }
- ioc_table_unlock (table);
+ ioc_table_lock(table);
+ {
+ cache_difference = table->cache_used - table->cache_size;
+ }
+ ioc_table_unlock(table);
- if (cache_difference > 0)
- return 1;
- else
- return 0;
+ if (cache_difference > 0)
+ return 1;
+ else
+ return 0;
}
/*
@@ -928,158 +945,151 @@ ioc_need_prune (ioc_table_t *table)
*
*/
void
-ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- off_t offset, size_t size)
+ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
+ off_t offset, size_t size)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *trav = NULL;
- ioc_waitq_t *waitq = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- int32_t fault = 0;
- size_t trav_size = 0;
- off_t local_offset = 0;
- int32_t ret = -1;
- int8_t need_validate = 0;
- int8_t might_need_validate = 0; /*
- * if a page exists, do we need
- * to validate it?
- */
- local = frame->local;
- table = ioc_inode->table;
-
- rounded_offset = floor (offset, table->page_size);
- rounded_end = roof (offset + size, table->page_size);
- trav_offset = rounded_offset;
-
- /* once a frame does read, it should be waiting on something */
- local->wait_count++;
-
- /* Requested region can fall in three different pages,
- * 1. Ready - region is already in cache, we just have to serve it.
- * 2. In-transit - page fault has been generated on this page, we need
- * to wait till the page is ready
- * 3. Fault - page is not in cache, we have to generate a page fault
- */
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *trav = NULL;
+ ioc_waitq_t *waitq = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ int32_t fault = 0;
+ size_t trav_size = 0;
+ off_t local_offset = 0;
+ int32_t ret = -1;
+ int8_t need_validate = 0;
+ int8_t might_need_validate = 0; /*
+ * if a page exists, do we need
+ * to validate it?
+ */
+ local = frame->local;
+ table = ioc_inode->table;
+
+ rounded_offset = gf_floor(offset, table->page_size);
+ rounded_end = gf_roof(offset + size, table->page_size);
+ trav_offset = rounded_offset;
+
+ /* once a frame does read, it should be waiting on something */
+ local->wait_count++;
+
+ /* Requested region can fall in three different pages,
+ * 1. Ready - region is already in cache, we just have to serve it.
+ * 2. In-transit - page fault has been generated on this page, we need
+ * to wait till the page is ready
+ * 3. Fault - page is not in cache, we have to generate a page fault
+ */
+
+ might_need_validate = ioc_inode_need_revalidate(ioc_inode);
+
+ while (trav_offset < rounded_end) {
+ ioc_inode_lock(ioc_inode);
+ {
+ /* look for requested region in the cache */
+ trav = __ioc_page_get(ioc_inode, trav_offset);
- might_need_validate = ioc_inode_need_revalidate (ioc_inode);
+ local_offset = max(trav_offset, offset);
+ trav_size = min(((offset + size) - local_offset), table->page_size);
- while (trav_offset < rounded_end) {
- ioc_inode_lock (ioc_inode);
- {
- /* look for requested region in the cache */
- trav = __ioc_page_get (ioc_inode, trav_offset);
-
- local_offset = max (trav_offset, offset);
- trav_size = min (((offset+size) - local_offset),
- table->page_size);
-
- if (!trav) {
- /* page not in cache, we need to generate page
- * fault
- */
- trav = __ioc_page_create (ioc_inode,
- trav_offset);
- fault = 1;
- if (!trav) {
- gf_log (frame->this->name,
- GF_LOG_CRITICAL,
- "out of memory");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
- }
+ if (!trav) {
+ /* page not in cache, we need to generate page
+ * fault
+ */
+ trav = __ioc_page_create(ioc_inode, trav_offset);
+ fault = 1;
+ if (!trav) {
+ gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ ioc_inode_unlock(ioc_inode);
+ goto out;
+ }
+ }
+
+ __ioc_wait_on_page(trav, frame, local_offset, trav_size);
+
+ if (trav->ready) {
+ /* page found in cache */
+ if (!might_need_validate && !ioc_inode->waitq) {
+ /* fresh enough */
+ gf_msg_trace(frame->this->name, 0,
+ "cache hit for "
+ "trav_offset=%" PRId64
+ "/local_"
+ "offset=%" PRId64 "",
+ trav_offset, local_offset);
+ waitq = __ioc_page_wakeup(trav, trav->op_errno);
+ } else {
+ /* if waitq already exists, fstat
+ * revalidate is
+ * already on the way
+ */
+ if (!ioc_inode->waitq) {
+ need_validate = 1;
+ }
+
+ ret = ioc_wait_on_inode(ioc_inode, trav);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ need_validate = 0;
- __ioc_wait_on_page (trav, frame, local_offset,
- trav_size);
-
- if (trav->ready) {
- /* page found in cache */
- if (!might_need_validate && !ioc_inode->waitq) {
- /* fresh enough */
- gf_log (frame->this->name, GF_LOG_TRACE,
- "cache hit for trav_offset=%"
- PRId64"/local_offset=%"PRId64"",
- trav_offset, local_offset);
- waitq = __ioc_page_wakeup (trav,
- trav->op_errno);
- } else {
- /* if waitq already exists, fstat
- * revalidate is
- * already on the way
- */
- if (!ioc_inode->waitq) {
- need_validate = 1;
- }
-
- ret = ioc_wait_on_inode (ioc_inode,
- trav);
- if (ret < 0) {
- local->op_ret = -1;
- local->op_errno = -ret;
- need_validate = 0;
-
- waitq = __ioc_page_wakeup (trav,
- trav->op_errno);
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
- goto out;
- }
- }
- }
+ waitq = __ioc_page_wakeup(trav, trav->op_errno);
+ ioc_inode_unlock(ioc_inode);
+ ioc_waitq_return(waitq);
+ waitq = NULL;
+ goto out;
+ }
}
- ioc_inode_unlock (ioc_inode);
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
- ioc_waitq_return (waitq);
- waitq = NULL;
+ ioc_waitq_return(waitq);
+ waitq = NULL;
- if (fault) {
- fault = 0;
- /* new page created, increase the table->cache_used */
- ioc_page_fault (ioc_inode, frame, fd, trav_offset);
- }
+ if (fault) {
+ fault = 0;
+ /* new page created, increase the table->cache_used */
+ ioc_page_fault(ioc_inode, frame, fd, trav_offset);
+ }
- if (need_validate) {
- need_validate = 0;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "sending validate request for "
- "inode(%s) at offset=%"PRId64"",
- uuid_utoa (fd->inode->gfid), trav_offset);
- ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
- if (ret == -1) {
- ioc_inode_lock (ioc_inode);
- {
- waitq = __ioc_page_wakeup (trav,
- trav->op_errno);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
- goto out;
- }
+ if (need_validate) {
+ need_validate = 0;
+ gf_msg_trace(frame->this->name, 0,
+ "sending validate request for "
+ "inode(%s) at offset=%" PRId64 "",
+ uuid_utoa(fd->inode->gfid), trav_offset);
+ ret = ioc_cache_validate(frame, ioc_inode, fd, trav);
+ if (ret == -1) {
+ ioc_inode_lock(ioc_inode);
+ {
+ waitq = __ioc_page_wakeup(trav, trav->op_errno);
}
+ ioc_inode_unlock(ioc_inode);
- trav_offset += table->page_size;
+ ioc_waitq_return(waitq);
+ waitq = NULL;
+ goto out;
+ }
}
+ trav_offset += table->page_size;
+ }
+
out:
- ioc_frame_return (frame);
+ ioc_frame_return(frame);
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune(ioc_inode->table)) {
+ ioc_prune(ioc_inode->table);
+ }
- return;
+ return;
}
-
/*
* ioc_readv -
*
@@ -1091,103 +1101,108 @@ out:
*
*/
int32_t
-ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = NULL;
- uint32_t weight = 0;
- ioc_table_t *table = NULL;
- int32_t op_errno = -1;
-
- if (!this) {
- goto out;
- }
-
- inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (!ioc_inode) {
- /* caching disabled, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset, flags, xdata);
- return 0;
- }
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_local_t *local = NULL;
+ uint32_t weight = 0;
+ ioc_table_t *table = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!this) {
+ goto out;
+ }
+
+ inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (!ioc_inode) {
+ /* caching disabled, go ahead with normal readv */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
+ if (flags & O_DIRECT) {
+ /* disable caching for this fd, if O_DIRECT is used */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
- table = this->private;
+ table = this->private;
- if (!table) {
- gf_log (this->name, GF_LOG_ERROR, "table is null");
- op_errno = EINVAL;
- goto out;
- }
+ if (!table) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL,
+ NULL);
+ op_errno = EINVAL;
+ goto out;
+ }
- ioc_inode_lock (ioc_inode);
- {
- if (!ioc_inode->cache.page_table) {
- ioc_inode->cache.page_table
- = rbthash_table_init
- (IOC_PAGE_TABLE_BUCKET_COUNT,
- ioc_hashfn, NULL, 0,
- table->mem_pool);
-
- if (ioc_inode->cache.page_table == NULL) {
- op_errno = ENOMEM;
- ioc_inode_unlock (ioc_inode);
- goto out;
- }
- }
- }
- ioc_inode_unlock (ioc_inode);
-
- if (!fd_ctx_get (fd, this, NULL)) {
- /* disable caching for this fd, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset, flags, xdata);
- return 0;
- }
+ ioc_inode_lock(ioc_inode);
+ {
+ if (!ioc_inode->cache.page_table) {
+ ioc_inode->cache.page_table = rbthash_table_init(
+ this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0,
+ table->mem_pool);
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ if (ioc_inode->cache.page_table == NULL) {
op_errno = ENOMEM;
+ ioc_inode_unlock(ioc_inode);
goto out;
+ }
}
+ }
+ ioc_inode_unlock(ioc_inode);
- INIT_LIST_HEAD (&local->fill_list);
-
- frame->local = local;
- local->pending_offset = offset;
- local->pending_size = size;
- local->offset = offset;
- local->size = size;
- local->inode = ioc_inode;
-
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
- frame, offset, size);
-
- weight = ioc_inode->weight;
-
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &ioc_inode->table->inode_lru[weight]);
- }
- ioc_table_unlock (ioc_inode->table);
-
- ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
+ if (!fd_ctx_get(fd, this, NULL)) {
+ /* disable caching for this fd, go ahead with normal readv */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&local->fill_list);
+
+ frame->local = local;
+ local->pending_offset = offset;
+ local->pending_size = size;
+ local->offset = offset;
+ local->size = size;
+ local->inode = ioc_inode;
+ local->xattr_req = dict_ref(xdata);
+
+ gf_msg_trace(this->name, 0,
+ "NEW REQ (%p) offset "
+ "= %" PRId64 " && size = %" GF_PRI_SIZET "",
+ frame, offset, size);
+
+ weight = ioc_inode->weight;
+
+ ioc_table_lock(ioc_inode->table);
+ {
+ list_move_tail(&ioc_inode->inode_lru,
+ &ioc_inode->table->inode_lru[weight]);
+ }
+ ioc_table_unlock(ioc_inode->table);
+
+ ioc_dispatch_requests(frame, ioc_inode, fd, offset, size);
+ return 0;
out:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
- NULL);
- return 0;
+ STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
}
/*
@@ -1201,22 +1216,31 @@ out:
*
*/
int32_t
-ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
-
- local = frame->local;
- inode_ctx_get (local->fd->inode, this, &ioc_inode);
-
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
-
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
+
+ local = frame->local;
+ frame->local = NULL;
+ inode_ctx_get(local->fd->inode, this, &ioc_inode);
+
+ if (op_ret >= 0) {
+ ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector,
+ local->op_ret, op_ret, local->offset);
+ }
+
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ if (local->iobref) {
+ iobref_unref(local->iobref);
+ GF_FREE(local->vector);
+ }
+
+ mem_put(local);
+ return 0;
}
/*
@@ -1231,34 +1255,38 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*
*/
int32_t
-ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- /* TODO: why is it not fd_ref'ed */
- local->fd = fd;
- frame->local = local;
+ /* TODO: why is it not fd_ref'ed */
+ local->fd = fd;
+ frame->local = local;
- inode_ctx_get (fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get(fd->inode, this, &ioc_inode);
+ if (ioc_inode) {
+ local->iobref = iobref_ref(iobref);
+ local->vector = iov_dup(vector, count);
+ local->op_ret = count;
+ local->offset = offset;
+ }
- STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- flags, iobref, xdata);
+ STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
}
/*
@@ -1273,17 +1301,15 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
*
*/
int32_t
-ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
-
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
/*
* ioc_ftruncate_cbk -
*
@@ -1296,17 +1322,15 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*
*/
int32_t
-ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
-
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
/*
* ioc_truncate -
*
@@ -1317,19 +1341,19 @@ ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*
*/
int32_t
-ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ inode_ctx_get(loc->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
+ STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
/*
@@ -1342,680 +1366,719 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
*
*/
int32_t
-ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (fd->inode, this, &ioc_inode);
+ inode_ctx_get(fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
+ STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
int32_t
-ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
- return 0;
+ STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int32_t
-ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- ioc_inode_t *ioc_inode = NULL;
- uint64_t tmp_inode = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_inode;
- if (!ioc_inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "inode context is NULL: returning EBADFD");
- STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL);
- return 0;
- }
+ ioc_inode_t *ioc_inode = NULL;
+ uint64_t tmp_inode = 0;
+
+ inode_ctx_get(fd->inode, this, &tmp_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_inode;
+ if (!ioc_inode) {
+ gf_msg_debug(this->name, EBADFD,
+ "inode context is NULL: returning EBADFD");
+ STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL);
+ return 0;
+ }
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->cache.tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.last_revalidate = gf_time();
+ }
+ ioc_inode_unlock(ioc_inode);
- STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata);
+ STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
- return 0;
+ return 0;
}
int
-ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
+ gf_dirent_t *entry = NULL;
+ char *path = NULL;
+ fd_t *fd = NULL;
- if (op_ret <= 0)
- goto unwind;
+ fd = frame->local;
+ frame->local = NULL;
- list_for_each_entry (entry, &entries->list, list) {
- /* TODO: fill things */
- }
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ inode_path(fd->inode, entry->d_name, &path);
+ ioc_inode_update(this, entry->inode, path, &entry->d_stat);
+ GF_FREE(path);
+ path = NULL;
+ }
unwind:
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
+ return 0;
}
+
int
-ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
+ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- STACK_WIND (frame, ioc_readdirp_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset, dict);
+ frame->local = fd;
- return 0;
+ STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+
+ return 0;
}
static int32_t
ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *pre,
- struct iatt *post, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
- return 0;
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
}
static int32_t
ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
+ size_t len, dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (fd->inode, this, &ioc_inode);
+ inode_ctx_get(fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
- return 0;
+ STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
}
-
-
-int32_t
-ioc_get_priority_list (const char *opt_str, struct list_head *first)
+static int32_t
+ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct ioc_priority *curr = NULL, *tmp = NULL;
-
- string = gf_strdup (opt_str);
- if (string == NULL) {
- max_pri = -1;
- goto out;
- }
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- stripe_str = strtok_r (string, ",", &tmp_str);
- while (stripe_str) {
- curr = GF_CALLOC (1, sizeof (struct ioc_priority),
- gf_ioc_mt_ioc_priority);
- if (curr == NULL) {
- max_pri = -1;
- goto out;
- }
-
- list_add_tail (&curr->list, first);
-
- dup_str = gf_strdup (stripe_str);
- if (dup_str == NULL) {
- max_pri = -1;
- goto out;
- }
-
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
- max_pri = -1;
- goto out;
- }
-
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
- max_pri = -1;
- goto out;
- }
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
- gf_log ("io-cache", GF_LOG_TRACE,
- "ioc priority : pattern %s : priority %s",
- pattern,
- priority);
+static int32_t
+ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
- curr->pattern = gf_strdup (pattern);
- if (curr->pattern == NULL) {
- max_pri = -1;
- goto out;
- }
+ inode_ctx_get(fd->inode, this, &ioc_inode);
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
- max_pri = -1;
- goto out;
- } else {
- max_pri = max (max_pri, curr->priority);
- }
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- GF_FREE (dup_str);
- dup_str = NULL;
+ STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- }
+int32_t
+ioc_get_priority_list(const char *opt_str, struct list_head *first)
+{
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+
+ string = gf_strdup(opt_str);
+ if (string == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ stripe_str = strtok_r(string, ",", &tmp_str);
+ while (stripe_str) {
+ curr = GF_CALLOC(1, sizeof(struct ioc_priority),
+ gf_ioc_mt_ioc_priority);
+ if (curr == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ list_add_tail(&curr->list, first);
+
+ dup_str = gf_strdup(stripe_str);
+ if (dup_str == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ if (!pattern) {
+ max_pri = -1;
+ goto out;
+ }
+
+ priority = strtok_r(NULL, ":", &tmp_str1);
+ if (!priority) {
+ max_pri = -1;
+ goto out;
+ }
+
+ gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s",
+ pattern, priority);
+
+ curr->pattern = gf_strdup(pattern);
+ if (curr->pattern == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ curr->priority = strtol(priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
+ max_pri = -1;
+ goto out;
+ } else {
+ max_pri = max(max_pri, curr->priority);
+ }
+
+ GF_FREE(dup_str);
+ dup_str = NULL;
+
+ stripe_str = strtok_r(NULL, ",", &tmp_str);
+ }
out:
- GF_FREE (string);
+ GF_FREE(string);
- GF_FREE (dup_str);
+ GF_FREE(dup_str);
- if (max_pri == -1) {
- list_for_each_entry_safe (curr, tmp, first, list) {
- list_del_init (&curr->list);
- GF_FREE (curr->pattern);
- GF_FREE (curr);
- }
+ if (max_pri == -1) {
+ list_for_each_entry_safe(curr, tmp, first, list)
+ {
+ list_del_init(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
}
+ }
- return max_pri;
+ return max_pri;
}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_ioc_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_ioc_mt_end + 1);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL);
return ret;
-}
+ }
+ return ret;
+}
static gf_boolean_t
-check_cache_size_ok (xlator_t *this, uint64_t cache_size)
+check_cache_size_ok(xlator_t *this, uint64_t cache_size)
{
- gf_boolean_t ret = _gf_true;
- uint64_t total_mem = 0;
- uint64_t max_cache_size = 0;
- volume_option_t *opt = NULL;
-
- GF_ASSERT (this);
- opt = xlator_volume_option_get (this, "cache-size");
- if (!opt) {
- ret = _gf_false;
- gf_log (this->name, GF_LOG_ERROR,
- "could not get cache-size option");
- goto out;
- }
-
- total_mem = get_mem_size ();
- if (-1 == total_mem)
- max_cache_size = opt->max;
- else
- max_cache_size = total_mem;
-
- gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64,
- max_cache_size);
-
- if (cache_size > max_cache_size) {
- ret = _gf_false;
- gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64
- " is greater than the max size of %"PRIu64,
- cache_size, max_cache_size);
- goto out;
- }
+ gf_boolean_t ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT(this);
+ opt = xlator_volume_option_get(this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL);
+ goto out;
+ }
+
+ total_mem = get_mem_size();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);
+
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
+ "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64,
+ max_cache_size, NULL);
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
int
-reconfigure (xlator_t *this, dict_t *options)
+reconfigure(xlator_t *this, dict_t *options)
{
- data_t *data = NULL;
- ioc_table_t *table = NULL;
- int ret = -1;
- uint64_t cache_size_new = 0;
- if (!this || !this->private)
- goto out;
+ data_t *data = NULL;
+ ioc_table_t *table = NULL;
+ int ret = -1;
+ uint64_t cache_size_new = 0;
+ if (!this || !this->private)
+ goto out;
- table = this->private;
+ table = this->private;
- ioc_table_lock (table);
- {
- GF_OPTION_RECONF ("cache-timeout", table->cache_timeout,
- options, int32, unlock);
+ ioc_table_lock(table);
+ {
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool,
+ unlock);
- data = dict_get (options, "priority");
- if (data) {
- char *option_list = data_to_str (data);
+ GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, int32,
+ unlock);
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- table->max_pri = ioc_get_priority_list (option_list,
- &table->priority_list);
+ data = dict_get(options, "priority");
+ if (data) {
+ char *option_list = data_to_str(data);
- if (table->max_pri == -1) {
- goto unlock;
- }
- table->max_pri ++;
- }
+ gf_msg_trace(this->name, 0, "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list(option_list,
+ &table->priority_list);
- GF_OPTION_RECONF ("max-file-size", table->max_file_size,
- options, size, unlock);
+ if (table->max_pri == -1) {
+ goto unlock;
+ }
+ table->max_pri++;
+ }
- GF_OPTION_RECONF ("min-file-size", table->min_file_size,
- options, size, unlock);
+ GF_OPTION_RECONF("max-file-size", table->max_file_size, options,
+ size_uint64, unlock);
- if ((table->max_file_size >= 0) &&
- (table->min_file_size > table->max_file_size)) {
- gf_log (this->name, GF_LOG_ERROR, "minimum size (%"
- PRIu64") of a file that can be cached is "
- "greater than maximum size (%"PRIu64"). "
- "Hence Defaulting to old value",
- table->min_file_size, table->max_file_size);
- goto unlock;
- }
+ GF_OPTION_RECONF("min-file-size", table->min_file_size, options,
+ size_uint64, unlock);
- GF_OPTION_RECONF ("cache-size", cache_size_new,
- options, size, unlock);
- if (!check_cache_size_ok (this, cache_size_new)) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Not reconfiguring cache-size");
- goto unlock;
- }
- table->cache_size = cache_size_new;
+ if ((table->max_file_size <= UINT64_MAX) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD,
+ "minimum-size=%" PRIu64, table->min_file_size,
+ "maximum-size=%" PRIu64, table->max_file_size, NULL);
+ goto unlock;
+ }
- ret = 0;
+ GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64,
+ unlock);
+ if (!check_cache_size_ok(this, cache_size_new)) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL);
+ goto unlock;
}
+ table->cache_size = cache_size_new;
+
+ ret = 0;
+ }
unlock:
- ioc_table_unlock (table);
+ ioc_table_unlock(table);
out:
- return ret;
+ return ret;
}
-
/*
* init -
* @this:
*
*/
int32_t
-init (xlator_t *this)
+init(xlator_t *this)
{
- ioc_table_t *table = NULL;
- dict_t *xl_options = NULL;
- uint32_t index = 0;
- int32_t ret = -1;
- glusterfs_ctx_t *ctx = NULL;
- data_t *data = 0;
- uint32_t num_pages = 0;
-
- xl_options = this->options;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: io-cache not configured with exactly "
- "one child");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- table = (void *) GF_CALLOC (1, sizeof (*table), gf_ioc_mt_ioc_table_t);
- if (table == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- table->xl = this;
- table->page_size = this->ctx->page_size;
-
- GF_OPTION_INIT ("cache-size", table->cache_size, size, out);
-
- GF_OPTION_INIT ("cache-timeout", table->cache_timeout, int32, out);
-
- GF_OPTION_INIT ("min-file-size", table->min_file_size, size, out);
-
- GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out);
-
- if (!check_cache_size_ok (this, table->cache_size)) {
- ret = -1;
- goto out;
- }
-
- INIT_LIST_HEAD (&table->priority_list);
- table->max_pri = 1;
- data = dict_get (xl_options, "priority");
- if (data) {
- char *option_list = data_to_str (data);
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- table->max_pri = ioc_get_priority_list (option_list,
- &table->priority_list);
-
- if (table->max_pri == -1) {
- goto out;
- }
- }
- table->max_pri ++;
-
- INIT_LIST_HEAD (&table->inodes);
-
- if ((table->max_file_size >= 0)
- && (table->min_file_size > table->max_file_size)) {
- gf_log ("io-cache", GF_LOG_ERROR, "minimum size (%"
- PRIu64") of a file that can be cached is "
- "greater than maximum size (%"PRIu64")",
- table->min_file_size, table->max_file_size);
- goto out;
- }
-
- table->inode_lru = GF_CALLOC (table->max_pri,
- sizeof (struct list_head),
- gf_ioc_mt_list_head);
- if (table->inode_lru == NULL) {
- goto out;
- }
-
- for (index = 0; index < (table->max_pri); index++)
- INIT_LIST_HEAD (&table->inode_lru[index]);
-
- this->local_pool = mem_pool_new (ioc_local_t, 64);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- pthread_mutex_init (&table->table_lock, NULL);
- this->private = table;
-
- num_pages = (table->cache_size / table->page_size)
- + ((table->cache_size % table->page_size)
- ? 1 : 0);
-
- table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages);
- if (!table->mem_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to allocate mem_pool");
- goto out;
- }
-
- ret = 0;
-
- ctx = this->ctx;
- ioc_log2_page_size = log_base2 (ctx->page_size);
+ ioc_table_t *table = NULL;
+ dict_t *xl_options = NULL;
+ uint32_t index = 0;
+ int32_t ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ data_t *data = 0;
+ uint32_t num_pages = 0;
+
+ xl_options = this->options;
+
+ if (!this->children || this->children->next) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED,
+ NULL);
+ }
+
+ table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t);
+ if (table == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ table->xl = this;
+ table->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+ GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out);
+
+ GF_OPTION_INIT("cache-timeout", table->cache_timeout, int32, out);
+
+ GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out);
+
+ GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out);
+
+ if (!check_cache_size_ok(this, table->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&table->priority_list);
+ table->max_pri = 1;
+ data = dict_get(xl_options, "priority");
+ if (data) {
+ char *option_list = data_to_str(data);
+ gf_msg_trace(this->name, 0, "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list(option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
+ goto out;
+ }
+ }
+ table->max_pri++;
+
+ INIT_LIST_HEAD(&table->inodes);
+
+ if ((table->max_file_size <= UINT64_MAX) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
+ "minimum-size=%" PRIu64, table->min_file_size,
+ "maximum-size=%" PRIu64, table->max_file_size, NULL);
+ goto out;
+ }
+
+ table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head),
+ gf_ioc_mt_list_head);
+ if (table->inode_lru == NULL) {
+ goto out;
+ }
+
+ for (index = 0; index < (table->max_pri); index++)
+ INIT_LIST_HEAD(&table->inode_lru[index]);
+
+ this->local_pool = mem_pool_new(ioc_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL);
+ goto out;
+ }
+
+ pthread_mutex_init(&table->table_lock, NULL);
+ this->private = table;
+
+ num_pages = (table->cache_size / table->page_size) +
+ ((table->cache_size % table->page_size) ? 1 : 0);
+
+ table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages);
+ if (!table->mem_pool) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL);
+ goto out;
+ }
+
+ ret = 0;
+
+ ctx = this->ctx;
+ ioc_log2_page_size = log_base2(ctx->page_size);
out:
- if (ret == -1) {
- if (table != NULL) {
- GF_FREE (table->inode_lru);
- GF_FREE (table);
- }
+ if (ret == -1) {
+ if (table != NULL) {
+ GF_FREE(table->inode_lru);
+ GF_FREE(table);
}
+ }
- return ret;
+ return ret;
}
void
-ioc_page_waitq_dump (ioc_page_t *page, char *prefix)
+ioc_page_waitq_dump(ioc_page_t *page, char *prefix)
{
- ioc_waitq_t *trav = NULL;
- call_frame_t *frame = NULL;
- int32_t i = 0;
- char key[GF_DUMP_MAX_BUF_LEN] = {0, };
-
- trav = page->waitq;
-
- while (trav) {
- frame = trav->data;
- sprintf (key, "waitq.frame[%d]", i++);
- gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
-
- trav = trav->next;
- }
+ ioc_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ trav = page->waitq;
+
+ while (trav) {
+ frame = trav->data;
+ sprintf(key, "waitq.frame[%d]", i++);
+ gf_proc_dump_write(key, "%" PRId64, frame->root->unique);
+
+ trav = trav->next;
+ }
}
void
-__ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix)
+__ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix)
{
- ioc_waitq_t *trav = NULL;
- ioc_page_t *page = NULL;
- int32_t i = 0;
- char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ ioc_waitq_t *trav = NULL;
+ ioc_page_t *page = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
- trav = ioc_inode->waitq;
+ trav = ioc_inode->waitq;
- while (trav) {
- page = trav->data;
+ while (trav) {
+ page = trav->data;
- sprintf (key, "cache-validation-waitq.page[%d].offset", i++);
- gf_proc_dump_write (key, "%"PRId64, page->offset);
+ sprintf(key, "cache-validation-waitq.page[%d].offset", i++);
+ gf_proc_dump_write(key, "%" PRId64, page->offset);
- trav = trav->next;
- }
+ trav = trav->next;
+ }
}
void
-__ioc_page_dump (ioc_page_t *page, char *prefix)
+__ioc_page_dump(ioc_page_t *page, char *prefix)
{
+ int ret = -1;
- int ret = -1;
-
- if (!page)
- return;
- /* ioc_page_lock can be used to hold the mutex. But in statedump
- * its better to use trylock to avoid deadlocks.
- */
- ret = pthread_mutex_trylock (&page->page_lock);
- if (ret)
- goto out;
- {
- gf_proc_dump_write ("offset", "%"PRId64, page->offset);
- gf_proc_dump_write ("size", "%"PRId64, page->size);
- gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
- gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
- ioc_page_waitq_dump (page, prefix);
- }
- pthread_mutex_unlock (&page->page_lock);
+ if (!page)
+ return;
+ /* ioc_page_lock can be used to hold the mutex. But in statedump
+     * it's better to use trylock to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock(&page->page_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write("offset", "%" PRId64, page->offset);
+ gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);
+ gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");
+ ioc_page_waitq_dump(page, prefix);
+ }
+ pthread_mutex_unlock(&page->page_lock);
out:
- if (ret && page)
- gf_proc_dump_write ("Unable to dump the page information",
- "(Lock acquisition failed) %p", page);
+ if (ret && page)
+ gf_proc_dump_write("Unable to dump the page information",
+ "(Lock acquisition failed) %p", page);
- return;
+ return;
}
void
-__ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix)
+__ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
{
- off_t offset = 0;
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- int i = 0;
- char key[GF_DUMP_MAX_BUF_LEN] = {0, };
- char timestr[256] = {0, };
-
- if ((ioc_inode == NULL) || (prefix == NULL)) {
- goto out;
- }
-
- table = ioc_inode->table;
-
- if (ioc_inode->cache.tv.tv_sec) {
- gf_time_fmt (timestr, sizeof timestr,
- ioc_inode->cache.tv.tv_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec);
-
- gf_proc_dump_write ("last-cache-validation-time", "%s",
- timestr);
- }
-
- for (offset = 0; offset < ioc_inode->ia_size;
- offset += table->page_size) {
- page = __ioc_page_get (ioc_inode, offset);
- if (page == NULL) {
- continue;
- }
-
- sprintf (key, "inode.cache.page[%d]", i++);
- __ioc_page_dump (page, key);
- }
+ off_t offset = 0;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ if ((ioc_inode == NULL) || (prefix == NULL)) {
+ goto out;
+ }
+
+ table = ioc_inode->table;
+
+ if (ioc_inode->cache.last_revalidate) {
+ gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate,
+ gf_timefmt_FT);
+
+ gf_proc_dump_write("last-cache-validation-time", "%s", timestr);
+ }
+
+ for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) {
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page == NULL) {
+ continue;
+ }
+
+ sprintf(key, "inode.cache.page[%d]", i++);
+ __ioc_page_dump(page, key);
+ }
out:
- return;
+ return;
}
-
int
-ioc_inode_dump (xlator_t *this, inode_t *inode)
+ioc_inode_dump(xlator_t *this, inode_t *inode)
{
-
- char *path = NULL;
- int ret = -1;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- gf_boolean_t section_added = _gf_false;
- char uuid_str[64] = {0,};
-
- if (this == NULL || inode == NULL)
- goto out;
-
- gf_proc_dump_build_key (key_prefix, "io-cache", "inode");
-
- inode_ctx_get (inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (ioc_inode == NULL)
- goto out;
-
- /* Similar to ioc_page_dump function its better to use
- * pthread_mutex_trylock and not to use gf_log in statedump
- * to avoid deadlocks.
- */
- ret = pthread_mutex_trylock (&ioc_inode->inode_lock);
- if (ret)
- goto out;
-
- {
- if (uuid_is_null (ioc_inode->inode->gfid))
- goto unlock;
-
- gf_proc_dump_add_section (key_prefix);
- section_added = _gf_true;
-
- __inode_path (ioc_inode->inode, NULL, &path);
-
- gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight);
-
- if (path) {
- gf_proc_dump_write ("path", "%s", path);
- GF_FREE (path);
- }
-
- gf_proc_dump_write ("uuid", "%s", uuid_utoa_r
- (ioc_inode->inode->gfid, uuid_str));
- __ioc_cache_dump (ioc_inode, key_prefix);
- __ioc_inode_waitq_dump (ioc_inode, key_prefix);
- }
+ char *path = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ gf_boolean_t section_added = _gf_false;
+ char uuid_str[64] = {
+ 0,
+ };
+
+ if (this == NULL || inode == NULL)
+ goto out;
+
+ gf_proc_dump_build_key(key_prefix, "io-cache", "inode");
+
+ inode_ctx_get(inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (ioc_inode == NULL)
+ goto out;
+
+    /* Similar to the ioc_page_dump function, it's better to use
+ * pthread_mutex_trylock and not to use gf_log in statedump
+ * to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock(&ioc_inode->inode_lock);
+ if (ret)
+ goto out;
+
+ {
+ if (gf_uuid_is_null(ioc_inode->inode->gfid))
+ goto unlock;
+
+ gf_proc_dump_add_section("%s", key_prefix);
+ section_added = _gf_true;
+
+ __inode_path(ioc_inode->inode, NULL, &path);
+
+ gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight);
+
+ if (path) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
+
+ gf_proc_dump_write("uuid", "%s",
+ uuid_utoa_r(ioc_inode->inode->gfid, uuid_str));
+ __ioc_cache_dump(ioc_inode, key_prefix);
+ __ioc_inode_waitq_dump(ioc_inode, key_prefix);
+ }
unlock:
- pthread_mutex_unlock (&ioc_inode->inode_lock);
+ pthread_mutex_unlock(&ioc_inode->inode_lock);
out:
- if (ret && ioc_inode) {
- if (section_added == _gf_false)
- gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_write ("Unable to print the status of ioc_inode",
- "(Lock acquisition failed) %s",
- uuid_utoa (inode->gfid));
- }
- return ret;
+ if (ret && ioc_inode) {
+ if (section_added == _gf_false)
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("Unable to print the status of ioc_inode",
+ "(Lock acquisition failed) %s",
+ uuid_utoa(inode->gfid));
+ }
+ return ret;
}
int
-ioc_priv_dump (xlator_t *this)
+ioc_priv_dump(xlator_t *this)
{
- ioc_table_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- int ret = -1;
- gf_boolean_t add_section = _gf_false;
-
- if (!this || !this->private)
- goto out;
-
- priv = this->private;
-
- gf_proc_dump_build_key (key_prefix, "io-cache", "priv");
- gf_proc_dump_add_section (key_prefix);
- add_section = _gf_true;
-
- ret = pthread_mutex_trylock (&priv->table_lock);
- if (ret)
- goto out;
- {
- gf_proc_dump_write ("page_size", "%ld", priv->page_size);
- gf_proc_dump_write ("cache_size", "%ld", priv->cache_size);
- gf_proc_dump_write ("cache_used", "%ld", priv->cache_used);
- gf_proc_dump_write ("inode_count", "%u", priv->inode_count);
- gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout);
- gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size);
- gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size);
- }
- pthread_mutex_unlock (&priv->table_lock);
+ ioc_table_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = -1;
+ gf_boolean_t add_section = _gf_false;
+
+ if (!this || !this->private)
+ goto out;
+
+ priv = this->private;
+
+ gf_proc_dump_build_key(key_prefix, "io-cache", "priv");
+ gf_proc_dump_add_section("%s", key_prefix);
+ add_section = _gf_true;
+
+ ret = pthread_mutex_trylock(&priv->table_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size);
+ gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size);
+ gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used);
+ gf_proc_dump_write("inode_count", "%u", priv->inode_count);
+ gf_proc_dump_write("cache_timeout", "%u", priv->cache_timeout);
+ gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size);
+ gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size);
+ }
+ pthread_mutex_unlock(&priv->table_lock);
out:
- if (ret && priv) {
- if (!add_section) {
- gf_proc_dump_build_key (key_prefix, "xlator."
- "performance.io-cache", "priv");
- gf_proc_dump_add_section (key_prefix);
- }
- gf_proc_dump_write ("Unable to dump the state of private "
- "structure of io-cache xlator", "(Lock "
- "acquisition failed) %s", this->name);
- }
-
- return 0;
+ if (ret && priv) {
+ if (!add_section) {
+ gf_proc_dump_build_key(key_prefix,
+ "xlator."
+ "performance.io-cache",
+ "priv");
+ gf_proc_dump_add_section("%s", key_prefix);
+ }
+ gf_proc_dump_write(
+ "Unable to dump the state of private "
+ "structure of io-cache xlator",
+ "(Lock "
+ "acquisition failed) %s",
+ this->name);
+ }
+
+ return 0;
}
/*
@@ -2025,106 +2088,144 @@ out:
*
*/
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- ioc_table_t *table = NULL;
- struct ioc_priority *curr = NULL, *tmp = NULL;
- int i = 0;
-
- table = this->private;
-
- if (table == NULL)
- return;
-
- this->private = NULL;
-
- if (table->mem_pool != NULL) {
- mem_pool_destroy (table->mem_pool);
- table->mem_pool = NULL;
- }
-
- list_for_each_entry_safe (curr, tmp, &table->priority_list, list) {
- list_del_init (&curr->list);
- GF_FREE (curr->pattern);
- GF_FREE (curr);
- }
-
- for (i = 0; i < table->max_pri; i++) {
- GF_ASSERT (list_empty (&table->inode_lru[i]));
- }
+ ioc_table_t *table = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
- GF_ASSERT (list_empty (&table->inodes));
- pthread_mutex_destroy (&table->table_lock);
- GF_FREE (table);
+ table = this->private;
- this->private = NULL;
+ if (table == NULL)
return;
+
+ this->private = NULL;
+
+ if (table->mem_pool != NULL) {
+ mem_pool_destroy(table->mem_pool);
+ table->mem_pool = NULL;
+ }
+
+ list_for_each_entry_safe(curr, tmp, &table->priority_list, list)
+ {
+ list_del_init(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
+ }
+
+ /* inode_lru and inodes list can be empty in case fini() is
+ * called soon after init()? Hence commenting the below asserts.
+ */
+ /*for (i = 0; i < table->max_pri; i++) {
+ GF_ASSERT (list_empty (&table->inode_lru[i]));
+ }
+
+ GF_ASSERT (list_empty (&table->inodes));
+ */
+ pthread_mutex_destroy(&table->table_lock);
+ GF_FREE(table);
+
+ this->private = NULL;
+ return;
}
struct xlator_fops fops = {
- .open = ioc_open,
- .create = ioc_create,
- .readv = ioc_readv,
- .writev = ioc_writev,
- .truncate = ioc_truncate,
- .ftruncate = ioc_ftruncate,
- .lookup = ioc_lookup,
- .lk = ioc_lk,
- .setattr = ioc_setattr,
- .mknod = ioc_mknod,
-
- .readdirp = ioc_readdirp,
- .discard = ioc_discard,
+ .open = ioc_open,
+ .create = ioc_create,
+ .readv = ioc_readv,
+ .writev = ioc_writev,
+ .truncate = ioc_truncate,
+ .ftruncate = ioc_ftruncate,
+ .lookup = ioc_lookup,
+ .lk = ioc_lk,
+ .setattr = ioc_setattr,
+ .mknod = ioc_mknod,
+
+ .readdirp = ioc_readdirp,
+ .discard = ioc_discard,
+ .zerofill = ioc_zerofill,
};
-
struct xlator_dumpops dumpops = {
- .priv = ioc_priv_dump,
- .inodectx = ioc_inode_dump,
+ .priv = ioc_priv_dump,
+ .inodectx = ioc_inode_dump,
};
struct xlator_cbks cbks = {
- .forget = ioc_forget,
- .release = ioc_release,
- .invalidate = ioc_invalidate,
+ .forget = ioc_forget,
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_PRIORITY_LIST,
- .default_value = "",
- .description = "Assigns priority to filenames with specific "
- "patterns so that when a page needs to be ejected "
- "out of the cache, the page of a file whose "
- "priority is the lowest will be ejected earlier"
- },
- { .key = {"cache-timeout", "force-revalidate-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 60,
- .default_value = "1",
- .description = "The cached data for a file will be retained till "
- "'cache-refresh-timeout' seconds, after which data "
- "re-validation is performed."
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 4 * GF_UNIT_MB,
- .max = 32 * GF_UNIT_GB,
- .default_value = "32MB",
- .description = "Size of the read cache."
- },
- { .key = {"min-file-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .default_value = "0",
- .description = "Minimum file size which would be cached by the "
- "io-cache translator."
- },
- { .key = {"max-file-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .default_value = "0",
- .description = "Maximum file size which would be cached by the "
- "io-cache translator."
- },
- { .key = {NULL} },
+ {
+ .key = {"io-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable io-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"priority"},
+ .type = GF_OPTION_TYPE_PRIORITY_LIST,
+ .default_value = "",
+ .description = "Assigns priority to filenames with specific "
+ "patterns so that when a page needs to be ejected "
+ "out of the cache, the page of a file whose "
+ "priority is the lowest will be ejected earlier",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"cache-timeout", "force-revalidate-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 60,
+ .default_value = "1",
+ .description = "The cached data for a file will be retained for "
+ "'cache-refresh-timeout' seconds, after which data "
+ "re-validation is performed.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4 * GF_UNIT_MB,
+ .max = INFINITY,
+ .default_value = "32MB",
+ .description = "Size of the read cache.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Minimum file size which would be cached by the "
+ "io-cache translator.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Maximum file size which would be cached by the "
+ "io-cache translator.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-cache"},
+ .description = "Enable/Disable io cache translator"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-cache",
+ .category = GF_MAINTAINED,
};
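Note on the "priority" option handled by ioc_get_priority_list() above: it takes a comma-separated list of pattern:priority pairs, e.g. "*.jpg:1,abc*:2". The snippet below is a minimal standalone sketch of that parsing using only libc; the helper name, the printf reporting and the simplified error handling are illustrative and are not part of the xlator.

/* Sketch of the pattern:priority parsing done by ioc_get_priority_list().
 * A malformed entry invalidates the whole list (returns -1), mirroring the
 * max_pri = -1 handling in the xlator code above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
parse_priority_list(const char *opt_str)
{
    int max_pri = 1;
    char *saveptr = NULL, *saveptr2 = NULL, *endptr = NULL;
    char *string = strdup(opt_str);

    if (string == NULL)
        return -1;

    for (char *pair = strtok_r(string, ",", &saveptr); pair != NULL;
         pair = strtok_r(NULL, ",", &saveptr)) {
        char *pattern = strtok_r(pair, ":", &saveptr2);
        char *priority = pattern ? strtok_r(NULL, ":", &saveptr2) : NULL;

        if (!pattern || !priority) {
            max_pri = -1; /* "pattern" or ":priority" part missing */
            break;
        }

        long pri = strtol(priority, &endptr, 0);
        if (endptr && *endptr) {
            max_pri = -1; /* trailing garbage after the number */
            break;
        }

        printf("pattern %s -> priority %ld\n", pattern, pri);
        if (pri > max_pri)
            max_pri = (int)pri;
    }

    free(string);
    return max_pri;
}

int
main(void)
{
    /* prints 2; init()/reconfigure() then bump the result by one to size
     * the per-priority inode_lru array */
    printf("max priority = %d\n", parse_priority_list("*.jpg:1,abc*:2"));
    return 0;
}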
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
index 46d758a6608..14923c75edc 100644
--- a/xlators/performance/io-cache/src/io-cache.h
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -11,27 +11,19 @@
#ifndef __IO_CACHE_H
#define __IO_CACHE_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/types.h>
-#include "compat-errno.h"
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "call-stub.h"
-#include "rbthash.h"
-#include "hashfn.h"
+#include <glusterfs/compat-errno.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/rbthash.h>
#include <sys/time.h>
#include <fnmatch.h>
+#include "io-cache-messages.h"
-#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
-#define IOC_CACHE_SIZE (32 * 1024 * 1024)
+#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
+#define IOC_CACHE_SIZE (32 * 1024 * 1024)
#define IOC_PAGE_TABLE_BUCKET_COUNT 1
struct ioc_table;
@@ -40,9 +32,9 @@ struct ioc_page;
struct ioc_inode;
struct ioc_priority {
- struct list_head list;
- char *pattern;
- uint32_t priority;
+ struct list_head list;
+ char *pattern;
+ uint32_t priority;
};
/*
@@ -53,10 +45,10 @@ struct ioc_priority {
* @data: pointer to the frame which is waiting
*/
struct ioc_waitq {
- struct ioc_waitq *next;
- void *data;
- off_t pending_offset;
- size_t pending_size;
+ struct ioc_waitq *next;
+ void *data;
+ off_t pending_offset;
+ size_t pending_size;
};
/*
@@ -64,39 +56,41 @@ struct ioc_waitq {
*
*/
struct ioc_fill {
- struct list_head list; /* list of ioc_fill structures of a frame */
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
- struct iobref *iobref;
+ struct list_head list; /* list of ioc_fill structures of a frame */
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
};
struct ioc_local {
- mode_t mode;
- int32_t flags;
- loc_t file_loc;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- struct list_head fill_list; /* list of ioc_fill structures */
- off_t pending_offset; /*
- * offset from this frame should
- * continue
- */
- size_t pending_size; /*
- * size of data this frame is waiting
- * on
- */
- struct ioc_inode *inode;
- int32_t wait_count;
- pthread_mutex_t local_lock;
- struct ioc_waitq *waitq;
- void *stub;
- fd_t *fd;
- int32_t need_xattr;
- dict_t *xattr_req;
+ mode_t mode;
+ int32_t flags;
+ loc_t file_loc;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ struct list_head fill_list; /* list of ioc_fill structures */
+    off_t pending_offset;       /*
+                                 * offset from which this frame
+                                 * should continue
+                                 */
+ size_t pending_size; /*
+ * size of data this frame is waiting
+ * on
+ */
+ struct ioc_inode *inode;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
+ struct ioc_waitq *waitq;
+ void *stub;
+ fd_t *fd;
+ struct iovec *vector;
+ struct iobref *iobref;
+ int32_t need_xattr;
+ dict_t *xattr_req;
};
/*
@@ -104,71 +98,69 @@ struct ioc_local {
*
*/
struct ioc_page {
- struct list_head page_lru;
- struct ioc_inode *inode; /* inode this page belongs to */
- struct ioc_priority *priority;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ioc_waitq *waitq;
- struct iobref *iobref;
- pthread_mutex_t page_lock;
- int32_t op_errno;
- char stale;
+ struct list_head page_lru;
+ struct ioc_inode *inode; /* inode this page belongs to */
+ struct ioc_priority *priority;
+ char dirty;
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ioc_waitq *waitq;
+ struct iobref *iobref;
+ pthread_mutex_t page_lock;
+ int32_t op_errno;
+ char stale;
};
struct ioc_cache {
- rbthash_table_t *page_table;
- struct list_head page_lru;
- time_t mtime; /*
- * seconds component of file mtime
- */
- time_t mtime_nsec; /*
- * nanosecond component of file mtime
- */
- struct timeval tv; /*
- * time-stamp at last re-validate
- */
+ rbthash_table_t *page_table;
+ struct list_head page_lru;
+ time_t mtime; /*
+ * seconds component of file mtime
+ */
+ time_t mtime_nsec; /*
+ * nanosecond component of file mtime
+ */
+ time_t last_revalidate; /* timestamp at last re-validate */
};
struct ioc_inode {
- struct ioc_table *table;
- off_t ia_size;
- struct ioc_cache cache;
- struct list_head inode_list; /*
- * list of inodes, maintained by
- * io-cache translator
- */
- struct list_head inode_lru;
- struct ioc_waitq *waitq;
- pthread_mutex_t inode_lock;
- uint32_t weight; /*
- * weight of the inode, increases
- * on each read
- */
- inode_t *inode;
+ struct ioc_table *table;
+ off_t ia_size;
+ struct ioc_cache cache;
+ struct list_head inode_list; /*
+ * list of inodes, maintained by
+ * io-cache translator
+ */
+ struct list_head inode_lru;
+ struct ioc_waitq *waitq;
+ pthread_mutex_t inode_lock;
+ uint32_t weight; /*
+ * weight of the inode, increases
+ * on each read
+ */
+ inode_t *inode;
};
struct ioc_table {
- uint64_t page_size;
- uint64_t cache_size;
- uint64_t cache_used;
- uint64_t min_file_size;
- uint64_t max_file_size;
- struct list_head inodes; /* list of inodes cached */
- struct list_head active;
- struct list_head *inode_lru;
- struct list_head priority_list;
- int32_t readv_count;
- pthread_mutex_t table_lock;
- xlator_t *xl;
- uint32_t inode_count;
- int32_t cache_timeout;
- int32_t max_pri;
- struct mem_pool *mem_pool;
+ uint64_t page_size;
+ uint64_t cache_size;
+ uint64_t cache_used;
+ uint64_t min_file_size;
+ uint64_t max_file_size;
+ struct list_head inodes; /* list of inodes cached */
+ struct list_head active;
+ struct list_head *inode_lru;
+ struct list_head priority_list;
+ int32_t readv_count;
+ pthread_mutex_t table_lock;
+ xlator_t *xl;
+ uint32_t inode_count;
+ int32_t cache_timeout;
+ int32_t max_pri;
+ struct mem_pool *mem_pool;
};
typedef struct ioc_table ioc_table_t;
@@ -179,154 +171,136 @@ typedef struct ioc_waitq ioc_waitq_t;
typedef struct ioc_fill ioc_fill_t;
void *
-str_to_ptr (char *string);
+str_to_ptr(char *string);
char *
-ptr_to_str (void *ptr);
+ptr_to_str(void *ptr);
int32_t
-ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata);
+ioc_readv_disabled_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata);
ioc_page_t *
-__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_get(ioc_inode_t *ioc_inode, off_t offset);
ioc_page_t *
-__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_create(ioc_inode_t *ioc_inode, off_t offset);
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset);
+ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset);
void
-__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
+__ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size);
ioc_waitq_t *
-__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno);
+__ioc_page_wakeup(ioc_page_t *page, int32_t op_errno);
void
-ioc_page_flush (ioc_page_t *page);
+ioc_page_flush(ioc_page_t *page);
ioc_waitq_t *
-__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
+__ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno);
void
-ioc_frame_return (call_frame_t *frame);
+ioc_frame_return(call_frame_t *frame);
void
-ioc_waitq_return (ioc_waitq_t *waitq);
+ioc_waitq_return(ioc_waitq_t *waitq);
int32_t
-ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size, int32_t op_errno);
-
-#define ioc_inode_lock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "locked inode(%p)", ioc_inode); \
- pthread_mutex_lock (&ioc_inode->inode_lock); \
- } while (0)
-
-
-#define ioc_inode_unlock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked inode(%p)", ioc_inode); \
- pthread_mutex_unlock (&ioc_inode->inode_lock); \
- } while (0)
-
-
-#define ioc_table_lock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "locked table(%p)", table); \
- pthread_mutex_lock (&table->table_lock); \
- } while (0)
-
-
-#define ioc_table_unlock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "unlocked table(%p)", table); \
- pthread_mutex_unlock (&table->table_lock); \
- } while (0)
-
-
-#define ioc_local_lock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "locked local(%p)", local); \
- pthread_mutex_lock (&local->local_lock); \
- } while (0)
-
-
-#define ioc_local_unlock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked local(%p)", local); \
- pthread_mutex_unlock (&local->local_lock); \
- } while (0)
-
-
-#define ioc_page_lock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "locked page(%p)", page); \
- pthread_mutex_lock (&page->page_lock); \
- } while (0)
-
-
-#define ioc_page_unlock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked page(%p)", page); \
- pthread_mutex_unlock (&page->page_lock); \
- } while (0)
-
-
-static inline uint64_t
-time_elapsed (struct timeval *now,
- struct timeval *then)
-{
- uint64_t sec = now->tv_sec - then->tv_sec;
-
- if (sec)
- return sec;
-
- return 0;
-}
+ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size,
+ int32_t op_errno);
+
+#define ioc_inode_lock(ioc_inode) \
+ do { \
+ gf_msg_trace(ioc_inode->table->xl->name, 0, "locked inode(%p)", \
+ ioc_inode); \
+ pthread_mutex_lock(&ioc_inode->inode_lock); \
+ } while (0)
+
+#define ioc_inode_unlock(ioc_inode) \
+ do { \
+ gf_msg_trace(ioc_inode->table->xl->name, 0, "unlocked inode(%p)", \
+ ioc_inode); \
+ pthread_mutex_unlock(&ioc_inode->inode_lock); \
+ } while (0)
+
+#define ioc_table_lock(table) \
+ do { \
+ gf_msg_trace(table->xl->name, 0, "locked table(%p)", table); \
+ pthread_mutex_lock(&table->table_lock); \
+ } while (0)
+
+#define ioc_table_unlock(table) \
+ do { \
+ gf_msg_trace(table->xl->name, 0, "unlocked table(%p)", table); \
+ pthread_mutex_unlock(&table->table_lock); \
+ } while (0)
+
+#define ioc_local_lock(local) \
+ do { \
+ gf_msg_trace(local->inode->table->xl->name, 0, "locked local(%p)", \
+ local); \
+ pthread_mutex_lock(&local->local_lock); \
+ } while (0)
+
+#define ioc_local_unlock(local) \
+ do { \
+ gf_msg_trace(local->inode->table->xl->name, 0, "unlocked local(%p)", \
+ local); \
+ pthread_mutex_unlock(&local->local_lock); \
+ } while (0)
+
+#define ioc_page_lock(page) \
+ do { \
+ gf_msg_trace(page->inode->table->xl->name, 0, "locked page(%p)", \
+ page); \
+ pthread_mutex_lock(&page->page_lock); \
+ } while (0)
+
+#define ioc_page_unlock(page) \
+ do { \
+ gf_msg_trace(page->inode->table->xl->name, 0, "unlocked page(%p)", \
+ page); \
+ pthread_mutex_unlock(&page->page_lock); \
+ } while (0)
ioc_inode_t *
-ioc_inode_search (ioc_table_t *table, inode_t *inode);
+ioc_inode_search(ioc_table_t *table, inode_t *inode);
void
-ioc_inode_destroy (ioc_inode_t *ioc_inode);
+ioc_inode_destroy(ioc_inode_t *ioc_inode);
+
+int32_t
+ioc_inode_update(xlator_t *this, inode_t *inode, char *path,
+ struct iatt *iabuf);
ioc_inode_t *
-ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight);
+ioc_inode_create(ioc_table_t *table, inode_t *inode, uint32_t weight);
int64_t
-__ioc_page_destroy (ioc_page_t *page);
+__ioc_page_destroy(ioc_page_t *page);
int64_t
-__ioc_inode_flush (ioc_inode_t *ioc_inode);
+__ioc_inode_flush(ioc_inode_t *ioc_inode);
void
-ioc_inode_flush (ioc_inode_t *ioc_inode);
+ioc_inode_flush(ioc_inode_t *ioc_inode);
void
-ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
- struct iatt *stbuf);
+ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iatt *stbuf);
int8_t
-ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf);
+ioc_cache_still_valid(ioc_inode_t *ioc_inode, struct iatt *stbuf);
int32_t
-ioc_prune (ioc_table_t *table);
+ioc_prune(ioc_table_t *table);
int32_t
-ioc_need_prune (ioc_table_t *table);
+ioc_need_prune(ioc_table_t *table);
#endif /* __IO_CACHE_H */
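The header change above replaces the struct timeval revalidation stamp (and the time_elapsed() helper) with a plain time_t last_revalidate, matching the gf_time() call now made under ioc_inode_lock() in ioc_lk(). Below is a minimal sketch of how such a second-granularity stamp is typically compared against the cache-timeout option; the struct and function names are assumptions for illustration, and the real freshness decision also involves ioc_cache_still_valid(), which checks the cached mtime/mtime_nsec against the iatt returned by the server.

/* Illustrative only: a second-granularity freshness check in the spirit of
 * the last_revalidate field added above.  time(NULL) stands in for
 * gf_time(); in the xlator the stamp is read under ioc_inode_lock(). */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct cache_state {
    time_t last_revalidate; /* stamped whenever the cache is (re)validated */
    int cache_timeout;      /* io-cache "cache-timeout" option, in seconds */
};

static bool
cache_needs_revalidate(const struct cache_state *c)
{
    /* in this sketch a timeout of 0 means every read revalidates */
    return (time(NULL) - c->last_revalidate) >= c->cache_timeout;
}

int
main(void)
{
    struct cache_state c = {.last_revalidate = time(NULL), .cache_timeout = 1};
    printf("stale now? %d\n", cache_needs_revalidate(&c)); /* 0: just stamped */
    return 0;
}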
diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c
index 86a54bb14ca..97767d85285 100644
--- a/xlators/performance/io-cache/src/ioc-inode.c
+++ b/xlators/performance/io-cache/src/ioc-inode.c
@@ -8,11 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "io-cache.h"
#include "ioc-mem-types.h"
@@ -24,145 +19,140 @@ extern int ioc_log2_page_size;
*
*/
void *
-str_to_ptr (char *string)
+str_to_ptr(char *string)
{
- void *ptr = NULL;
+ void *ptr = NULL;
- GF_VALIDATE_OR_GOTO ("io-cache", string, out);
+ GF_VALIDATE_OR_GOTO("io-cache", string, out);
- ptr = (void *)strtoul (string, NULL, 16);
+ ptr = (void *)strtoul(string, NULL, 16);
out:
- return ptr;
+ return ptr;
}
-
/*
* ptr_to_str - convert a pointer to string
* @ptr: pointer
*
*/
char *
-ptr_to_str (void *ptr)
+ptr_to_str(void *ptr)
{
- int ret = 0;
- char *str = NULL;
+ int ret = 0;
+ char *str = NULL;
- GF_VALIDATE_OR_GOTO ("io-cache", ptr, out);
+ GF_VALIDATE_OR_GOTO("io-cache", ptr, out);
- ret = gf_asprintf (&str, "%p", ptr);
- if (-1 == ret) {
- gf_log ("io-cache", GF_LOG_WARNING,
- "asprintf failed while converting ptr to str");
- str = NULL;
- goto out;
- }
+ ret = gf_asprintf(&str, "%p", ptr);
+ if (-1 == ret) {
+ gf_smsg("io-cache", GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_STR_COVERSION_FAILED, NULL);
+ str = NULL;
+ goto out;
+ }
out:
- return str;
+ return str;
}
-
void
-ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
- struct iatt *stbuf)
+ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iatt *stbuf)
{
- ioc_waitq_t *waiter = NULL, *waited = NULL;
- ioc_waitq_t *page_waitq = NULL;
- int8_t cache_still_valid = 1;
- ioc_local_t *local = NULL;
- int8_t need_fault = 0;
- ioc_page_t *waiter_page = NULL;
-
- GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
-
- if (ioc_inode == NULL) {
- local->op_ret = -1;
- local->op_errno = EINVAL;
- gf_log (frame->this->name, GF_LOG_WARNING, "ioc_inode is NULL");
- goto out;
- }
-
- ioc_inode_lock (ioc_inode);
- {
- waiter = ioc_inode->waitq;
- ioc_inode->waitq = NULL;
- }
- ioc_inode_unlock (ioc_inode);
-
- if (stbuf)
- cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf);
- else
- cache_still_valid = 0;
-
+ ioc_waitq_t *waiter = NULL, *waited = NULL;
+ ioc_waitq_t *page_waitq = NULL;
+ int8_t cache_still_valid = 1;
+ ioc_local_t *local = NULL;
+ int8_t need_fault = 0;
+ ioc_page_t *waiter_page = NULL;
+
+ GF_VALIDATE_OR_GOTO("io-cache", frame, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(frame->this->name, local, out);
+
+ if (ioc_inode == NULL) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_INODE_NULL,
+ NULL);
+ goto out;
+ }
+
+ if (stbuf)
+ cache_still_valid = ioc_cache_still_valid(ioc_inode, stbuf);
+ else
+ cache_still_valid = 0;
+
+ ioc_inode_lock(ioc_inode);
+ {
+ waiter = ioc_inode->waitq;
if (!waiter) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "cache validate called without any "
- "page waiting to be validated");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_PAGE_WAIT_VALIDATE, NULL);
+
+ ioc_inode_unlock(ioc_inode);
+ goto out;
}
while (waiter) {
- waiter_page = waiter->data;
- page_waitq = NULL;
-
- if (waiter_page) {
- if (cache_still_valid) {
- /* cache valid, wake up page */
- ioc_inode_lock (ioc_inode);
- {
- page_waitq =
- __ioc_page_wakeup (waiter_page,
- waiter_page->op_errno);
- }
- ioc_inode_unlock (ioc_inode);
- if (page_waitq)
- ioc_waitq_return (page_waitq);
- } else {
- /* cache invalid, generate page fault and set
- * page->ready = 0, to avoid double faults
- */
- ioc_inode_lock (ioc_inode);
- {
- if (waiter_page->ready) {
- waiter_page->ready = 0;
- need_fault = 1;
- } else {
- gf_log (frame->this->name,
- GF_LOG_TRACE,
- "validate frame(%p) is "
- "waiting for in-transit"
- " page = %p", frame,
- waiter_page);
- }
- }
- ioc_inode_unlock (ioc_inode);
-
- if (need_fault) {
- need_fault = 0;
- ioc_page_fault (ioc_inode, frame,
- local->fd,
- waiter_page->offset);
- }
- }
+ waiter_page = waiter->data;
+ ioc_inode->waitq = waiter->next;
+ page_waitq = NULL;
+
+ if (waiter_page) {
+ if (cache_still_valid) {
+ /* cache valid, wake up page */
+ page_waitq = __ioc_page_wakeup(waiter_page,
+ waiter_page->op_errno);
+ if (page_waitq) {
+ ioc_inode_unlock(ioc_inode);
+ ioc_waitq_return(page_waitq);
+ ioc_inode_lock(ioc_inode);
+ }
+ } else {
+ /* cache invalid, generate page fault and set
+ * page->ready = 0, to avoid double faults
+ */
+ if (waiter_page->ready) {
+ waiter_page->ready = 0;
+ need_fault = 1;
+ } else {
+ gf_msg_trace(frame->this->name, 0,
+ "validate "
+ "frame(%p) is "
+ "waiting for "
+ "in-transit"
+ " page = %p",
+ frame, waiter_page);
+ }
+
+ if (need_fault) {
+ need_fault = 0;
+ ioc_inode_unlock(ioc_inode);
+ ioc_page_fault(ioc_inode, frame, local->fd,
+ waiter_page->offset);
+ ioc_inode_lock(ioc_inode);
+ }
}
+ }
- waited = waiter;
- waiter = waiter->next;
+ waited = waiter;
+ waiter = ioc_inode->waitq;
- waited->data = NULL;
- GF_FREE (waited);
+ waited->data = NULL;
+ GF_FREE(waited);
}
+ }
+ ioc_inode_unlock(ioc_inode);
out:
- return;
+ return;
}
-
/*
- * ioc_inode_update - create a new ioc_inode_t structure and add it to
+ * ioc_inode_create - create a new ioc_inode_t structure and add it to
* the table table. fill in the fields which are derived
* from inode_t corresponding to the file
*
@@ -172,40 +162,37 @@ out:
* not for external reference
*/
ioc_inode_t *
-ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight)
+ioc_inode_create(ioc_table_t *table, inode_t *inode, uint32_t weight)
{
- ioc_inode_t *ioc_inode = NULL;
+ ioc_inode_t *ioc_inode = NULL;
- GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
- ioc_inode = GF_CALLOC (1, sizeof (ioc_inode_t), gf_ioc_mt_ioc_inode_t);
- if (ioc_inode == NULL) {
- goto out;
- }
+ ioc_inode = GF_CALLOC(1, sizeof(ioc_inode_t), gf_ioc_mt_ioc_inode_t);
+ if (ioc_inode == NULL) {
+ goto out;
+ }
- ioc_inode->inode = inode;
- ioc_inode->table = table;
- INIT_LIST_HEAD (&ioc_inode->cache.page_lru);
- pthread_mutex_init (&ioc_inode->inode_lock, NULL);
- ioc_inode->weight = weight;
-
- ioc_table_lock (table);
- {
- table->inode_count++;
- list_add (&ioc_inode->inode_list, &table->inodes);
- list_add_tail (&ioc_inode->inode_lru,
- &table->inode_lru[weight]);
- }
- ioc_table_unlock (table);
+ ioc_inode->inode = inode;
+ ioc_inode->table = table;
+ INIT_LIST_HEAD(&ioc_inode->cache.page_lru);
+ pthread_mutex_init(&ioc_inode->inode_lock, NULL);
+ ioc_inode->weight = weight;
+
+ ioc_table_lock(table);
+ {
+ table->inode_count++;
+ list_add(&ioc_inode->inode_list, &table->inodes);
+ list_add_tail(&ioc_inode->inode_lru, &table->inode_lru[weight]);
+ }
+ ioc_table_unlock(table);
- gf_log (table->xl->name, GF_LOG_TRACE,
- "adding to inode_lru[%d]", weight);
+ gf_msg_trace(table->xl->name, 0, "adding to inode_lru[%d]", weight);
out:
- return ioc_inode;
+ return ioc_inode;
}
-
/*
* ioc_inode_destroy - destroy an ioc_inode_t object.
*
@@ -214,27 +201,27 @@ out:
* to be called only from ioc_forget.
*/
void
-ioc_inode_destroy (ioc_inode_t *ioc_inode)
+ioc_inode_destroy(ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = NULL;
+ ioc_table_t *table = NULL;
- GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
- table = ioc_inode->table;
+ table = ioc_inode->table;
- ioc_table_lock (table);
- {
- table->inode_count--;
- list_del (&ioc_inode->inode_list);
- list_del (&ioc_inode->inode_lru);
- }
- ioc_table_unlock (table);
+ ioc_table_lock(table);
+ {
+ table->inode_count--;
+ list_del(&ioc_inode->inode_list);
+ list_del(&ioc_inode->inode_lru);
+ }
+ ioc_table_unlock(table);
- ioc_inode_flush (ioc_inode);
- rbthash_table_destroy (ioc_inode->cache.page_table);
+ ioc_inode_flush(ioc_inode);
+ rbthash_table_destroy(ioc_inode->cache.page_table);
- pthread_mutex_destroy (&ioc_inode->inode_lock);
- GF_FREE (ioc_inode);
+ pthread_mutex_destroy(&ioc_inode->inode_lock);
+ GF_FREE(ioc_inode);
out:
- return;
+ return;
}
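
The reworked ioc_inode_wakeup() above now consumes ioc_inode->waitq one waiter at a time while holding the inode lock, dropping the lock only around ioc_waitq_return()/ioc_page_fault() and re-taking it before unlinking the next waiter, so waiters queued concurrently are also seen and no callback runs with the lock held. A minimal standalone sketch of that drain-under-lock pattern, with illustrative types in place of the real ioc_waitq_t/ioc_inode_t:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct waiter {
    struct waiter *next;
    int id;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct waiter *waitq;   /* protected by lock */

static void
wake_one(struct waiter *w)     /* must be called unlocked */
{
    printf("waking waiter %d\n", w->id);
}

static void
drain_waitq(void)
{
    struct waiter *w;

    pthread_mutex_lock(&lock);
    while ((w = waitq) != NULL) {
        waitq = w->next;               /* unlink while locked */

        pthread_mutex_unlock(&lock);   /* never call out with the lock held */
        wake_one(w);
        free(w);
        pthread_mutex_lock(&lock);     /* re-take before the next waiter */
    }
    pthread_mutex_unlock(&lock);
}

int
main(void)
{
    for (int i = 0; i < 3; i++) {
        struct waiter *w = malloc(sizeof(*w));
        w->id = i;
        w->next = waitq;
        waitq = w;
    }
    drain_waitq();
    return 0;
}

Dropping the lock around the wake-up mirrors the diff above, where the page fault and waitq return paths can themselves re-enter io-cache and must not find the inode lock already taken.
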
diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h
index 9b68f9fce5f..20c9a12021e 100644
--- a/xlators/performance/io-cache/src/ioc-mem-types.h
+++ b/xlators/performance/io-cache/src/ioc-mem-types.h
@@ -11,19 +11,19 @@
#ifndef __IOC_MT_H__
#define __IOC_MT_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_ioc_mem_types_ {
- gf_ioc_mt_iovec = gf_common_mt_end + 1,
- gf_ioc_mt_ioc_table_t,
- gf_ioc_mt_char,
- gf_ioc_mt_ioc_waitq_t,
- gf_ioc_mt_ioc_priority,
- gf_ioc_mt_list_head,
- gf_ioc_mt_call_pool_t,
- gf_ioc_mt_ioc_inode_t,
- gf_ioc_mt_ioc_fill_t,
- gf_ioc_mt_ioc_newpage_t,
- gf_ioc_mt_end
+ gf_ioc_mt_iovec = gf_common_mt_end + 1,
+ gf_ioc_mt_ioc_table_t,
+ gf_ioc_mt_char,
+ gf_ioc_mt_ioc_waitq_t,
+ gf_ioc_mt_ioc_priority,
+ gf_ioc_mt_list_head,
+ gf_ioc_mt_call_pool_t,
+ gf_ioc_mt_ioc_inode_t,
+ gf_ioc_mt_ioc_fill_t,
+ gf_ioc_mt_ioc_newpage_t,
+ gf_ioc_mt_end
};
#endif
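
The enum above gives io-cache its own block of memory-accounting types, numbered from gf_common_mt_end + 1 so they never collide with the common range; the GF_CALLOC calls elsewhere in this patch tag each allocation with one of these values. A minimal standalone sketch of the idea, with stand-in constants and a plain counter array rather than the real accounting machinery:

#include <stdio.h>
#include <stdlib.h>

enum { SKETCH_COMMON_MT_END = 100 };    /* stand-in for gf_common_mt_end */

enum sketch_ioc_mem_types {
    sketch_mt_iovec = SKETCH_COMMON_MT_END + 1,
    sketch_mt_ioc_table,
    sketch_mt_ioc_inode,
    sketch_mt_end                       /* terminator, sizes the table */
};

static unsigned long alloc_count[sketch_mt_end]; /* one counter per type */

/* stand-in for a type-tagged allocator: count, then allocate */
static void *
sketch_calloc(size_t n, size_t size, int type)
{
    alloc_count[type]++;
    return calloc(n, size);
}

int
main(void)
{
    void *p = sketch_calloc(1, 64, sketch_mt_ioc_inode);
    printf("ioc_inode allocations: %lu\n", alloc_count[sketch_mt_ioc_inode]);
    free(p);
    return 0;
}
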
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index b2e20ba659f..84b1ae6cb20 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -8,81 +8,73 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "io-cache.h"
#include "ioc-mem-types.h"
#include <assert.h>
#include <sys/time.h>
-
+#include "io-cache-messages.h"
char
-ioc_empty (struct ioc_cache *cache)
+ioc_empty(struct ioc_cache *cache)
{
- char is_empty = -1;
+ char is_empty = -1;
- GF_VALIDATE_OR_GOTO ("io-cache", cache, out);
+ GF_VALIDATE_OR_GOTO("io-cache", cache, out);
- is_empty = list_empty (&cache->page_lru);
+ is_empty = list_empty(&cache->page_lru);
out:
- return is_empty;
+ return is_empty;
}
-
ioc_page_t *
-__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_get(ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_page_t *page = NULL;
- ioc_table_t *table = NULL;
- off_t rounded_offset = 0;
+ ioc_page_t *page = NULL;
+ ioc_table_t *table = NULL;
+ off_t rounded_offset = 0;
- GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
- table = ioc_inode->table;
- GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+ table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
- rounded_offset = floor (offset, table->page_size);
+ rounded_offset = gf_floor(offset, table->page_size);
- page = rbthash_get (ioc_inode->cache.page_table, &rounded_offset,
- sizeof (rounded_offset));
+ page = rbthash_get(ioc_inode->cache.page_table, &rounded_offset,
+ sizeof(rounded_offset));
- if (page != NULL) {
- /* push the page to the end of the lru list */
- list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
- }
+ if (page != NULL) {
+ /* push the page to the end of the lru list */
+ list_move_tail(&page->page_lru, &ioc_inode->cache.page_lru);
+ }
out:
- return page;
+ return page;
}
-
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+ioc_page_get(ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_page_t *page = NULL;
+ ioc_page_t *page = NULL;
- if (ioc_inode == NULL) {
- goto out;
- }
+ if (ioc_inode == NULL) {
+ goto out;
+ }
- ioc_inode_lock (ioc_inode);
- {
- page = __ioc_page_get (ioc_inode, offset);
- }
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_lock(ioc_inode);
+ {
+ page = __ioc_page_get(ioc_inode, offset);
+ }
+ ioc_inode_unlock(ioc_inode);
out:
- return page;
+ return page;
}
-
/*
* __ioc_page_destroy -
*
@@ -90,103 +82,108 @@ out:
*
*/
int64_t
-__ioc_page_destroy (ioc_page_t *page)
+__ioc_page_destroy(ioc_page_t *page)
{
- int64_t page_size = 0;
-
- GF_VALIDATE_OR_GOTO ("io-cache", page, out);
-
- if (page->iobref)
- page_size = iobref_size (page->iobref);
-
- if (page->waitq) {
- /* frames waiting on this page, do not destroy this page */
- page_size = -1;
- page->stale = 1;
- } else {
- rbthash_remove (page->inode->cache.page_table, &page->offset,
- sizeof (page->offset));
- list_del (&page->page_lru);
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "destroying page = %p, offset = %"PRId64" "
- "&& inode = %p",
- page, page->offset, page->inode);
-
- if (page->vector){
- iobref_unref (page->iobref);
- GF_FREE (page->vector);
- page->vector = NULL;
- }
-
- page->inode = NULL;
+ int64_t page_size = 0;
+
+ GF_VALIDATE_OR_GOTO("io-cache", page, out);
+
+ if (page->iobref)
+ page_size = iobref_size(page->iobref);
+
+ if (page->waitq) {
+ /* frames waiting on this page, do not destroy this page */
+ page_size = -1;
+ page->stale = 1;
+ } else {
+ rbthash_remove(page->inode->cache.page_table, &page->offset,
+ sizeof(page->offset));
+ list_del(&page->page_lru);
+
+ gf_msg_trace(page->inode->table->xl->name, 0,
+ "destroying page = %p, offset = %" PRId64
+ " "
+ "&& inode = %p",
+ page, page->offset, page->inode);
+
+ if (page->vector) {
+ iobref_unref(page->iobref);
+ GF_FREE(page->vector);
+ page->vector = NULL;
}
- if (page_size != -1) {
- pthread_mutex_destroy (&page->page_lock);
- GF_FREE (page);
- }
+ page->inode = NULL;
+ }
+
+ if (page_size != -1) {
+ pthread_mutex_destroy(&page->page_lock);
+ GF_FREE(page);
+ }
out:
- return page_size;
+ return page_size;
}
-
int64_t
-ioc_page_destroy (ioc_page_t *page)
+ioc_page_destroy(ioc_page_t *page)
{
- int64_t ret = 0;
+ int64_t ret = 0;
+ struct ioc_inode *inode = NULL;
- if (page == NULL) {
- goto out;
- }
+ if (page == NULL) {
+ goto out;
+ }
- ioc_inode_lock (page->inode);
- {
- ret = __ioc_page_destroy (page);
- }
- ioc_inode_unlock (page->inode);
+ ioc_inode_lock(page->inode);
+ {
+ inode = page->inode;
+ ret = __ioc_page_destroy(page);
+ }
+ ioc_inode_unlock(inode);
out:
- return ret;
+ return ret;
}
int32_t
-__ioc_inode_prune (ioc_inode_t *curr, uint64_t *size_pruned,
- uint64_t size_to_prune, uint32_t index)
+__ioc_inode_prune(ioc_inode_t *curr, uint64_t *size_pruned,
+ uint64_t size_to_prune, uint32_t index)
{
- ioc_page_t *page = NULL, *next = NULL;
- int32_t ret = 0;
- ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL, *next = NULL;
+ int32_t ret = 0;
+ ioc_table_t *table = NULL;
- if (curr == NULL) {
- goto out;
- }
+ if (curr == NULL) {
+ goto out;
+ }
- table = curr->table;
+ table = curr->table;
- list_for_each_entry_safe (page, next, &curr->cache.page_lru, page_lru) {
- *size_pruned += page->size;
- ret = __ioc_page_destroy (page);
+ list_for_each_entry_safe(page, next, &curr->cache.page_lru, page_lru)
+ {
+ *size_pruned += page->size;
+ ret = __ioc_page_destroy(page);
- if (ret != -1)
- table->cache_used -= ret;
+ if (ret != -1)
+ table->cache_used -= ret;
- gf_log (table->xl->name, GF_LOG_TRACE,
- "index = %d && table->cache_used = %"PRIu64" && table->"
- "cache_size = %"PRIu64, index, table->cache_used,
- table->cache_size);
+ gf_msg_trace(table->xl->name, 0,
+ "index = %d && "
+ "table->cache_used = %" PRIu64
+ " && table->"
+ "cache_size = %" PRIu64,
+ index, table->cache_used, table->cache_size);
- if ((*size_pruned) >= size_to_prune)
- break;
- }
+ if ((*size_pruned) >= size_to_prune)
+ break;
+ }
- if (ioc_empty (&curr->cache)) {
- list_del_init (&curr->inode_lru);
- }
+ if (ioc_empty(&curr->cache)) {
+ list_del_init(&curr->inode_lru);
+ }
out:
- return 0;
+ return 0;
}
/*
* ioc_prune - prune the cache. we have a limit to the number of pages we
@@ -196,46 +193,44 @@ out:
*
*/
int32_t
-ioc_prune (ioc_table_t *table)
+ioc_prune(ioc_table_t *table)
{
- ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
- int32_t index = 0;
- uint64_t size_to_prune = 0;
- uint64_t size_pruned = 0;
+ ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
+ int32_t index = 0;
+ uint64_t size_to_prune = 0;
+ uint64_t size_pruned = 0;
+
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
+
+ ioc_table_lock(table);
+ {
+ size_to_prune = table->cache_used - table->cache_size;
+ /* take out the least recently used inode */
+ for (index = 0; index < table->max_pri; index++) {
+ list_for_each_entry_safe(curr, next_ioc_inode,
+ &table->inode_lru[index], inode_lru)
+ {
+ /* prune page-by-page for this inode, till
+ * we reach the equilibrium */
+ ioc_inode_lock(curr);
+ {
+ __ioc_inode_prune(curr, &size_pruned, size_to_prune, index);
+ }
+ ioc_inode_unlock(curr);
- GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* list_for_each_entry_safe (curr...) */
- ioc_table_lock (table);
- {
- size_to_prune = table->cache_used - table->cache_size;
- /* take out the least recently used inode */
- for (index=0; index < table->max_pri; index++) {
- list_for_each_entry_safe (curr, next_ioc_inode,
- &table->inode_lru[index],
- inode_lru) {
- /* prune page-by-page for this inode, till
- * we reach the equilibrium */
- ioc_inode_lock (curr);
- {
- __ioc_inode_prune (curr, &size_pruned,
- size_to_prune,
- index);
- }
- ioc_inode_unlock (curr);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe (curr...) */
-
- if (size_pruned >= size_to_prune)
- break;
- } /* for(index=0;...) */
-
- } /* ioc_inode_table locked region end */
- ioc_table_unlock (table);
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* for(index=0;...) */
+
+ } /* ioc_inode_table locked region end */
+ ioc_table_unlock(table);
out:
- return 0;
+ return 0;
}
/*
@@ -246,47 +241,46 @@ out:
*
*/
ioc_page_t *
-__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_create(ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t rounded_offset = 0;
- ioc_page_t *newpage = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ioc_page_t *newpage = NULL;
- GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
- table = ioc_inode->table;
- GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+ table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
- rounded_offset = floor (offset, table->page_size);
+ rounded_offset = gf_floor(offset, table->page_size);
- newpage = GF_CALLOC (1, sizeof (*newpage), gf_ioc_mt_ioc_newpage_t);
- if (newpage == NULL) {
- goto out;
- }
+ newpage = GF_CALLOC(1, sizeof(*newpage), gf_ioc_mt_ioc_newpage_t);
+ if (newpage == NULL) {
+ goto out;
+ }
- if (!ioc_inode) {
- GF_FREE (newpage);
- newpage = NULL;
- goto out;
- }
+ if (!ioc_inode) {
+ GF_FREE(newpage);
+ newpage = NULL;
+ goto out;
+ }
- newpage->offset = rounded_offset;
- newpage->inode = ioc_inode;
- pthread_mutex_init (&newpage->page_lock, NULL);
+ newpage->offset = rounded_offset;
+ newpage->inode = ioc_inode;
+ pthread_mutex_init(&newpage->page_lock, NULL);
- rbthash_insert (ioc_inode->cache.page_table, newpage, &rounded_offset,
- sizeof (rounded_offset));
+ rbthash_insert(ioc_inode->cache.page_table, newpage, &rounded_offset,
+ sizeof(rounded_offset));
- list_add_tail (&newpage->page_lru, &ioc_inode->cache.page_lru);
+ list_add_tail(&newpage->page_lru, &ioc_inode->cache.page_lru);
- page = newpage;
+ page = newpage;
- gf_log ("io-cache", GF_LOG_TRACE,
- "returning new page %p", page);
+ gf_msg_trace("io-cache", 0, "returning new page %p", page);
out:
- return page;
+ return page;
}
/*
@@ -299,54 +293,55 @@ out:
*
*/
void
-__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
+__ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size)
{
- ioc_waitq_t *waitq = NULL;
- ioc_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
- local = frame->local;
-
- GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
-
- if (page == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_WARNING,
- "asked to wait on a NULL page");
- }
-
- waitq = GF_CALLOC (1, sizeof (*waitq), gf_ioc_mt_ioc_waitq_t);
- if (waitq == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) waiting on page = %p, offset=%"PRId64", "
- "size=%"GF_PRI_SIZET"",
- frame, page, offset, size);
-
- waitq->data = frame;
- waitq->next = page->waitq;
- waitq->pending_offset = offset;
- waitq->pending_size = size;
- page->waitq = waitq;
- /* one frame can wait only once on a given page,
- * local->wait_count is number of pages a frame is waiting on */
- ioc_local_lock (local);
- {
- local->wait_count++;
- }
- ioc_local_unlock (local);
+ ioc_waitq_t *waitq = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("io-cache", frame, out);
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO(frame->this->name, local, out);
+
+ if (page == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_NULL_PAGE_WAIT, NULL);
+ goto out;
+ }
+
+ waitq = GF_CALLOC(1, sizeof(*waitq), gf_ioc_mt_ioc_waitq_t);
+ if (waitq == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ gf_msg_trace(frame->this->name, 0,
+ "frame(%p) waiting on page = %p, offset=%" PRId64
+ ", "
+ "size=%" GF_PRI_SIZET "",
+ frame, page, offset, size);
+
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ waitq->pending_offset = offset;
+ waitq->pending_size = size;
+ page->waitq = waitq;
+ /* one frame can wait only once on a given page,
+ * local->wait_count is number of pages a frame is waiting on */
+ ioc_local_lock(local);
+ {
+ local->wait_count++;
+ }
+ ioc_local_unlock(local);
out:
- return;
+ return;
}
-
/*
* ioc_cache_still_valid - see if cached pages ioc_inode are still valid
* against given stbuf
@@ -357,11 +352,11 @@ out:
* assumes ioc_inode is locked
*/
int8_t
-ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf)
+ioc_cache_still_valid(ioc_inode_t *ioc_inode, struct iatt *stbuf)
{
- int8_t cache_still_valid = 1;
+ int8_t cache_still_valid = 1;
- GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
#if 0
if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
@@ -369,9 +364,9 @@ ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf)
cache_still_valid = 0;
#else
- if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime)
- || (stbuf->ia_mtime_nsec != ioc_inode->cache.mtime_nsec))
- cache_still_valid = 0;
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
+ (stbuf->ia_mtime_nsec != ioc_inode->cache.mtime_nsec))
+ cache_still_valid = 0;
#endif
@@ -384,182 +379,173 @@ ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf)
#endif
out:
- return cache_still_valid;
+ return cache_still_valid;
}
-
void
-ioc_waitq_return (ioc_waitq_t *waitq)
+ioc_waitq_return(ioc_waitq_t *waitq)
{
- ioc_waitq_t *trav = NULL;
- ioc_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ioc_waitq_t *trav = NULL;
+ ioc_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ioc_frame_return (frame);
- GF_FREE (trav);
- }
+ frame = trav->data;
+ ioc_frame_return(frame);
+ GF_FREE(trav);
+ }
}
-
int
-ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
+ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- off_t offset = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- int32_t destroy_size = 0;
- size_t page_size = 0;
- ioc_waitq_t *waitq = NULL;
- size_t iobref_page_size = 0;
- char zero_filled = 0;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- GF_ASSERT (local);
-
- offset = local->pending_offset;
- ioc_inode = local->inode;
- GF_ASSERT (ioc_inode);
+ ioc_local_t *local = NULL;
+ off_t offset = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int32_t destroy_size = 0;
+ size_t page_size = 0;
+ ioc_waitq_t *waitq = NULL;
+ size_t iobref_page_size = 0;
+ char zero_filled = 0;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ GF_ASSERT(local);
+
+ offset = local->pending_offset;
+ ioc_inode = local->inode;
+ GF_ASSERT(ioc_inode);
+
+ table = ioc_inode->table;
+ GF_ASSERT(table);
+
+ zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0));
+
+ ioc_inode_lock(ioc_inode);
+ {
+ if (op_ret == -1 ||
+ !(zero_filled || ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_msg_trace(ioc_inode->table->xl->name, 0,
+ "cache for inode(%p) is invalid. flushing "
+ "all pages",
+ ioc_inode);
+ destroy_size = __ioc_inode_flush(ioc_inode);
+ }
- table = ioc_inode->table;
- GF_ASSERT (table);
+ if ((op_ret >= 0) && !zero_filled) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
- zero_filled = ((op_ret >=0) && (stbuf->ia_mtime == 0));
+ ioc_inode->cache.last_revalidate = gf_time();
- ioc_inode_lock (ioc_inode);
- {
- if (op_ret == -1 || !(zero_filled ||
- ioc_cache_still_valid(ioc_inode,
- stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "cache for inode(%p) is invalid. flushing "
- "all pages", ioc_inode);
- destroy_size = __ioc_inode_flush (ioc_inode);
+ if (op_ret < 0) {
+ /* error, readv returned -1 */
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page)
+ waitq = __ioc_page_error(page, op_ret, op_errno);
+ } else {
+ gf_msg_trace(ioc_inode->table->xl->name, 0, "op_ret = %d", op_ret);
+ page = __ioc_page_get(ioc_inode, offset);
+ if (!page) {
+ /* page was flushed */
+ /* some serious bug ? */
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_WASTED_COPY, "offset=%" PRId64, offset,
+ "page-size=%" PRId64, table->page_size, "ioc_inode=%p",
+ ioc_inode, NULL);
+ } else {
+ if (page->vector) {
+ iobref_unref(page->iobref);
+ GF_FREE(page->vector);
+ page->vector = NULL;
+ page->iobref = NULL;
}
- if ((op_ret >= 0) && !zero_filled) {
- ioc_inode->cache.mtime = stbuf->ia_mtime;
- ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ /* keep a copy of the page for our cache */
+ page->vector = iov_dup(vector, count);
+ if (page->vector == NULL) {
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page != NULL)
+ waitq = __ioc_page_error(page, -1, ENOMEM);
+ goto unlock;
}
- gettimeofday (&ioc_inode->cache.tv, NULL);
-
- if (op_ret < 0) {
- /* error, readv returned -1 */
- page = __ioc_page_get (ioc_inode, offset);
- if (page)
- waitq = __ioc_page_error (page, op_ret,
- op_errno);
+ page->count = count;
+ if (iobref) {
+ page->iobref = iobref_ref(iobref);
} else {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "op_ret = %d", op_ret);
- page = __ioc_page_get (ioc_inode, offset);
- if (!page) {
- /* page was flushed */
- /* some serious bug ? */
- gf_log (frame->this->name, GF_LOG_WARNING,
- "wasted copy: %"PRId64"[+%"PRId64"] "
- "ioc_inode=%p", offset,
- table->page_size, ioc_inode);
- } else {
- if (page->vector) {
- iobref_unref (page->iobref);
- GF_FREE (page->vector);
- page->vector = NULL;
- }
-
- /* keep a copy of the page for our cache */
- page->vector = iov_dup (vector, count);
- if (page->vector == NULL) {
- page = __ioc_page_get (ioc_inode,
- offset);
- if (page != NULL)
- waitq = __ioc_page_error (page,
- -1,
- ENOMEM);
- goto unlock;
- }
-
- page->count = count;
- if (iobref) {
- page->iobref = iobref_ref (iobref);
- } else {
- /* TODO: we have got a response to
- * our request and no data */
- gf_log (frame->this->name,
- GF_LOG_CRITICAL,
- "frame>root>rsp_refs is null");
- } /* if(frame->root->rsp_refs) */
-
- /* page->size should indicate exactly how
- * much the readv call to the child
- * translator returned. earlier op_ret
- * from child translator was used, which
- * gave rise to a bug where reads from
- * io-cached volume were resulting in 0
- * byte replies */
- page_size = iov_length(vector, count);
- page->size = page_size;
- page->op_errno = op_errno;
-
- iobref_page_size = iobref_size (page->iobref);
-
- if (page->waitq) {
- /* wake up all the frames waiting on
- * this page, including
- * the frame which triggered fault */
- waitq = __ioc_page_wakeup (page,
- op_errno);
- } /* if(page->waitq) */
- } /* if(!page)...else */
- } /* if(op_ret < 0)...else */
- } /* ioc_inode locked region end */
+ /* TODO: we have got a response to
+ * our request and no data */
+ gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
+ IO_CACHE_MSG_FRAME_NULL, NULL);
+ } /* if(frame->root->rsp_refs) */
+
+ /* page->size should indicate exactly how
+ * much the readv call to the child
+ * translator returned. earlier op_ret
+ * from child translator was used, which
+ * gave rise to a bug where reads from
+ * io-cached volume were resulting in 0
+ * byte replies */
+ page_size = iov_length(vector, count);
+ page->size = page_size;
+ page->op_errno = op_errno;
+
+ iobref_page_size = iobref_size(page->iobref);
+
+ if (page->waitq) {
+ /* wake up all the frames waiting on
+ * this page, including
+ * the frame which triggered fault */
+ waitq = __ioc_page_wakeup(page, op_errno);
+ } /* if(page->waitq) */
+ } /* if(!page)...else */
+ } /* if(op_ret < 0)...else */
+ } /* ioc_inode locked region end */
unlock:
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_unlock(ioc_inode);
- ioc_waitq_return (waitq);
+ ioc_waitq_return(waitq);
- if (iobref_page_size) {
- ioc_table_lock (table);
- {
- table->cache_used += iobref_page_size;
- }
- ioc_table_unlock (table);
+ if (iobref_page_size) {
+ ioc_table_lock(table);
+ {
+ table->cache_used += iobref_page_size;
}
+ ioc_table_unlock(table);
+ }
- if (destroy_size) {
- ioc_table_lock (table);
- {
- table->cache_used -= destroy_size;
- }
- ioc_table_unlock (table);
+ if (destroy_size) {
+ ioc_table_lock(table);
+ {
+ table->cache_used -= destroy_size;
}
+ ioc_table_unlock(table);
+ }
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune(ioc_inode->table)) {
+ ioc_prune(ioc_inode->table);
+ }
- gf_log (frame->this->name, GF_LOG_TRACE, "fault frame %p returned",
- frame);
- pthread_mutex_destroy (&local->local_lock);
+ gf_msg_trace(frame->this->name, 0, "fault frame %p returned", frame);
+ pthread_mutex_destroy(&local->local_lock);
- fd_unref (local->fd);
+ fd_unref(local->fd);
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-
/*
* ioc_page_fault -
*
@@ -570,219 +556,212 @@ unlock:
*
*/
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset)
+ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset)
{
- ioc_table_t *table = NULL;
- call_frame_t *fault_frame = NULL;
- ioc_local_t *fault_local = NULL;
- int32_t op_ret = -1, op_errno = -1;
- ioc_waitq_t *waitq = NULL;
- ioc_page_t *page = NULL;
-
- GF_ASSERT (ioc_inode);
- if (frame == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- gf_log ("io-cache", GF_LOG_WARNING,
- "page fault on a NULL frame");
- goto err;
- }
-
- table = ioc_inode->table;
- fault_frame = copy_frame (frame);
- if (fault_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
- }
-
- fault_local = mem_get0 (THIS->local_pool);
- if (fault_local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- STACK_DESTROY (fault_frame->root);
- goto err;
- }
-
- /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
- * are using till now won't be valid till we get reply from server.
- * we unref this fd, in fault_cbk */
- fault_local->fd = fd_ref (fd);
-
- fault_frame->local = fault_local;
- pthread_mutex_init (&fault_local->local_lock, NULL);
-
- INIT_LIST_HEAD (&fault_local->fill_list);
- fault_local->pending_offset = offset;
- fault_local->pending_size = table->page_size;
- fault_local->inode = ioc_inode;
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "stack winding page fault for offset = %"PRId64" with "
- "frame %p", offset, fault_frame);
-
- STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
- FIRST_CHILD(fault_frame->this)->fops->readv, fd,
- table->page_size, offset, 0, NULL);
- return;
+ ioc_table_t *table = NULL;
+ call_frame_t *fault_frame = NULL;
+ ioc_local_t *fault_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+ ioc_waitq_t *waitq = NULL;
+ ioc_page_t *page = NULL;
+
+ GF_ASSERT(ioc_inode);
+ if (frame == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg("io-cache", GF_LOG_WARNING, EINVAL, IO_CACHE_MSG_PAGE_FAULT,
+ NULL);
+ goto err;
+ }
+
+ table = ioc_inode->table;
+ fault_frame = copy_frame(frame);
+ if (fault_frame == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = frame->local;
+ fault_local = mem_get0(THIS->local_pool);
+ if (fault_local == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ STACK_DESTROY(fault_frame->root);
+ goto err;
+ }
+
+ /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
+ * are using till now won't be valid till we get reply from server.
+ * we unref this fd, in fault_cbk */
+ fault_local->fd = fd_ref(fd);
+
+ fault_frame->local = fault_local;
+ pthread_mutex_init(&fault_local->local_lock, NULL);
+
+ INIT_LIST_HEAD(&fault_local->fill_list);
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = table->page_size;
+ fault_local->inode = ioc_inode;
+
+ if (local && local->xattr_req)
+ fault_local->xattr_req = dict_ref(local->xattr_req);
+
+ gf_msg_trace(frame->this->name, 0,
+ "stack winding page fault for offset = %" PRId64
+ " with "
+ "frame %p",
+ offset, fault_frame);
+
+ STACK_WIND(fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
+ FIRST_CHILD(fault_frame->this)->fops->readv, fd,
+ table->page_size, offset, 0, fault_local->xattr_req);
+ return;
err:
- ioc_inode_lock (ioc_inode);
- {
- page = __ioc_page_get (ioc_inode, offset);
- if (page != NULL) {
- waitq = __ioc_page_error (page, op_ret, op_errno);
- }
+ ioc_inode_lock(ioc_inode);
+ {
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page != NULL) {
+ waitq = __ioc_page_error(page, op_ret, op_errno);
}
- ioc_inode_unlock (ioc_inode);
+ }
+ ioc_inode_unlock(ioc_inode);
- if (waitq != NULL) {
- ioc_waitq_return (waitq);
- }
+ if (waitq != NULL) {
+ ioc_waitq_return(waitq);
+ }
}
-
int32_t
-__ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size, int32_t op_errno)
+__ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size, int32_t op_errno)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_fill_t *new = NULL;
- int8_t found = 0;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
-
- if (page == NULL) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "NULL page has been provided to serve read request");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- goto out;
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_fill_t *new = NULL;
+ int8_t found = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("io-cache", frame, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(frame->this->name, local, out);
+
+ if (page == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_SERVE_READ_REQUEST, NULL);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ ioc_inode = page->inode;
+
+ gf_msg_trace(frame->this->name, 0,
+ "frame (%p) offset = %" PRId64 " && size = %" GF_PRI_SIZET
+ " "
+ "&& page->size = %" GF_PRI_SIZET " && wait_count = %d",
+ frame, offset, size, page->size, local->wait_count);
+
+ /* immediately move this page to the end of the page_lru list */
+ list_move_tail(&page->page_lru, &ioc_inode->cache.page_lru);
+ /* fill local->pending_size bytes from local->pending_offset */
+ if (local->op_ret != -1) {
+ local->op_errno = op_errno;
+
+ if (page->size == 0) {
+ goto done;
}
- ioc_inode = page->inode;
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
- "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
- frame, offset, size, page->size, local->wait_count);
-
- /* immediately move this page to the end of the page_lru list */
- list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
- /* fill local->pending_size bytes from local->pending_offset */
- if (local->op_ret != -1) {
- local->op_errno = op_errno;
-
- if (page->size == 0) {
- goto done;
- }
+ if (offset > page->offset)
+ /* offset is offset in file, convert it to offset in
+ * page */
+ src_offset = offset - page->offset;
+ /*FIXME: since offset is the offset within page is the
+ * else case valid? */
+ else
+ /* local->pending_offset is in previous page. do not
+ * fill until we have filled all previous pages */
+ dst_offset = page->offset - offset;
+
+ /* we have to copy from offset to either end of this page
+ * or till the requested size */
+ copy_size = min(page->size - src_offset, size - dst_offset);
+
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
- if (offset > page->offset)
- /* offset is offset in file, convert it to offset in
- * page */
- src_offset = offset - page->offset;
- /*FIXME: since offset is the offset within page is the
- * else case valid? */
- else
- /* local->pending_offset is in previous page. do not
- * fill until we have filled all previous pages */
- dst_offset = page->offset - offset;
-
- /* we have to copy from offset to either end of this page
- * or till the requested size */
- copy_size = min (page->size - src_offset,
- size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
+ gf_msg_trace(page->inode->table->xl->name, 0,
+ "copy_size = %" GF_PRI_SIZET
+ " && src_offset = "
+ "%" PRId64 " && dst_offset = %" PRId64 "",
+ copy_size, src_offset, dst_offset);
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "copy_size = %"GF_PRI_SIZET" && src_offset = "
- "%"PRId64" && dst_offset = %"PRId64"",
- copy_size, src_offset, dst_offset);
+ {
+ new = GF_CALLOC(1, sizeof(*new), gf_ioc_mt_ioc_fill_t);
+ if (new == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref(page->iobref);
+ new->count = iov_subset(page->vector, page->count, src_offset,
+ copy_size, &new->vector, 0);
+ if (new->count < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(new->iobref);
+ GF_FREE(new);
+ goto out;
+ }
+
+ /* add the ioc_fill to fill_list for this frame */
+ if (list_empty(&local->fill_list)) {
+ /* if list is empty, then this is the first
+ * time we are filling frame, add the
+ * ioc_fill_t to the end of list */
+ list_add_tail(&new->list, &local->fill_list);
+ } else {
+ found = 0;
+ /* list is not empty, we need to look for
+ * where this offset fits in list */
+ list_for_each_entry(fill, &local->fill_list, list)
{
- new = GF_CALLOC (1, sizeof (*new),
- gf_ioc_mt_ioc_fill_t);
- if (new == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
-
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector, page->count,
- src_offset,
- src_offset + copy_size,
- NULL);
-
- new->vector = GF_CALLOC (new->count,
- sizeof (struct iovec),
- gf_ioc_mt_iovec);
- if (new->vector == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
-
- iobref_unref (new->iobref);
- GF_FREE (new);
- goto out;
- }
-
- new->count = iov_subset (page->vector, page->count,
- src_offset,
- src_offset + copy_size,
- new->vector);
-
- /* add the ioc_fill to fill_list for this frame */
- if (list_empty (&local->fill_list)) {
- /* if list is empty, then this is the first
- * time we are filling frame, add the
- * ioc_fill_t to the end of list */
- list_add_tail (&new->list, &local->fill_list);
- } else {
- found = 0;
- /* list is not empty, we need to look for
- * where this offset fits in list */
- list_for_each_entry (fill, &local->fill_list,
- list) {
- if (fill->offset > new->offset) {
- found = 1;
- break;
- }
- }
-
- if (found) {
- list_add_tail (&new->list,
- &fill->list);
- } else {
- list_add_tail (&new->list,
- &local->fill_list);
- }
- }
+ if (fill->offset > new->offset) {
+ found = 1;
+ break;
+ }
}
- local->op_ret += copy_size;
+ if (found) {
+ list_add_tail(&new->list, &fill->list);
+ } else {
+ list_add_tail(&new->list, &local->fill_list);
+ }
+ }
}
+ local->op_ret += copy_size;
+ }
+
done:
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
/*
@@ -795,103 +774,109 @@ out:
*
*/
static void
-ioc_frame_unwind (call_frame_t *frame)
+ioc_frame_unwind(call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL, *next = NULL;
- int32_t count = 0;
- struct iovec *vector = NULL;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- struct iatt stbuf = {0,};
- int32_t op_ret = 0, op_errno = 0;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- if (local == NULL) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "local is NULL");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (local->op_ret < 0) {
- op_ret = local->op_ret;
- op_errno = local->op_errno;
- goto unwind;
- }
-
- // ioc_local_lock (local);
- iobref = iobref_new ();
- if (iobref == NULL) {
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL, *next = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int32_t op_ret = 0, op_errno = 0;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_LOCAL_NULL, NULL);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0) {
+ op_ret = local->op_ret;
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ // ioc_local_lock (local);
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+
+ if (list_empty(&local->fill_list)) {
+ gf_msg_trace(frame->this->name, 0,
+ "frame(%p) has 0 entries in local->fill_list "
+ "(offset = %" PRId64 " && size = %" GF_PRI_SIZET ")",
+ frame, local->offset, local->size);
+ }
+
+ list_for_each_entry(fill, &local->fill_list, list) { count += fill->count; }
+
+ vector = GF_CALLOC(count, sizeof(*vector), gf_ioc_mt_iovec);
+ if (vector == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+
+ list_for_each_entry_safe(fill, next, &local->fill_list, list)
+ {
+ /* # TODO: check why this if clause is needed at all. */
+ if ((vector != NULL) && (iobref != NULL)) {
+ memcpy(((char *)vector) + copied, fill->vector,
+ fill->count * sizeof(*vector));
+
+ copied += (fill->count * sizeof(*vector));
+
+ if (iobref_merge(iobref, fill->iobref)) {
op_ret = -1;
op_errno = ENOMEM;
+ }
}
- if (list_empty (&local->fill_list)) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) has 0 entries in local->fill_list "
- "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
- frame, local->offset, local->size);
- }
-
- list_for_each_entry (fill, &local->fill_list, list) {
- count += fill->count;
- }
+ list_del(&fill->list);
+ iobref_unref(fill->iobref);
+ GF_FREE(fill->vector);
+ GF_FREE(fill);
+ }
- vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec);
- if (vector == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
-
- list_for_each_entry_safe (fill, next, &local->fill_list, list) {
- if ((vector != NULL) && (iobref != NULL)) {
- memcpy (((char *)vector) + copied,
- fill->vector,
- fill->count * sizeof (*vector));
-
- copied += (fill->count * sizeof (*vector));
-
- iobref_merge (iobref, fill->iobref);
- }
-
- list_del (&fill->list);
- iobref_unref (fill->iobref);
- GF_FREE (fill->vector);
- GF_FREE (fill);
- }
-
- if (op_ret != -1) {
- op_ret = iov_length (vector, count);
- }
+ if (op_ret != -1) {
+ op_ret = iov_length(vector, count);
+ }
unwind:
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) unwinding with op_ret=%d", frame, op_ret);
-
- // ioc_local_unlock (local);
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
- count, &stbuf, iobref, NULL);
-
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
-
- if (vector != NULL) {
- GF_FREE (vector);
- vector = NULL;
- }
-
- pthread_mutex_destroy (&local->local_lock);
- if (local)
- mem_put (local);
-
- return;
+ gf_msg_trace(frame->this->name, 0, "frame(%p) unwinding with op_ret=%d",
+ frame, op_ret);
+
+ // ioc_local_unlock (local);
+
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, &stbuf,
+ iobref, NULL);
+
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ if (vector != NULL) {
+ GF_FREE(vector);
+ vector = NULL;
+ }
+
+ if (local) {
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+ pthread_mutex_destroy(&local->local_lock);
+ mem_put(local);
+ }
+ return;
}
/*
@@ -901,27 +886,27 @@ unwind:
* to be called only when a frame is waiting on an in-transit page
*/
void
-ioc_frame_return (call_frame_t *frame)
+ioc_frame_return(call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- int32_t wait_count = 0;
+ ioc_local_t *local = NULL;
+ int32_t wait_count = 0;
- GF_ASSERT (frame);
+ GF_ASSERT(frame);
- local = frame->local;
- GF_ASSERT (local->wait_count > 0);
+ local = frame->local;
+ GF_ASSERT(local->wait_count > 0);
- ioc_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ioc_local_unlock (local);
+ ioc_local_lock(local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ioc_local_unlock(local);
- if (!wait_count) {
- ioc_frame_unwind (frame);
- }
+ if (!wait_count) {
+ ioc_frame_unwind(frame);
+ }
- return;
+ return;
}
/*
@@ -931,41 +916,39 @@ ioc_frame_return (call_frame_t *frame)
* to be called only when a frame is waiting on an in-transit page
*/
ioc_waitq_t *
-__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno)
+__ioc_page_wakeup(ioc_page_t *page, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int32_t ret = -1;
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+ GF_VALIDATE_OR_GOTO("io-cache", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- page->ready = 1;
+ page->ready = 1;
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "page is %p && waitq = %p", page, waitq);
+ gf_msg_trace(page->inode->table->xl->name, 0, "page is %p && waitq = %p",
+ page, waitq);
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ret = __ioc_frame_fill (page, frame, trav->pending_offset,
- trav->pending_size, op_errno);
- if (ret == -1) {
- break;
- }
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ret = __ioc_frame_fill(page, frame, trav->pending_offset,
+ trav->pending_size, op_errno);
+ if (ret == -1) {
+ break;
}
+ }
- if (page->stale) {
- __ioc_page_destroy (page);
- }
+ if (page->stale) {
+ __ioc_page_destroy(page);
+ }
out:
- return waitq;
+ return waitq;
}
-
-
/*
* ioc_page_error -
* @page:
@@ -974,46 +957,45 @@ out:
*
*/
ioc_waitq_t *
-__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
+__ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int64_t ret = 0;
- ioc_table_t *table = NULL;
- ioc_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int64_t ret = 0;
+ ioc_table_t *table = NULL;
+ ioc_local_t *local = NULL;
- waitq = page->waitq;
- page->waitq = NULL;
+ GF_VALIDATE_OR_GOTO("io-cache", page, out);
- gf_log (page->inode->table->xl->name, GF_LOG_WARNING,
- "page error for page = %p & waitq = %p", page, waitq);
+ waitq = page->waitq;
+ page->waitq = NULL;
- for (trav = waitq; trav; trav = trav->next) {
+ gf_msg_debug(page->inode->table->xl->name, 0,
+ "page error for page = %p & waitq = %p", page, waitq);
- frame = trav->data;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
- local = frame->local;
- ioc_local_lock (local);
- {
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- ioc_local_unlock (local);
+ local = frame->local;
+ ioc_local_lock(local);
+ {
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
}
+ ioc_local_unlock(local);
+ }
- table = page->inode->table;
- ret = __ioc_page_destroy (page);
+ table = page->inode->table;
+ ret = __ioc_page_destroy(page);
- if (ret != -1) {
- table->cache_used -= ret;
- }
+ if (ret != -1) {
+ table->cache_used -= ret;
+ }
out:
- return waitq;
+ return waitq;
}
/*
@@ -1024,20 +1006,22 @@ out:
*
*/
ioc_waitq_t *
-ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
+ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL;
+ ioc_waitq_t *waitq = NULL;
+ struct ioc_inode *inode = NULL;
- if (page == NULL) {
- goto out;
- }
+ if (page == NULL) {
+ goto out;
+ }
- ioc_inode_lock (page->inode);
- {
- waitq = __ioc_page_error (page, op_ret, op_errno);
- }
- ioc_inode_unlock (page->inode);
+ ioc_inode_lock(page->inode);
+ {
+ inode = page->inode;
+ waitq = __ioc_page_error(page, op_ret, op_errno);
+ }
+ ioc_inode_unlock(inode);
out:
- return waitq;
+ return waitq;
}
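
One behavioural fix in page.c above is easy to miss among the reformatting: ioc_page_destroy() and ioc_page_error() now save page->inode into a local before calling the locked helper, because __ioc_page_destroy() may free the page and clear that back-pointer, leaving nothing safe to unlock. A minimal standalone sketch of the capture-before-invalidate pattern, with simplified types in place of the real ioc_page_t/ioc_inode_t:

#include <pthread.h>
#include <stdlib.h>

struct owner {
    pthread_mutex_t lock;
};

struct page {
    struct owner *inode;   /* back-pointer, may be cleared under the lock */
};

/* Locked helper: detaches and frees the page; page->inode must not be
 * dereferenced afterwards. */
static void
page_destroy_locked(struct page *pg)
{
    pg->inode = NULL;
    free(pg);
}

static void
page_destroy(struct page *pg)
{
    struct owner *inode;

    pthread_mutex_lock(&pg->inode->lock);
    inode = pg->inode;                   /* capture before it can vanish */
    page_destroy_locked(pg);
    pthread_mutex_unlock(&inode->lock);  /* NOT pg->inode: pg is gone */
}

int
main(void)
{
    struct owner *o = malloc(sizeof(*o));
    struct page *pg = malloc(sizeof(*pg));

    pthread_mutex_init(&o->lock, NULL);
    pg->inode = o;
    page_destroy(pg);
    pthread_mutex_destroy(&o->lock);
    free(o);
    return 0;
}
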
diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am
index d63042e7c05..7570cf41ed2 100644
--- a/xlators/performance/io-threads/src/Makefile.am
+++ b/xlators/performance/io-threads/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = io-threads.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_threads_la_LDFLAGS = -module -avoid-version
+io_threads_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_threads_la_SOURCES = io-threads.c
io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-threads.h iot-mem-types.h
+noinst_HEADERS = io-threads.h iot-mem-types.h io-threads-messages.h
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/io-threads/src/io-threads-messages.h b/xlators/performance/io-threads/src/io-threads-messages.h
new file mode 100644
index 00000000000..6229c353f96
--- /dev/null
+++ b/xlators/performance/io-threads/src/io-threads-messages.h
@@ -0,0 +1,41 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IO_THREADS_MESSAGES_H_
+#define _IO_THREADS_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(IO_THREADS, IO_THREADS_MSG_INIT_FAILED,
+ IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, IO_THREADS_MSG_NO_MEMORY,
+ IO_THREADS_MSG_VOL_MISCONFIGURED, IO_THREADS_MSG_SIZE_NOT_SET,
+ IO_THREADS_MSG_OUT_OF_MEMORY, IO_THREADS_MSG_PTHREAD_INIT_FAILED,
+ IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED);
+
+#define IO_THREADS_MSG_INIT_FAILED_STR "Thread attribute initialization failed"
+#define IO_THREADS_MSG_SIZE_NOT_SET_STR "Using default thread stack size"
+#define IO_THREADS_MSG_NO_MEMORY_STR "Memory accounting init failed"
+#define IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED_STR \
+ "FATAL: iot not configured with exactly one child"
+#define IO_THREADS_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile"
+#define IO_THREADS_MSG_OUT_OF_MEMORY_STR "out of memory"
+#define IO_THREADS_MSG_PTHREAD_INIT_FAILED_STR "init failed"
+#define IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED_STR \
+ "cannot initialize worker threads, exiting init"
+#endif /* _IO_THREADS_MESSAGES_H_ */
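
The new header follows the project's message-ID convention as its own comment describes: GLFS_MSGID() registers one stable numeric ID per message for the IO_THREADS component, each ID has a *_STR default string, and the gf_smsg() call sites elsewhere in this patch pass the ID instead of an ad-hoc format string. A minimal standalone sketch of how such an ID/string pairing can be consumed; the enum, lookup and logger below are illustrative only, not the real GLFS_MSGID machinery:

#include <stdio.h>

enum sketch_msgid {
    SKETCH_MSG_BASE = 5000,            /* stand-in for the component's ID segment */
    SKETCH_MSG_INIT_FAILED,
    SKETCH_MSG_NO_MEMORY,
    /* append new IDs here; never reuse or delete old ones */
};

static const char *
sketch_msg_str(enum sketch_msgid id)
{
    switch (id) {
    case SKETCH_MSG_INIT_FAILED:
        return "Thread attribute initialization failed";
    case SKETCH_MSG_NO_MEMORY:
        return "Memory accounting init failed";
    default:
        return "unknown message";
    }
}

static void
sketch_log(const char *domain, enum sketch_msgid id)
{
    /* a structured logger can key on the numeric ID; the string is only
     * the human-readable default */
    fprintf(stderr, "[%s] MSGID %d: %s\n", domain, (int)id, sketch_msg_str(id));
}

int
main(void)
{
    sketch_log("io-threads", SKETCH_MSG_NO_MEMORY);
    return 0;
}
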
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index c6bdc375439..3d24cc97f4b 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -8,258 +8,300 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "io-threads.h"
+#include <signal.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
-#include "locking.h"
+#include <glusterfs/locking.h>
+#include "io-threads-messages.h"
+#include <glusterfs/timespec.h>
-void *iot_worker (void *arg);
-int iot_workers_scale (iot_conf_t *conf);
-int __iot_workers_scale (iot_conf_t *conf);
+void *
+iot_worker(void *arg);
+int
+iot_workers_scale(iot_conf_t *conf);
+int
+__iot_workers_scale(iot_conf_t *conf);
struct volume_options options[];
+#define IOT_FOP(name, frame, this, args...) \
+ do { \
+ call_stub_t *__stub = NULL; \
+ int __ret = -1; \
+ \
+ __stub = fop_##name##_stub(frame, default_##name##_resume, args); \
+ if (!__stub) { \
+ __ret = -ENOMEM; \
+ goto out; \
+ } \
+ \
+ __ret = iot_schedule(frame, this, __stub); \
+ \
+ out: \
+ if (__ret < 0) { \
+ default_##name##_failure_cbk(frame, -__ret); \
+ if (__stub != NULL) { \
+ call_stub_destroy(__stub); \
+ } \
+ } \
+ } while (0)
+
+iot_client_ctx_t *
+iot_get_ctx(xlator_t *this, client_t *client)
+{
+ iot_client_ctx_t *ctx = NULL;
+ iot_client_ctx_t *setted_ctx = NULL;
+ int i;
+
+ if (client_ctx_get(client, this, (void **)&ctx) != 0) {
+ ctx = GF_MALLOC(GF_FOP_PRI_MAX * sizeof(*ctx), gf_iot_mt_client_ctx_t);
+ if (ctx) {
+ for (i = 0; i < GF_FOP_PRI_MAX; ++i) {
+ INIT_LIST_HEAD(&ctx[i].clients);
+ INIT_LIST_HEAD(&ctx[i].reqs);
+ }
+ setted_ctx = client_ctx_set(client, this, ctx);
+ if (ctx != setted_ctx) {
+ GF_FREE(ctx);
+ ctx = setted_ctx;
+ }
+ }
+ }
+
+ return ctx;
+}
+
call_stub_t *
-__iot_dequeue (iot_conf_t *conf, int *pri, struct timespec *sleep)
-{
- call_stub_t *stub = NULL;
- int i = 0;
- struct timeval curtv = {0,}, difftv = {0,};
-
- *pri = -1;
- sleep->tv_sec = 0;
- sleep->tv_nsec = 0;
- for (i = 0; i < IOT_PRI_MAX; i++) {
- if (list_empty (&conf->reqs[i]) ||
- (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]))
- continue;
-
- if (i == IOT_PRI_LEAST) {
- pthread_mutex_lock(&conf->throttle.lock);
- if (!conf->throttle.sample_time.tv_sec) {
- /* initialize */
- gettimeofday(&conf->throttle.sample_time, NULL);
- } else {
- /*
- * Maintain a running count of least priority
- * operations that are handled over a particular
- * time interval. The count is provided via
- * state dump and is used as a measure against
- * least priority op throttling.
- */
- gettimeofday(&curtv, NULL);
- timersub(&curtv, &conf->throttle.sample_time,
- &difftv);
- if (difftv.tv_sec >= IOT_LEAST_THROTTLE_DELAY) {
- conf->throttle.cached_rate =
- conf->throttle.sample_cnt;
- conf->throttle.sample_cnt = 0;
- conf->throttle.sample_time = curtv;
- }
-
- /*
- * If we're over the configured rate limit,
- * provide an absolute time to the caller that
- * represents the soonest we're allowed to
- * return another least priority request.
- */
- if (conf->throttle.rate_limit &&
- conf->throttle.sample_cnt >=
- conf->throttle.rate_limit) {
- struct timeval delay;
- delay.tv_sec = IOT_LEAST_THROTTLE_DELAY;
- delay.tv_usec = 0;
-
- timeradd(&conf->throttle.sample_time,
- &delay, &curtv);
- TIMEVAL_TO_TIMESPEC(&curtv, sleep);
-
- pthread_mutex_unlock(
- &conf->throttle.lock);
- break;
- }
- }
- conf->throttle.sample_cnt++;
- pthread_mutex_unlock(&conf->throttle.lock);
- }
-
- stub = list_entry (conf->reqs[i].next, call_stub_t, list);
- conf->ac_iot_count[i]++;
- *pri = i;
- break;
+__iot_dequeue(iot_conf_t *conf, int *pri)
+{
+ call_stub_t *stub = NULL;
+ int i = 0;
+ iot_client_ctx_t *ctx;
+
+ *pri = -1;
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]) {
+ continue;
}
- if (!stub)
- return NULL;
+ if (list_empty(&conf->clients[i])) {
+ continue;
+ }
- conf->queue_size--;
- conf->queue_sizes[*pri]--;
- list_del_init (&stub->list);
+ /* Get the first per-client queue for this priority. */
+ ctx = list_first_entry(&conf->clients[i], iot_client_ctx_t, clients);
+ if (!ctx) {
+ continue;
+ }
- return stub;
-}
+ if (list_empty(&ctx->reqs)) {
+ continue;
+ }
+ /* Get the first request on that queue. */
+ stub = list_first_entry(&ctx->reqs, call_stub_t, list);
+ list_del_init(&stub->list);
+ if (list_empty(&ctx->reqs)) {
+ list_del_init(&ctx->clients);
+ } else {
+ list_rotate_left(&conf->clients[i]);
+ }
+
+ conf->ac_iot_count[i]++;
+ conf->queue_marked[i] = _gf_false;
+ *pri = i;
+ break;
+ }
+
+ if (!stub)
+ return NULL;
+
+ conf->queue_size--;
+ conf->queue_sizes[*pri]--;
+
+ return stub;
+}
void
-__iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri)
+__iot_enqueue(iot_conf_t *conf, call_stub_t *stub, int pri)
{
- if (pri < 0 || pri >= IOT_PRI_MAX)
- pri = IOT_PRI_MAX-1;
+ client_t *client = stub->frame->root->client;
+ iot_client_ctx_t *ctx;
- list_add_tail (&stub->list, &conf->reqs[pri]);
+ if (pri < 0 || pri >= GF_FOP_PRI_MAX)
+ pri = GF_FOP_PRI_MAX - 1;
- conf->queue_size++;
- conf->queue_sizes[pri]++;
+ if (client) {
+ ctx = iot_get_ctx(THIS, client);
+ if (ctx) {
+ ctx = &ctx[pri];
+ }
+ } else {
+ ctx = NULL;
+ }
+ if (!ctx) {
+ ctx = &conf->no_client[pri];
+ }
- return;
-}
+ if (list_empty(&ctx->reqs)) {
+ list_add_tail(&ctx->clients, &conf->clients[pri]);
+ }
+ list_add_tail(&stub->list, &ctx->reqs);
+ conf->queue_size++;
+ GF_ATOMIC_INC(conf->stub_cnt);
+ conf->queue_sizes[pri]++;
+}
void *
-iot_worker (void *data)
-{
- iot_conf_t *conf = NULL;
- xlator_t *this = NULL;
- call_stub_t *stub = NULL;
- struct timespec sleep_till = {0, };
- int ret = 0;
- int pri = -1;
- char timeout = 0;
- char bye = 0;
- struct timespec sleep = {0,};
-
- conf = data;
- this = conf->this;
- THIS = this;
-
- for (;;) {
- sleep_till.tv_sec = time (NULL) + conf->idle_time;
-
- pthread_mutex_lock (&conf->mutex);
- {
- if (pri != -1) {
- conf->ac_iot_count[pri]--;
- pri = -1;
- }
- while (conf->queue_size == 0) {
- conf->sleep_count++;
-
- ret = pthread_cond_timedwait (&conf->cond,
- &conf->mutex,
- &sleep_till);
- conf->sleep_count--;
-
- if (ret == ETIMEDOUT) {
- timeout = 1;
- break;
- }
- }
-
- if (timeout) {
- if (conf->curr_count > IOT_MIN_THREADS) {
- conf->curr_count--;
- bye = 1;
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "timeout, terminated. conf->curr_count=%d",
- conf->curr_count);
- } else {
- timeout = 0;
- }
- }
-
- stub = __iot_dequeue (conf, &pri, &sleep);
- if (!stub && (sleep.tv_sec || sleep.tv_nsec)) {
- pthread_cond_timedwait(&conf->cond,
- &conf->mutex, &sleep);
- pthread_mutex_unlock(&conf->mutex);
- continue;
- }
+iot_worker(void *data)
+{
+ iot_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+ struct timespec sleep_till = {
+ 0,
+ };
+ int ret = 0;
+ int pri = -1;
+ gf_boolean_t bye = _gf_false;
+
+ conf = data;
+ this = conf->this;
+ THIS = this;
+
+ for (;;) {
+ pthread_mutex_lock(&conf->mutex);
+ {
+ if (pri != -1) {
+ conf->ac_iot_count[pri]--;
+ pri = -1;
+ }
+ while (conf->queue_size == 0) {
+ if (conf->down) {
+ bye = _gf_true; /*Avoid sleep*/
+ break;
}
- pthread_mutex_unlock (&conf->mutex);
- if (stub) /* guard against spurious wakeups */
- call_resume (stub);
+ clock_gettime(CLOCK_REALTIME_COARSE, &sleep_till);
+ sleep_till.tv_sec += conf->idle_time;
- if (bye)
- break;
- }
+ conf->sleep_count++;
+ ret = pthread_cond_timedwait(&conf->cond, &conf->mutex,
+ &sleep_till);
+ conf->sleep_count--;
- if (pri != -1) {
- pthread_mutex_lock (&conf->mutex);
- {
- conf->ac_iot_count[pri]--;
+ if (conf->down || ret == ETIMEDOUT) {
+ bye = _gf_true;
+ break;
+ }
+ }
+
+ if (bye) {
+ if (conf->down || conf->curr_count > IOT_MIN_THREADS) {
+ conf->curr_count--;
+ if (conf->curr_count == 0)
+ pthread_cond_broadcast(&conf->cond);
+ gf_msg_debug(conf->this->name, 0,
+ "terminated. "
+ "conf->curr_count=%d",
+ conf->curr_count);
+ } else {
+ bye = _gf_false;
}
- pthread_mutex_unlock (&conf->mutex);
+ }
+
+ if (!bye)
+ stub = __iot_dequeue(conf, &pri);
}
- return NULL;
-}
+ pthread_mutex_unlock(&conf->mutex);
+
+ if (stub) { /* guard against spurious wakeups */
+ if (stub->poison) {
+ gf_log(this->name, GF_LOG_INFO, "Dropping poisoned request %p.",
+ stub);
+ call_stub_destroy(stub);
+ } else {
+ call_resume(stub);
+ }
+ GF_ATOMIC_DEC(conf->stub_cnt);
+ }
+ stub = NULL;
+
+ if (bye)
+ break;
+ }
+ return NULL;
+}
int
-do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri)
+do_iot_schedule(iot_conf_t *conf, call_stub_t *stub, int pri)
{
- int ret = 0;
+ int ret = 0;
- pthread_mutex_lock (&conf->mutex);
- {
- __iot_enqueue (conf, stub, pri);
+ pthread_mutex_lock(&conf->mutex);
+ {
+ __iot_enqueue(conf, stub, pri);
- pthread_cond_signal (&conf->cond);
+ pthread_cond_signal(&conf->cond);
- ret = __iot_workers_scale (conf);
- }
- pthread_mutex_unlock (&conf->mutex);
+ ret = __iot_workers_scale(conf);
+ }
+ pthread_mutex_unlock(&conf->mutex);
- return ret;
+ return ret;
}
-char*
-iot_get_pri_meaning (iot_pri_t pri)
-{
- char *name = NULL;
- switch (pri) {
- case IOT_PRI_HI:
- name = "fast";
- break;
- case IOT_PRI_NORMAL:
- name = "normal";
- break;
- case IOT_PRI_LO:
- name = "slow";
- break;
- case IOT_PRI_LEAST:
- name = "least priority";
- break;
- case IOT_PRI_MAX:
- name = "invalid";
- break;
- }
- return name;
+char *
+iot_get_pri_meaning(gf_fop_pri_t pri)
+{
+ char *name = NULL;
+ switch (pri) {
+ case GF_FOP_PRI_HI:
+ name = "fast";
+ break;
+ case GF_FOP_PRI_NORMAL:
+ name = "normal";
+ break;
+ case GF_FOP_PRI_LO:
+ name = "slow";
+ break;
+ case GF_FOP_PRI_LEAST:
+ name = "least";
+ break;
+ case GF_FOP_PRI_MAX:
+ name = "invalid";
+ break;
+ case GF_FOP_PRI_UNSPEC:
+ name = "unspecified";
+ break;
+ }
+ return name;
}
int
-iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
+iot_schedule(call_frame_t *frame, xlator_t *this, call_stub_t *stub)
{
- int ret = -1;
- iot_pri_t pri = IOT_PRI_MAX - 1;
- iot_conf_t *conf = this->private;
+ int ret = -1;
+ gf_fop_pri_t pri = GF_FOP_PRI_MAX - 1;
+ iot_conf_t *conf = this->private;
- if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) {
- pri = IOT_PRI_LEAST;
- goto out;
- }
+ if ((frame->root->pid < GF_CLIENT_PID_MAX) &&
+ (frame->root->pid != GF_CLIENT_PID_NO_ROOT_SQUASH) &&
+ conf->least_priority) {
+ pri = GF_FOP_PRI_LEAST;
+ goto out;
+ }
- switch (stub->fop) {
+ switch (stub->fop) {
case GF_FOP_OPEN:
case GF_FOP_STAT:
case GF_FOP_FSTAT:
@@ -270,8 +312,12 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_STATFS:
case GF_FOP_READDIR:
case GF_FOP_READDIRP:
- pri = IOT_PRI_HI;
- break;
+ case GF_FOP_GETACTIVELK:
+ case GF_FOP_SETACTIVELK:
+ case GF_FOP_ICREATE:
+ case GF_FOP_NAMELINK:
+ pri = GF_FOP_PRI_HI;
+ break;
case GF_FOP_CREATE:
case GF_FOP_FLUSH:
@@ -280,6 +326,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_FINODELK:
case GF_FOP_ENTRYLK:
case GF_FOP_FENTRYLK:
+ case GF_FOP_LEASE:
case GF_FOP_UNLINK:
case GF_FOP_SETATTR:
case GF_FOP_FSETATTR:
@@ -295,8 +342,9 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_FSETXATTR:
case GF_FOP_REMOVEXATTR:
case GF_FOP_FREMOVEXATTR:
- pri = IOT_PRI_NORMAL;
- break;
+ case GF_FOP_PUT:
+ pri = GF_FOP_PRI_NORMAL;
+ break;
case GF_FOP_READ:
case GF_FOP_WRITE:
@@ -307,2607 +355,1236 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_XATTROP:
case GF_FOP_FXATTROP:
case GF_FOP_RCHECKSUM:
- case GF_FOP_FALLOCATE:
- case GF_FOP_DISCARD:
- pri = IOT_PRI_LO;
- break;
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_SEEK:
+ pri = GF_FOP_PRI_LO;
+ break;
- case GF_FOP_NULL:
case GF_FOP_FORGET:
case GF_FOP_RELEASE:
case GF_FOP_RELEASEDIR:
case GF_FOP_GETSPEC:
- case GF_FOP_MAXVALUE:
- //fail compilation on missing fop
- //new fop must choose priority.
- break;
- }
-out:
- ret = do_iot_schedule (this->private, stub, pri);
- gf_log (this->name, GF_LOG_DEBUG, "%s scheduled as %s fop",
- gf_fop_list[stub->fop], iot_get_pri_meaning (pri));
- return ret;
-}
-
-int
-iot_lookup_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
-}
-
-
-int
-iot_lookup_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, iot_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- loc, xdata);
- return 0;
-}
-
-
-int
-iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create lookup stub (out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- STACK_UNWIND_STRICT (lookup, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
- }
-
- return 0;
-}
-
-
-int
-iot_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop,
- xdata);
- return 0;
-}
-
-
-int
-iot_setattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- STACK_WIND (frame, iot_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
-}
-
-
-int
-iot_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_setattr_stub (frame, iot_setattr_wrapper, loc, stbuf, valid,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot create setattr stub"
- "(Out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL, NULL);
- }
-
- return 0;
-}
-
-
-int
-iot_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, preop, postop,
- xdata);
- return 0;
-}
-
-
-int
-iot_fsetattr_wrapper (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- STACK_WIND (frame, iot_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid,
- xdata);
- return 0;
-}
-
-
-int
-iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsetattr_stub (frame, iot_fsetattr_wrapper, fd, stbuf,
- valid, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsetattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int
-iot_access_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask, dict_t *xdata)
-{
- STACK_WIND (frame, iot_access_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->access, loc, mask, xdata);
- return 0;
-}
-
-
-int
-iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_access_stub (frame, iot_access_wrapper, loc, mask, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create access stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
+ break;
+ case GF_FOP_IPC:
+ default:
+ return -EINVAL;
+ }
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (access, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ gf_msg_debug(this->name, 0, "%s scheduled as %s priority fop",
+ gf_fop_list[stub->fop], iot_get_pri_meaning(pri));
+ if (this->private)
+ ret = do_iot_schedule(this->private, stub, pri);
+ return ret;
}
-
int
-iot_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *stbuf, dict_t *xdata)
+iot_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, stbuf,
- xdata);
- return 0;
+ IOT_FOP(lookup, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_readlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size, dict_t *xdata)
+iot_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, iot_readlink_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readlink,
- loc, size, xdata);
- return 0;
+ IOT_FOP(setattr, frame, this, loc, stbuf, valid, xdata);
+ return 0;
}
-
int
-iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
+iot_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create readlink stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(fsetattr, frame, this, fd, stbuf, valid, xdata);
+ return 0;
}
-
int
-iot_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int
-iot_mknod_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+iot_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_mknod_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
- return 0;
+ IOT_FOP(access, frame, this, loc, mask, xdata);
+ return 0;
}
-
int
-iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_mknod_stub (frame, iot_mknod_wrapper, loc, mode, rdev,
- umask, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create mknod stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_mkdir_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+iot_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ IOT_FOP(readlink, frame, this, loc, size, xdata);
+ return 0;
}
-
int
-iot_mkdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+iot_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, iot_mkdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
+ IOT_FOP(mknod, frame, this, loc, mode, rdev, umask, xdata);
+ return 0;
}
-
int
-iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode, umask,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create mkdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
-}
-
-
-int
-iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
+iot_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, iot_rmdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rmdir, loc, flags, xdata);
- return 0;
+ IOT_FOP(mkdir, frame, this, loc, mode, umask, xdata);
+ return 0;
}
-
int
-iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc, flags, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create rmdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_symlink_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+iot_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ IOT_FOP(rmdir, frame, this, loc, flags, xdata);
+ return 0;
}
-
int
-iot_symlink_wrapper (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
+iot_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, iot_symlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->symlink, linkname, loc, umask,
- xdata);
- return 0;
+ IOT_FOP(symlink, frame, this, linkname, loc, umask, xdata);
+ return 0;
}
-
int
-iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_symlink_stub (frame, iot_symlink_wrapper, linkname, loc,
- umask, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create symlink stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_rename_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, xdata);
- return 0;
-}
-
-
-int
-iot_rename_wrapper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+iot_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_rename_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rename, oldloc, newloc, xdata);
- return 0;
+ IOT_FOP(rename, frame, this, oldloc, newloc, xdata);
+ return 0;
}
-
int
-iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+iot_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_DEBUG, "cannot create rename stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(open, frame, this, loc, flags, fd, xdata);
+ return 0;
}
-
int
-iot_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
+iot_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ IOT_FOP(create, frame, this, loc, flags, mode, umask, fd, xdata);
+ return 0;
}
-
int
-iot_open_wrapper (call_frame_t * frame, xlator_t * this, loc_t *loc,
- int32_t flags, fd_t * fd, dict_t *xdata)
+iot_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
{
- STACK_WIND (frame, iot_open_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open, loc, flags, fd,
- xdata);
- return 0;
+ IOT_FOP(put, frame, this, loc, mode, umask, flags, vector, count, offset,
+ iobref, xattr, xdata);
+ return 0;
}
-
int
-iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create open call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+iot_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
- return 0;
+ IOT_FOP(readv, frame, this, fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-iot_create_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
- dict_t *xdata)
+iot_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, iot_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
+ IOT_FOP(flush, frame, this, fd, xdata);
+ return 0;
}
-
-int
-iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_create_stub (frame, iot_create_wrapper, loc, flags, mode,
- umask, fd, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create \"create\" call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
int
-iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
+iot_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
-
- return 0;
+ IOT_FOP(fsync, frame, this, fd, datasync, xdata);
+ return 0;
}
-
int
-iot_readv_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+iot_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+ IOT_FOP(writev, frame, this, fd, vector, count, offset, flags, iobref,
+ xdata);
+ return 0;
}
-
int
-iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+iot_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset,
- flags, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create readv call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readv, frame, -1, -ret, NULL, -1, NULL,
- NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(lk, frame, this, fd, cmd, flock, xdata);
+ return 0;
}
-
int
-iot_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+iot_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
+ IOT_FOP(stat, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+iot_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, iot_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
+ IOT_FOP(fstat, frame, this, fd, xdata);
+ return 0;
}
-
int
-iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_flush_stub (frame, iot_flush_wrapper, fd, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create flush_cbk call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (flush, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+iot_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ IOT_FOP(truncate, frame, this, loc, offset, xdata);
+ return 0;
}
-
int
-iot_fsync_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+iot_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_fsync_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync, xdata);
- return 0;
+ IOT_FOP(ftruncate, frame, this, fd, offset, xdata);
+ return 0;
}
-
int
-iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+iot_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fsync_cbk call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsync, frame, -1, -ret, NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(unlink, frame, this, loc, xflag, xdata);
+ return 0;
}
-
int
-iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+iot_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ IOT_FOP(link, frame, this, oldloc, newloc, xdata);
+ return 0;
}
-
int
-iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count,
- off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- STACK_WIND (frame, iot_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
-}
-
-
-int
-iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_writev_stub (frame, iot_writev_wrapper, fd, vector,
- count, offset, flags, iobref, xdata);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create writev call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-iot_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+iot_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
- return 0;
+ IOT_FOP(opendir, frame, this, loc, fd, xdata);
+ return 0;
}
-
int
-iot_lk_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+iot_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, flock, xdata);
- return 0;
+ IOT_FOP(fsyncdir, frame, this, fd, datasync, xdata);
+ return 0;
}
-
int
-iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
+iot_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock, xdata);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_lk call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (lk, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(statfs, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+iot_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ IOT_FOP(setxattr, frame, this, loc, dict, flags, xdata);
+ return 0;
}
-
-int
-iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- STACK_WIND (frame, iot_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc, xdata);
- return 0;
-}
-
-
int
-iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+iot_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_stat_stub (frame, iot_stat_wrapper, loc, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_stat call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
+ iot_conf_t *conf = NULL;
+ dict_t *depths = NULL;
+ int i = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (stat, frame, -1, -ret, NULL, NULL);
+ conf = this->private;
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ if (name && strcmp(name, IO_THREADS_QUEUE_SIZE_KEY) == 0) {
+ /*
+ * We explicitly do not want a reference count
+ * for this dict in this translator
+ */
+ depths = dict_new();
+ if (!depths) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind_special_getxattr;
}
- return 0;
-}
-
-
-int
-iot_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-
-int
-iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, iot_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd, xdata);
- return 0;
-}
-
-
-int
-iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_fstat call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fstat, frame, -1, -ret, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ if (dict_set_int32(depths, (char *)fop_pri_to_string(i),
+ conf->queue_sizes[i]) != 0) {
+ dict_unref(depths);
+ depths = NULL;
+ goto unwind_special_getxattr;
+ }
}
- return 0;
-}
+ unwind_special_getxattr:
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, depths, xdata);
+ if (depths)
+ dict_unref(depths);
+ return 0;
+ }
-int
-iot_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ IOT_FOP(getxattr, frame, this, loc, name, xdata);
+ return 0;
}
-
int
-iot_truncate_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, iot_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset, xdata);
- return 0;
-}
-
-
-int
-iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+iot_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- call_stub_t *stub;
- int ret = -1;
-
- stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_stat call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(fgetxattr, frame, this, fd, name, xdata);
+ return 0;
}
-
int
-iot_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
-}
-
-
-int
-iot_ftruncate_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, iot_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset, xdata);
- return 0;
-}
-
-
-int
-iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_ftruncate call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, -ret, NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-
-int
-iot_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+iot_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
+ IOT_FOP(fsetxattr, frame, this, fd, dict, flags, xdata);
+ return 0;
}
-
int
-iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t xflag, dict_t *xdata)
+iot_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- STACK_WIND (frame, iot_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
+ IOT_FOP(removexattr, frame, this, loc, name, xdata);
+ return 0;
}
-
int
-iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
- dict_t *xdata)
+iot_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc, xflag, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_unlink call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(fremovexattr, frame, this, fd, name, xdata);
+ return 0;
}
-
int
-iot_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+iot_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ IOT_FOP(readdirp, frame, this, fd, size, offset, xdata);
+ return 0;
}
-
int
-iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new,
- dict_t *xdata)
+iot_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, iot_link_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->link, old, new, xdata);
-
- return 0;
+ IOT_FOP(readdir, frame, this, fd, size, offset, xdata);
+ return 0;
}
-
int
-iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+iot_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create link stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(inodelk, frame, this, volume, loc, cmd, lock, xdata);
+ return 0;
}
-
int
-iot_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+iot_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ IOT_FOP(finodelk, frame, this, volume, fd, cmd, lock, xdata);
+ return 0;
}
-
int
-iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
+iot_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_opendir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->opendir, loc, fd, xdata);
- return 0;
+ IOT_FOP(entrylk, frame, this, volume, loc, basename, cmd, type, xdata);
+ return 0;
}
-
int
-iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+iot_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create opendir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(fentrylk, frame, this, volume, fd, basename, cmd, type, xdata);
+ return 0;
}
-
int
-iot_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+iot_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
- return 0;
+ IOT_FOP(xattrop, frame, this, loc, optype, xattr, xdata);
+ return 0;
}
-
int
-iot_fsyncdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync, dict_t *xdata)
+iot_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_WIND (frame, iot_fsyncdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsyncdir, fd, datasync, xdata);
- return 0;
+ IOT_FOP(fxattrop, frame, this, fd, optype, xattr, xdata);
+ return 0;
}
-
-int
-iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
- dict_t *xdata)
+int32_t
+iot_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsyncdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(rchecksum, frame, this, fd, offset, len, xdata);
+ return 0;
}
-
int
-iot_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf,
- dict_t *xdata)
+iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ IOT_FOP(fallocate, frame, this, fd, mode, offset, len, xdata);
+ return 0;
}
-
int
-iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
+iot_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- STACK_WIND (frame, iot_statfs_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->statfs, loc, xdata);
- return 0;
+ IOT_FOP(discard, frame, this, fd, offset, len, xdata);
+ return 0;
}
-
int
-iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create statfs stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (statfs, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(zerofill, frame, this, fd, offset, len, xdata);
+ return 0;
}
-
int
-iot_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+iot_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ IOT_FOP(seek, frame, this, fd, offset, what, xdata);
+ return 0;
}
-
int
-iot_setxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags, dict_t *xdata)
+iot_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- STACK_WIND (frame, iot_setxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
+ IOT_FOP(lease, frame, this, loc, lease, xdata);
+ return 0;
}
-
int
-iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+iot_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_setxattr_stub (frame, iot_setxattr_wrapper, loc, dict,
- flags, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create setxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (setxattr, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(getactivelk, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+iot_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ IOT_FOP(setactivelk, frame, this, loc, locklist, xdata);
+ return 0;
}
-
int
-iot_getxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+__iot_workers_scale(iot_conf_t *conf)
{
- STACK_WIND (frame, iot_getxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
+ int scale = 0;
+ int diff = 0;
+ pthread_t thread;
+ int ret = 0;
+ int i = 0;
+ for (i = 0; i < GF_FOP_PRI_MAX; i++)
+ scale += min(conf->queue_sizes[i], conf->ac_iot_limit[i]);
-int
-iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create getxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ if (scale < IOT_MIN_THREADS)
+ scale = IOT_MIN_THREADS;
- ret = iot_schedule (frame, this, stub);
+ if (scale > conf->max_count)
+ scale = conf->max_count;
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, NULL, NULL);
+ if (conf->curr_count < scale) {
+ diff = scale - conf->curr_count;
+ }
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ while (diff) {
+ diff--;
+ ret = gf_thread_create(&thread, &conf->w_attr, iot_worker, conf,
+ "iotwr%03hx", conf->curr_count & 0x3ff);
+ if (ret == 0) {
+ pthread_detach(thread);
+ conf->curr_count++;
+ gf_msg_debug(conf->this->name, 0,
+ "scaled threads to %d (queue_size=%d/%d)",
+ conf->curr_count, conf->queue_size, scale);
+ } else {
+ break;
+ }
+ }
-int
-iot_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ return diff;
}
-
int
-iot_fgetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+iot_workers_scale(iot_conf_t *conf)
{
- STACK_WIND (frame, iot_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
+ int ret = -1;
+ if (conf == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
-int
-iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fgetxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ pthread_mutex_lock(&conf->mutex);
+ {
+ ret = __iot_workers_scale(conf);
+ }
+ pthread_mutex_unlock(&conf->mutex);
- ret = iot_schedule (frame, this, stub);
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ return ret;
}
-
int
-iot_fsetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags, dict_t *xdata)
+set_stack_size(iot_conf_t *conf)
{
- STACK_WIND (frame, iot_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags,
- xdata);
- return 0;
-}
+ int err = 0;
+ size_t stacksize = IOT_THREAD_STACK_SIZE;
+ xlator_t *this = NULL;
+ this = THIS;
-int
-iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsetxattr_stub (frame, iot_fsetxattr_wrapper, fd, dict,
- flags, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsetxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ err = pthread_attr_init(&conf->w_attr);
+ if (err != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, err, IO_THREADS_MSG_INIT_FAILED,
+ NULL);
+ return err;
+ }
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ err = pthread_attr_setstacksize(&conf->w_attr, stacksize);
+ if (err == EINVAL) {
+ err = pthread_attr_getstacksize(&conf->w_attr, &stacksize);
+ if (!err) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET,
+ "size=%zd", stacksize, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET,
+ NULL);
+ err = 0;
}
- return 0;
-}
-
+ }
-int
-iot_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ conf->stack_size = stacksize;
+ return err;
}
-
-int
-iot_removexattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+int32_t
+mem_acct_init(xlator_t *this)
{
- STACK_WIND (frame, iot_removexattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
+ int ret = -1;
+ if (!this)
+ return ret;
-int
-iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_removexattr_stub (frame, iot_removexattr_wrapper, loc,
- name, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,"cannot get removexattr fop"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ ret = xlator_mem_acct_init(this, gf_iot_mt_end + 1);
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (removexattr, frame, -1, -ret, NULL);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_NO_MEMORY,
+ NULL);
+ return ret;
+ }
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ return ret;
}
int
-iot_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+iot_priv_dump(xlator_t *this)
{
- STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
+ iot_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
-int
-iot_fremovexattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, iot_fremovexattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fremovexattr, fd, name, xdata);
+ if (!this)
return 0;
-}
-
-int
-iot_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fremovexattr_stub (frame, iot_fremovexattr_wrapper, fd,
- name, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,"cannot get fremovexattr fop"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fremovexattr, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ conf = this->private;
+ if (!conf)
return 0;
-}
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
-int
-iot_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count);
+ gf_proc_dump_write("current_threads_count", "%d", conf->curr_count);
+ gf_proc_dump_write("sleep_count", "%d", conf->sleep_count);
+ gf_proc_dump_write("idle_time", "%d", conf->idle_time);
+ gf_proc_dump_write("stack_size", "%zd", conf->stack_size);
+ gf_proc_dump_write("max_high_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_HI]);
+ gf_proc_dump_write("max_normal_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL]);
+ gf_proc_dump_write("max_low_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_LO]);
+ gf_proc_dump_write("max_least_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_LEAST]);
+ gf_proc_dump_write("current_high_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_HI]);
+ gf_proc_dump_write("current_normal_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_NORMAL]);
+ gf_proc_dump_write("current_low_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_LO]);
+ gf_proc_dump_write("current_least_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_LEAST]);
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ if (!conf->queue_sizes[i])
+ continue;
+ snprintf(key, sizeof(key), "%s_priority_queue_length",
+ iot_get_pri_meaning(i));
+ gf_proc_dump_write(key, "%d", conf->queue_sizes[i]);
+ }
-int
-iot_readdirp_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, iot_readdirp_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdirp, fd, size, offset, xdata);
- return 0;
+ return 0;
}
-
-int
-iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readdirp_stub (frame, iot_readdirp_wrapper, fd, size,
- offset, xdata);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
+/*
+ * We use a decay model to keep track and make sure we're not spawning new
+ * threads too often. Each increment adds a large value to a counter, and that
+ * counter keeps ticking back down to zero over a fairly long period. For
+ * example, let's use ONE_WEEK=604800 seconds, and we want to detect when we
+ * have N=3 increments during that time. Thus, our threshold is
+ * (N-1)*ONE_WEEK. To see how it works, look at three examples.
+ *
+ * (a) Two events close together, then one more almost a week later. The
+ * first two events push our counter to 2*ONE_WEEK plus a bit. At the third
+ * event, we decay down to ONE_WEEK plus a bit and then add ONE_WEEK for the
+ * new event, exceeding our threshold.
+ *
+ * (b) One event, then two more almost a week later. At the time of the
+ * second and third events, the counter is already non-zero, so when we add
+ * 2*ONE_WEEK we exceed again.
+ *
+ * (c) Three events, spaced three days apart. At the time of the second
+ * event, we decay down to approximately ONE_WEEK*4/7 and then add another
+ * ONE_WEEK. At the third event, we decay again down to ONE_WEEK*8/7 and add
+ * another ONE_WEEK, so boom.
+ *
+ * Note that in all three cases if that last event came a day later our counter
+ * would have decayed a bit more and we would *not* exceed our threshold. It's
+ * not exactly the same as a precise "three in one week" limit, but it's very
+ * close and it allows the same kind of tweaking while requiring only constant
+ * space - no arrays of variable length N to allocate or maintain. All we need
+ * (for each queue) is the value plus the time of the last update.
+ */
+
+typedef struct {
+ time_t update_time;
+ uint32_t value;
+} threshold_t;
+/*
+ * Variables so that I can hack these for testing.
+ * TBD: make these tunable?
+ */
+static uint32_t THRESH_SECONDS = 604800;
+static uint32_t THRESH_EVENTS = 3;
+static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */
+
+static void
+iot_apply_event(xlator_t *this, threshold_t *thresh)
+{
+ time_t delta, now = gf_time();
+
+ /* Refresh for manual testing/debugging. It's cheap. */
+ THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1);
+
+ if (thresh->value && thresh->update_time) {
+ delta = now - thresh->update_time;
+ /* Be careful about underflow. */
+ if (thresh->value <= delta) {
+ thresh->value = 0;
+ } else {
+ thresh->value -= delta;
+ }
+ }
+
+ thresh->value += THRESH_SECONDS;
+ if (thresh->value >= THRESH_LIMIT) {
+ gf_log(this->name, GF_LOG_EMERG, "watchdog firing too often");
+ /*
+ * The default action for SIGTRAP is to dump core, but the fact
+ * that it's distinct from other signals we use means that
+ * there are other possibilities as well (e.g. drop into gdb or
+ * invoke a special handler).
+ */
+ kill(getpid(), SIGTRAP);
+ }
+
+ thresh->update_time = now;
+}
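
(Illustrative aside, not part of the patch: the decay arithmetic described in the comment above can be checked with a small standalone program. The sketch below replays case (c) — three events spaced three days apart — with the same constants; the struct and function names are invented and only mirror what iot_apply_event() does.)

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define ONE_WEEK 604800u
#define LIMIT (ONE_WEEK * 2) /* (THRESH_EVENTS - 1) * ONE_WEEK for N = 3 */

struct thresh {
    uint32_t value;
    time_t last;
};

static int
apply_event(struct thresh *t, time_t now)
{
    /* Decay the counter by the elapsed time since the last event. */
    if (t->value && t->last) {
        time_t delta = now - t->last;
        if (t->value <= delta)
            t->value = 0;
        else
            t->value -= delta;
    }
    t->value += ONE_WEEK;
    t->last = now;
    return t->value >= LIMIT; /* non-zero means "firing too often" */
}

int
main(void)
{
    struct thresh t = {0, 0};
    time_t base = 1000000, day = 24 * 3600;
    int i;

    for (i = 0; i < 3; i++) {
        int tripped = apply_event(&t, base + i * 3 * day);
        printf("event %d: value=%" PRIu32 " tripped=%d\n", i + 1, t.value,
               tripped);
    }
    return 0;
}

The third event ends up at 1296000, which exceeds the 1209600 limit — the "boom" in case (c) — while a last event arriving a day later would have decayed the counter below the limit.
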
+
+static void *
+iot_watchdog(void *arg)
+{
+ xlator_t *this = arg;
+ iot_conf_t *priv = this->private;
+ int i;
+ int bad_times[GF_FOP_PRI_MAX] = {
+ 0,
+ };
+ threshold_t thresholds[GF_FOP_PRI_MAX] = {{
+ 0,
+ }};
+
+ for (;;) {
+ sleep(max(priv->watchdog_secs / 5, 1));
+ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ pthread_mutex_lock(&priv->mutex);
+ for (i = 0; i < GF_FOP_PRI_MAX; ++i) {
+ if (priv->queue_marked[i]) {
+ if (++bad_times[i] >= 5) {
+ gf_log(this->name, GF_LOG_WARNING, "queue %d stalled", i);
+ iot_apply_event(this, &thresholds[i]);
+ /*
+ * We might not get here if the event
+ * put us over our threshold.
+ */
+ ++(priv->ac_iot_limit[i]);
+ bad_times[i] = 0;
}
+ } else {
+ bad_times[i] = 0;
+ }
+ priv->queue_marked[i] = (priv->queue_sizes[i] > 0);
}
- return 0;
-}
+ pthread_mutex_unlock(&priv->mutex);
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
-
-int
-iot_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
+ /* NOTREACHED */
+ return NULL;
}
-
-int
-iot_readdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, dict_t *xdata)
+static void
+start_iot_watchdog(xlator_t *this)
{
- STACK_WIND (frame, iot_readdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdir, fd, size, offset, xdata);
- return 0;
-}
+ iot_conf_t *priv = this->private;
+ int ret;
+ if (priv->watchdog_running) {
+ return;
+ }
-int
-iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset,
- xdata);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readdir, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-int
-iot_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ ret = pthread_create(&priv->watchdog_thread, NULL, iot_watchdog, this);
+ if (ret == 0) {
+ priv->watchdog_running = _gf_true;
+ } else {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_create(iot_watchdog) failed");
+ }
}
-
-int
-iot_inodelk_wrapper (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata)
+static void
+stop_iot_watchdog(xlator_t *this)
{
- STACK_WIND (frame, iot_inodelk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->inodelk, volume, loc, cmd, lock,
- xdata);
- return 0;
-}
-
+ iot_conf_t *priv = this->private;
-int
-iot_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
+ if (!priv->watchdog_running) {
+ return;
+ }
- stub = fop_inodelk_stub (frame, iot_inodelk_wrapper,
- volume, loc, cmd, lock, xdata);
- if (!stub) {
- ret = -ENOMEM;
- goto out;
- }
+ if (pthread_cancel(priv->watchdog_thread) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_cancel(iot_watchdog) failed");
+ }
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (inodelk, frame, -1, -ret, NULL);
+ if (pthread_join(priv->watchdog_thread, NULL) != 0) {
+ gf_log(this->name, GF_LOG_WARNING, "pthread_join(iot_watchdog) failed");
+ }
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ /* Failure probably means it's already dead. */
+ priv->watchdog_running = _gf_false;
}
int
-iot_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+reconfigure(xlator_t *this, dict_t *options)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ iot_conf_t *conf = NULL;
+ int ret = -1;
+ conf = this->private;
+ if (!conf)
+ goto out;
-int
-iot_finodelk_wrapper (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- STACK_WIND (frame, iot_finodelk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->finodelk, volume, fd, cmd, lock,
- xdata);
- return 0;
-}
+ GF_OPTION_RECONF("thread-count", conf->max_count, options, int32, out);
+ GF_OPTION_RECONF("high-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_HI],
+ options, int32, out);
-int
-iot_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
+ GF_OPTION_RECONF("normal-prio-threads",
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL], options, int32,
+ out);
- stub = fop_finodelk_stub (frame, iot_finodelk_wrapper,
- volume, fd, cmd, lock, xdata);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get finodelk stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_RECONF("low-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LO],
+ options, int32, out);
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (finodelk, frame, -1, -ret, NULL);
+ GF_OPTION_RECONF("least-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LEAST],
+ options, int32, out);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ GF_OPTION_RECONF("enable-least-priority", conf->least_priority, options,
+ bool, out);
-int
-iot_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ GF_OPTION_RECONF("cleanup-disconnected-reqs",
+ conf->cleanup_disconnected_reqs, options, bool, out);
+ GF_OPTION_RECONF("watchdog-secs", conf->watchdog_secs, options, int32, out);
-int
-iot_entrylk_wrapper (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- STACK_WIND (frame, iot_entrylk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
-}
-
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
-int
-iot_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_entrylk_stub (frame, iot_entrylk_wrapper,
- volume, loc, basename, cmd, type, xdata);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get entrylk stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ if (conf->watchdog_secs > 0) {
+ start_iot_watchdog(this);
+ } else {
+ stop_iot_watchdog(this);
+ }
- ret = iot_schedule (frame, this, stub);
+ ret = 0;
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (entrylk, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ return ret;
}
int
-iot_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+init(xlator_t *this)
{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ iot_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
+ if (!this->children || this->children->next) {
+ gf_smsg("io-threads", GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);
+ goto out;
+ }
-int
-iot_fentrylk_wrapper (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- STACK_WIND (frame, iot_fentrylk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
-}
+ if (!this->parents) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_VOL_MISCONFIGURED,
+ NULL);
+ }
+ conf = (void *)GF_CALLOC(1, sizeof(*conf), gf_iot_mt_iot_conf_t);
+ if (conf == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_OUT_OF_MEMORY,
+ NULL);
+ goto out;
+ }
-int
-iot_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fentrylk_stub (frame, iot_fentrylk_wrapper,
- volume, fd, basename, cmd, type, xdata);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get fentrylk stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_PTHREAD_INIT_FAILED,
+ "pthread_cond_init ret=%d", ret, NULL);
+ goto out;
+ }
+ conf->cond_inited = _gf_true;
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fentrylk, frame, -1, -ret, NULL);
+ if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_PTHREAD_INIT_FAILED,
+ "pthread_mutex_init ret=%d", ret, NULL);
+ goto out;
+ }
+ conf->mutex_inited = _gf_true;
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ ret = set_stack_size(conf);
+ if (ret != 0)
+ goto out;
-int
-iot_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, xdata);
- return 0;
-}
+ ret = -1;
+ GF_OPTION_INIT("thread-count", conf->max_count, int32, out);
-int
-iot_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- STACK_WIND (frame, iot_xattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, xdata);
- return 0;
-}
+ GF_OPTION_INIT("high-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_HI],
+ int32, out);
+ GF_OPTION_INIT("normal-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_NORMAL],
+ int32, out);
-int
-iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
+ GF_OPTION_INIT("low-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LO], int32,
+ out);
- stub = fop_xattrop_stub (frame, iot_xattrop_wrapper, loc, optype,
- xattr, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create xattrop stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("least-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LEAST],
+ int32, out);
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (xattrop, frame, -1, -ret, NULL, NULL);
+ GF_OPTION_INIT("idle-time", conf->idle_time, int32, out);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ GF_OPTION_INIT("enable-least-priority", conf->least_priority, bool, out);
+ GF_OPTION_INIT("cleanup-disconnected-reqs", conf->cleanup_disconnected_reqs,
+ bool, out);
-int
-iot_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, xattr, xdata);
- return 0;
-}
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
-int
-iot_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- STACK_WIND (frame, iot_fxattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, xdata);
- return 0;
-}
+ conf->this = this;
+ GF_ATOMIC_INIT(conf->stub_cnt, 0);
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ INIT_LIST_HEAD(&conf->clients[i]);
+ INIT_LIST_HEAD(&conf->no_client[i].clients);
+ INIT_LIST_HEAD(&conf->no_client[i].reqs);
+ }
-int
-iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fxattrop_stub (frame, iot_fxattrop_wrapper, fd, optype,
- xattr, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fxattrop stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ if (!this->pass_through) {
+ ret = iot_workers_scale(conf);
- ret = iot_schedule (frame, this, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fxattrop, frame, -1, -ret, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED, NULL);
+ goto out;
}
- return 0;
-}
+ }
+ this->private = conf;
-int32_t
-iot_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum, xdata);
- return 0;
-}
+ conf->watchdog_secs = 0;
+ GF_OPTION_INIT("watchdog-secs", conf->watchdog_secs, int32, out);
+ if (conf->watchdog_secs > 0) {
+ start_iot_watchdog(this);
+ }
-
-int32_t
-iot_rchecksum_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, int32_t len, dict_t *xdata)
-{
- STACK_WIND (frame, iot_rchecksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
- return 0;
-}
-
-
-int32_t
-iot_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rchecksum_stub (frame, iot_rchecksum_wrapper, fd, offset,
- len, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create rchecksum stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
+ ret = 0;
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rchecksum, frame, -1, -ret, -1, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ if (ret)
+ GF_FREE(conf);
- return 0;
+ return ret;
}
-int
-iot_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+static void
+iot_exit_threads(iot_conf_t *conf)
{
- STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, preop, postop,
- xdata);
- return 0;
+ pthread_mutex_lock(&conf->mutex);
+ {
+ conf->down = _gf_true;
+ /*Let all the threads know that xl is going down*/
+ pthread_cond_broadcast(&conf->cond);
+ while (conf->curr_count) /*Wait for threads to exit*/
+ pthread_cond_wait(&conf->cond, &conf->mutex);
+ }
+ pthread_mutex_unlock(&conf->mutex);
}
-
int
-iot_fallocate_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- STACK_WIND (frame, iot_fallocate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fallocate, fd, mode, offset, len,
- xdata);
- return 0;
-}
-
+ iot_conf_t *conf = this->private;
+ xlator_t *victim = data;
+ uint64_t stub_cnt = 0;
+ struct timespec sleep_till = {
+ 0,
+ };
-int
-iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fallocate_stub(frame, iot_fallocate_wrapper, fd, mode, offset,
- len, xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fallocate stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fallocate, frame, -1, -ret, NULL, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-int
-iot_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, preop, postop,
- xdata);
- return 0;
-}
-
-
-int
-iot_discard_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- STACK_WIND (frame, iot_discard_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata);
- return 0;
-}
-
-
-int
-iot_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_discard_stub(frame, iot_discard_wrapper, fd, offset, len,
- xdata);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create discard stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule (frame, this, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (discard, frame, -1, -ret, NULL, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
+ if (GF_EVENT_PARENT_DOWN == event) {
+ if (victim->cleanup_starting) {
+ /* Wait for stubs to drain from the queue before notifying PARENT_DOWN */
+ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt);
+ if (stub_cnt) {
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+ pthread_mutex_lock(&conf->mutex);
+ {
+ while (stub_cnt) {
+ (void)pthread_cond_timedwait(&conf->cond, &conf->mutex,
+ &sleep_till);
+ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt);
+ }
}
- }
- return 0;
-}
+ pthread_mutex_unlock(&conf->mutex);
+ }
-int
-__iot_workers_scale (iot_conf_t *conf)
-{
- int scale = 0;
- int diff = 0;
- pthread_t thread;
- int ret = 0;
- int i = 0;
-
- for (i = 0; i < IOT_PRI_MAX; i++)
- scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]);
-
- if (scale < IOT_MIN_THREADS)
- scale = IOT_MIN_THREADS;
-
- if (scale > conf->max_count)
- scale = conf->max_count;
-
- if (conf->curr_count < scale) {
- diff = scale - conf->curr_count;
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
+ } else {
+ iot_exit_threads(conf);
}
+ }
- while (diff) {
- diff --;
-
- ret = pthread_create (&thread, &conf->w_attr, iot_worker, conf);
- if (ret == 0) {
- conf->curr_count++;
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "scaled threads to %d (queue_size=%d/%d)",
- conf->curr_count, conf->queue_size, scale);
- } else {
- break;
- }
- }
-
- return diff;
-}
-
-
-int
-iot_workers_scale (iot_conf_t *conf)
-{
- int ret = -1;
-
- if (conf == NULL) {
- ret = -EINVAL;
- goto out;
+ if (GF_EVENT_CHILD_DOWN == event) {
+ if (victim->cleanup_starting) {
+ iot_exit_threads(conf);
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name);
}
+ }
- pthread_mutex_lock (&conf->mutex);
- {
- ret = __iot_workers_scale (conf);
- }
- pthread_mutex_unlock (&conf->mutex);
+ default_notify(this, event, data);
-out:
- return ret;
+ return 0;
}
-
void
-set_stack_size (iot_conf_t *conf)
-{
- int err = 0;
- size_t stacksize = IOT_THREAD_STACK_SIZE;
- xlator_t *this = NULL;
-
- this = THIS;
-
- pthread_attr_init (&conf->w_attr);
- err = pthread_attr_setstacksize (&conf->w_attr, stacksize);
- if (err == EINVAL) {
- err = pthread_attr_getstacksize (&conf->w_attr, &stacksize);
- if (!err)
- gf_log (this->name, GF_LOG_WARNING,
- "Using default thread stack size %zd",
- stacksize);
- else
- gf_log (this->name, GF_LOG_WARNING,
- "Using default thread stack size");
- }
-
- conf->stack_size = stacksize;
-}
-
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_iot_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int
-iot_priv_dump (xlator_t *this)
+fini(xlator_t *this)
{
- iot_conf_t *conf = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ iot_conf_t *conf = this->private;
- if (!this)
- return 0;
+ if (!conf)
+ return;
- conf = this->private;
- if (!conf)
- return 0;
+ if (conf->mutex_inited && conf->cond_inited)
+ iot_exit_threads(conf);
- snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
- this->name);
+ if (conf->cond_inited)
+ pthread_cond_destroy(&conf->cond);
- gf_proc_dump_add_section(key_prefix);
+ if (conf->mutex_inited)
+ pthread_mutex_destroy(&conf->mutex);
- gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count);
- gf_proc_dump_write("current_threads_count", "%d", conf->curr_count);
- gf_proc_dump_write("sleep_count", "%d", conf->sleep_count);
- gf_proc_dump_write("idle_time", "%d", conf->idle_time);
- gf_proc_dump_write("stack_size", "%zd", conf->stack_size);
- gf_proc_dump_write("high_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_HI]);
- gf_proc_dump_write("normal_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_NORMAL]);
- gf_proc_dump_write("low_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_LO]);
- gf_proc_dump_write("least_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_LEAST]);
+ stop_iot_watchdog(this);
- gf_proc_dump_write("cached least rate", "%u",
- conf->throttle.cached_rate);
- gf_proc_dump_write("least rate limit", "%u", conf->throttle.rate_limit);
+ GF_FREE(conf);
- return 0;
+ this->private = NULL;
+ return;
}
int
-reconfigure (xlator_t *this, dict_t *options)
+iot_client_destroy(xlator_t *this, client_t *client)
{
- iot_conf_t *conf = NULL;
- int ret = -1;
-
- conf = this->private;
- if (!conf)
- goto out;
+ void *tmp = NULL;
- GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out);
+ if (client_ctx_del(client, this, &tmp) == 0) {
+ GF_FREE(tmp);
+ }
- GF_OPTION_RECONF ("high-prio-threads",
- conf->ac_iot_limit[IOT_PRI_HI], options, int32, out);
-
- GF_OPTION_RECONF ("normal-prio-threads",
- conf->ac_iot_limit[IOT_PRI_NORMAL], options, int32,
- out);
-
- GF_OPTION_RECONF ("low-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LO], options, int32, out);
-
- GF_OPTION_RECONF ("least-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LEAST], options, int32,
- out);
- GF_OPTION_RECONF ("enable-least-priority", conf->least_priority,
- options, bool, out);
-
- GF_OPTION_RECONF("least-rate-limit", conf->throttle.rate_limit, options,
- int32, out);
-
- ret = 0;
-out:
- return ret;
+ return 0;
}
+static int
+iot_disconnect_cbk(xlator_t *this, client_t *client)
+{
+ int i;
+ call_stub_t *curr;
+ call_stub_t *next;
+ iot_conf_t *conf = this->private;
+ iot_client_ctx_t *ctx;
-int
-init (xlator_t *this)
-{
- iot_conf_t *conf = NULL;
- int ret = -1;
- int i = 0;
-
- if (!this->children || this->children->next) {
- gf_log ("io-threads", GF_LOG_ERROR,
- "FATAL: iot not configured with exactly one child");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = (void *) GF_CALLOC (1, sizeof (*conf),
- gf_iot_mt_iot_conf_t);
- if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "pthread_cond_init failed (%d)", ret);
- goto out;
- }
-
- if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "pthread_mutex_init failed (%d)", ret);
- goto out;
- }
-
- set_stack_size (conf);
-
- GF_OPTION_INIT ("thread-count", conf->max_count, int32, out);
-
- GF_OPTION_INIT ("high-prio-threads",
- conf->ac_iot_limit[IOT_PRI_HI], int32, out);
-
- GF_OPTION_INIT ("normal-prio-threads",
- conf->ac_iot_limit[IOT_PRI_NORMAL], int32, out);
-
- GF_OPTION_INIT ("low-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LO], int32, out);
-
- GF_OPTION_INIT ("least-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LEAST], int32, out);
-
- GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out);
- GF_OPTION_INIT ("enable-least-priority", conf->least_priority,
- bool, out);
-
- GF_OPTION_INIT("least-rate-limit", conf->throttle.rate_limit, int32,
- out);
- if ((ret = pthread_mutex_init(&conf->throttle.lock, NULL)) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "pthread_mutex_init failed (%d)", ret);
- goto out;
- }
-
- conf->this = this;
-
- for (i = 0; i < IOT_PRI_MAX; i++) {
- INIT_LIST_HEAD (&conf->reqs[i]);
- }
-
- ret = iot_workers_scale (conf);
+ if (!conf || !conf->cleanup_disconnected_reqs) {
+ goto out;
+ }
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot initialize worker threads, exiting init");
- goto out;
+ pthread_mutex_lock(&conf->mutex);
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ ctx = &conf->no_client[i];
+ list_for_each_entry_safe(curr, next, &ctx->reqs, list)
+ {
+ if (curr->frame->root->client != client) {
+ continue;
+ }
+ gf_log(this->name, GF_LOG_INFO,
+ "poisoning %s fop at %p for client %s",
+ gf_fop_list[curr->fop], curr, client->client_uid);
+ curr->poison = _gf_true;
}
+ }
+ pthread_mutex_unlock(&conf->mutex);
- this->private = conf;
- ret = 0;
out:
- if (ret)
- GF_FREE (conf);
-
- return ret;
-}
-
-
-void
-fini (xlator_t *this)
-{
- iot_conf_t *conf = this->private;
-
- GF_FREE (conf);
-
- this->private = NULL;
- return;
+ return 0;
}
struct xlator_dumpops dumpops = {
- .priv = iot_priv_dump,
+ .priv = iot_priv_dump,
};
struct xlator_fops fops = {
- .open = iot_open,
- .create = iot_create,
- .readv = iot_readv,
- .writev = iot_writev,
- .flush = iot_flush,
- .fsync = iot_fsync,
- .lk = iot_lk,
- .stat = iot_stat,
- .fstat = iot_fstat,
- .truncate = iot_truncate,
- .ftruncate = iot_ftruncate,
- .unlink = iot_unlink,
- .lookup = iot_lookup,
- .setattr = iot_setattr,
- .fsetattr = iot_fsetattr,
- .access = iot_access,
- .readlink = iot_readlink,
- .mknod = iot_mknod,
- .mkdir = iot_mkdir,
- .rmdir = iot_rmdir,
- .symlink = iot_symlink,
- .rename = iot_rename,
- .link = iot_link,
- .opendir = iot_opendir,
- .fsyncdir = iot_fsyncdir,
- .statfs = iot_statfs,
- .setxattr = iot_setxattr,
- .getxattr = iot_getxattr,
- .fgetxattr = iot_fgetxattr,
- .fsetxattr = iot_fsetxattr,
- .removexattr = iot_removexattr,
- .fremovexattr = iot_fremovexattr,
- .readdir = iot_readdir,
- .readdirp = iot_readdirp,
- .inodelk = iot_inodelk,
- .finodelk = iot_finodelk,
- .entrylk = iot_entrylk,
- .fentrylk = iot_fentrylk,
- .xattrop = iot_xattrop,
- .fxattrop = iot_fxattrop,
- .rchecksum = iot_rchecksum,
- .fallocate = iot_fallocate,
- .discard = iot_discard,
+ .open = iot_open,
+ .create = iot_create,
+ .readv = iot_readv,
+ .writev = iot_writev,
+ .flush = iot_flush,
+ .fsync = iot_fsync,
+ .lk = iot_lk,
+ .stat = iot_stat,
+ .fstat = iot_fstat,
+ .truncate = iot_truncate,
+ .ftruncate = iot_ftruncate,
+ .unlink = iot_unlink,
+ .lookup = iot_lookup,
+ .setattr = iot_setattr,
+ .fsetattr = iot_fsetattr,
+ .access = iot_access,
+ .readlink = iot_readlink,
+ .mknod = iot_mknod,
+ .mkdir = iot_mkdir,
+ .rmdir = iot_rmdir,
+ .symlink = iot_symlink,
+ .rename = iot_rename,
+ .link = iot_link,
+ .opendir = iot_opendir,
+ .fsyncdir = iot_fsyncdir,
+ .statfs = iot_statfs,
+ .setxattr = iot_setxattr,
+ .getxattr = iot_getxattr,
+ .fgetxattr = iot_fgetxattr,
+ .fsetxattr = iot_fsetxattr,
+ .removexattr = iot_removexattr,
+ .fremovexattr = iot_fremovexattr,
+ .readdir = iot_readdir,
+ .readdirp = iot_readdirp,
+ .inodelk = iot_inodelk,
+ .finodelk = iot_finodelk,
+ .entrylk = iot_entrylk,
+ .fentrylk = iot_fentrylk,
+ .xattrop = iot_xattrop,
+ .fxattrop = iot_fxattrop,
+ .rchecksum = iot_rchecksum,
+ .fallocate = iot_fallocate,
+ .discard = iot_discard,
+ .zerofill = iot_zerofill,
+ .seek = iot_seek,
+ .lease = iot_lease,
+ .getactivelk = iot_getactivelk,
+ .setactivelk = iot_setactivelk,
+ .put = iot_put,
};
-struct xlator_cbks cbks;
+struct xlator_cbks cbks = {
+ .client_destroy = iot_client_destroy,
+ .client_disconnect = iot_disconnect_cbk,
+};
struct volume_options options[] = {
- { .key = {"thread-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .default_value = "16",
- .description = "Number of threads in IO threads translator which "
- "perform concurrent IO operations"
-
- },
- { .key = {"high-prio-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .default_value = "16",
- .description = "Max number of threads in IO threads translator which "
- "perform high priority IO operations at a given time"
-
- },
- { .key = {"normal-prio-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .default_value = "16",
- .description = "Max number of threads in IO threads translator which "
- "perform normal priority IO operations at a given time"
-
- },
- { .key = {"low-prio-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .default_value = "16",
- .description = "Max number of threads in IO threads translator which "
- "perform low priority IO operations at a given time"
-
- },
- { .key = {"least-prio-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .default_value = "1",
- .description = "Max number of threads in IO threads translator which "
- "perform least priority IO operations at a given time"
- },
- { .key = {"enable-least-priority"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Enable/Disable least priority"
- },
- {.key = {"idle-time"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 0x7fffffff,
- .default_value = "120",
- },
- {.key = {"least-rate-limit"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = INT_MAX,
- .default_value = "0",
- .description = "Max number of least priority operations to handle "
- "per-second"
- },
- { .key = {NULL},
- },
+ {.key = {"thread-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ /*.option = "thread-count"*/
+ .description = "Number of threads in IO threads translator which "
+ "perform concurrent IO operations"
+
+ },
+ {.key = {"high-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform high priority IO operations at a given time"
+
+ },
+ {.key = {"normal-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform normal priority IO operations at a given time"
+
+ },
+ {.key = {"low-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform low priority IO operations at a given time"
+
+ },
+ {.key = {"least-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform least priority IO operations at a given time"},
+ {.key = {"enable-least-priority"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = SITE_H_ENABLE_LEAST_PRIORITY,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Enable/Disable least priority"},
+ {
+ .key = {"idle-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 0x7fffffff,
+ .default_value = "120",
+ },
+ {.key = {"watchdog-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "0",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Number of seconds a queue must be stalled before "
+ "starting an 'emergency' thread."},
+ {.key = {"cleanup-disconnected-reqs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-threads"},
+ .description = "'Poison' queued requests when a client disconnects"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-threads"},
+ .description = "Enable/Disable io threads translator"},
+ {
+ .key = {NULL},
+ },
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-threads",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index 1a9dee9ae2c..f54d2f4912d 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -11,80 +11,74 @@
#ifndef __IOT_H
#define __IOT_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "compat-errno.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include <stdlib.h>
-#include "locking.h"
+#include <glusterfs/locking.h>
#include "iot-mem-types.h"
#include <semaphore.h>
-#include "statedump.h"
-
+#include <glusterfs/statedump.h>
struct iot_conf;
-#define MAX_IDLE_SKEW 4 /* In secs */
-#define skew_sec_idle_time(sec) ((sec) + (random () % MAX_IDLE_SKEW))
-#define IOT_DEFAULT_IDLE 120 /* In secs. */
-
-#define IOT_MIN_THREADS 1
-#define IOT_DEFAULT_THREADS 16
-#define IOT_MAX_THREADS 64
-
+#define MAX_IDLE_SKEW 4 /* In secs */
+#define skew_sec_idle_time(sec) ((sec) + (random() % MAX_IDLE_SKEW))
+#define IOT_DEFAULT_IDLE 120 /* In secs. */
-#define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024))
+#define IOT_MIN_THREADS 1
+#define IOT_DEFAULT_THREADS 16
+#define IOT_MAX_THREADS 64
+#define IOT_THREAD_STACK_SIZE ((size_t)(256 * 1024))
-typedef enum {
- IOT_PRI_HI = 0, /* low latency */
- IOT_PRI_NORMAL, /* normal */
- IOT_PRI_LO, /* bulk */
- IOT_PRI_LEAST, /* least */
- IOT_PRI_MAX,
-} iot_pri_t;
-
-#define IOT_LEAST_THROTTLE_DELAY 1 /* sample interval in seconds */
-struct iot_least_throttle {
- struct timeval sample_time; /* timestamp of current sample */
- uint32_t sample_cnt; /* sample count for active interval */
- uint32_t cached_rate; /* the most recently measured rate */
- int32_t rate_limit; /* user-specified rate limit */
- pthread_mutex_t lock;
-};
+typedef struct {
+ struct list_head clients;
+ struct list_head reqs;
+} iot_client_ctx_t;
struct iot_conf {
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-
- int32_t max_count; /* configured maximum */
- int32_t curr_count; /* actual number of threads running */
- int32_t sleep_count;
-
- int32_t idle_time; /* in seconds */
-
- struct list_head reqs[IOT_PRI_MAX];
-
- int32_t ac_iot_limit[IOT_PRI_MAX];
- int32_t ac_iot_count[IOT_PRI_MAX];
- int queue_sizes[IOT_PRI_MAX];
- int queue_size;
- pthread_attr_t w_attr;
- gf_boolean_t least_priority; /*Enable/Disable least-priority */
-
- xlator_t *this;
- size_t stack_size;
-
- struct iot_least_throttle throttle;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ int32_t max_count; /* configured maximum */
+ int32_t curr_count; /* actual number of threads running */
+ int32_t sleep_count;
+
+ int32_t idle_time; /* in seconds */
+
+ struct list_head clients[GF_FOP_PRI_MAX];
+ /*
+ * It turns out that there are several ways a frame can get to us
+ * without having an associated client (server_first_lookup was the
+ * first one I hit). Instead of trying to update all such callers,
+ * we use this to queue them.
+ */
+ iot_client_ctx_t no_client[GF_FOP_PRI_MAX];
+
+ int32_t ac_iot_limit[GF_FOP_PRI_MAX];
+ int32_t ac_iot_count[GF_FOP_PRI_MAX];
+ int queue_sizes[GF_FOP_PRI_MAX];
+ int32_t queue_size;
+ gf_atomic_t stub_cnt;
+ pthread_attr_t w_attr;
+ gf_boolean_t least_priority; /*Enable/Disable least-priority */
+
+ xlator_t *this;
+ size_t stack_size;
+ gf_boolean_t down; /*PARENT_DOWN event is notified*/
+ gf_boolean_t mutex_inited;
+ gf_boolean_t cond_inited;
+
+ int32_t watchdog_secs;
+ gf_boolean_t watchdog_running;
+ pthread_t watchdog_thread;
+ gf_boolean_t queue_marked[GF_FOP_PRI_MAX];
+ gf_boolean_t cleanup_disconnected_reqs;
};
typedef struct iot_conf iot_conf_t;
diff --git a/xlators/performance/io-threads/src/iot-mem-types.h b/xlators/performance/io-threads/src/iot-mem-types.h
index 4fa8302d1f4..29565f34dd4 100644
--- a/xlators/performance/io-threads/src/iot-mem-types.h
+++ b/xlators/performance/io-threads/src/iot-mem-types.h
@@ -8,15 +8,14 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __IOT_MEM_TYPES_H__
#define __IOT_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_iot_mem_types_ {
- gf_iot_mt_iot_conf_t = gf_common_mt_end + 1,
- gf_iot_mt_end
+ gf_iot_mt_iot_conf_t = gf_common_mt_end + 1,
+ gf_iot_mt_client_ctx_t,
+ gf_iot_mt_end
};
#endif
-
diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am
index 8c9f5a8582f..447ff0f30f0 100644
--- a/xlators/performance/md-cache/src/Makefile.am
+++ b/xlators/performance/md-cache/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = md-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-md_cache_la_LDFLAGS = -module -avoid-version
+md_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
md_cache_la_SOURCES = md-cache.c
md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = md-cache-mem-types.h
+noinst_HEADERS = md-cache-mem-types.h md-cache-messages.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
-I$(CONTRIBDIR)/rbtree
AM_CFLAGS = -Wall $(GF_CFLAGS)
@@ -23,3 +24,6 @@ stat-prefetch-compat:
install-exec-local: stat-prefetch-compat
+
+uninstall-local:
+ rm -f $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h
index 6634cf962a5..47a07005717 100644
--- a/xlators/performance/md-cache/src/md-cache-mem-types.h
+++ b/xlators/performance/md-cache/src/md-cache-mem-types.h
@@ -8,17 +8,16 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __MDC_MEM_TYPES_H__
#define __MDC_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_mdc_mem_types_ {
- gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1,
- gf_mdc_mt_md_cache_t,
- gf_mdc_mt_mdc_conf_t,
- gf_mdc_mt_end
+ gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1,
+ gf_mdc_mt_md_cache_t,
+ gf_mdc_mt_mdc_conf_t,
+ gf_mdc_mt_mdc_ipc,
+ gf_mdc_mt_end
};
#endif
-
diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h
new file mode 100644
index 00000000000..f367bad1991
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-messages.h
@@ -0,0 +1,29 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MD_CACHE_MESSAGES_H_
+#define _MD_CACHE_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(MD_CACHE, MD_CACHE_MSG_NO_MEMORY, MD_CACHE_MSG_DISCARD_UPDATE,
+ MD_CACHE_MSG_CACHE_UPDATE, MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ MD_CACHE_MSG_NO_XATTR_CACHE);
+
+#endif /* _MD_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 2f52cbe5807..a405be51f02 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -8,2083 +8,4013 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
#include "md-cache-mem-types.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/glusterfs-acl.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/upcall-utils.h>
#include <assert.h>
#include <sys/time.h>
-
+#include "md-cache-messages.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/atomic.h>
/* TODO:
- cache symlink() link names and nuke symlink-cache
- send proper postbuf in setattr_cbk even when op_ret = -1
*/
-
-struct mdc_conf {
- int timeout;
- gf_boolean_t cache_posix_acl;
- gf_boolean_t cache_selinux;
- gf_boolean_t force_readdirp;
+struct mdc_statfs_cache {
+ pthread_mutex_t lock;
+ time_t last_refreshed; /* (time_t)-1 if not yet initialized. */
+ struct statvfs buf;
};
+struct mdc_statistics {
+ gf_atomic_t stat_hit; /* No. of times lookup/stat was served from
+ mdc */
-static struct mdc_key {
- const char *name;
- int load;
- int check;
-} mdc_keys[] = {
- {
- .name = "system.posix_acl_access",
- .load = 0,
- .check = 1,
- },
- {
- .name = "system.posix_acl_default",
- .load = 0,
- .check = 1,
- },
- {
- .name = GF_SELINUX_XATTR_KEY,
- .load = 0,
- .check = 1,
- },
- {
- .name = "security.capability",
- .load = 0,
- .check = 1,
- },
- {
- .name = "gfid-req",
- .load = 0,
- .check = 1,
- },
- {
- .name = NULL,
- .load = 0,
- .check = 0,
- }
-};
+ gf_atomic_t stat_miss; /* No. of times valid stat wasn't present in
+ mdc */
+ gf_atomic_t xattr_hit; /* No. of times getxattr was served from mdc,
+ Note: this doesn't count the xattr served
+ from lookup */
-static uint64_t
-gfid_to_ino (uuid_t gfid)
-{
- uint64_t ino = 0;
- int i = 0, j = 0;
-
- for (i = 15; i > (15 - 8); i--) {
- ino += (uint64_t)(gfid[i]) << j;
- j += 8;
- }
+ gf_atomic_t xattr_miss; /* No. of times xattr req was WIND from mdc */
+ gf_atomic_t negative_lookup; /* No. of negative lookups */
+ gf_atomic_t nameless_lookup; /* No. of negative lookups that were sent
+ to bricks */
- return ino;
-}
+ gf_atomic_t stat_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t xattr_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t need_lookup; /* No. of lookups issued, because other
+ xlators requested for explicit lookup */
+};
+struct mdc_conf {
+ uint32_t timeout;
+ gf_boolean_t cache_posix_acl;
+ gf_boolean_t cache_glusterfs_acl;
+ gf_boolean_t cache_selinux;
+ gf_boolean_t cache_capability;
+ gf_boolean_t cache_ima;
+ gf_boolean_t force_readdirp;
+ gf_boolean_t cache_swift_metadata;
+ gf_boolean_t cache_samba_metadata;
+ gf_boolean_t mdc_invalidation;
+ gf_boolean_t global_invalidation;
+
+ time_t last_child_down;
+ gf_lock_t lock;
+ struct mdc_statistics mdc_counter;
+ gf_boolean_t cache_statfs;
+ struct mdc_statfs_cache statfs_cache;
+ char *mdc_xattr_str;
+ gf_atomic_int32_t generation;
+};
struct mdc_local;
typedef struct mdc_local mdc_local_t;
-#define MDC_STACK_UNWIND(fop, frame, params ...) do { \
- mdc_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- mdc_local_wipe (__xl, __local); \
- } while (0)
-
+#define MDC_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ mdc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ mdc_local_wipe(__xl, __local); \
+ } while (0)
struct md_cache {
- ia_prot_t md_prot;
- uint32_t md_nlink;
- uint32_t md_uid;
- uint32_t md_gid;
- uint32_t md_atime;
- uint32_t md_atime_nsec;
- uint32_t md_mtime;
- uint32_t md_mtime_nsec;
- uint32_t md_ctime;
- uint32_t md_ctime_nsec;
- uint64_t md_rdev;
- uint64_t md_size;
- uint64_t md_blocks;
- dict_t *xattr;
- char *linkname;
- time_t ia_time;
- time_t xa_time;
- gf_lock_t lock;
+ ia_prot_t md_prot;
+ uint32_t md_nlink;
+ uint32_t md_uid;
+ uint32_t md_gid;
+ uint32_t md_atime_nsec;
+ uint32_t md_mtime_nsec;
+ uint32_t md_ctime_nsec;
+ int64_t md_atime;
+ int64_t md_mtime;
+ int64_t md_ctime;
+ uint64_t md_rdev;
+ uint64_t md_size;
+ uint64_t md_blocks;
+ uint64_t generation;
+ dict_t *xattr;
+ char *linkname;
+ time_t ia_time;
+ time_t xa_time;
+ gf_boolean_t need_lookup;
+ gf_boolean_t valid;
+ gf_boolean_t gen_rollover;
+ gf_boolean_t invalidation_rollover;
+ gf_lock_t lock;
};
-
struct mdc_local {
- loc_t loc;
- loc_t loc2;
- fd_t *fd;
- char *linkname;
- dict_t *xattr;
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ char *linkname;
+ char *key;
+ dict_t *xattr;
+ uint64_t incident_time;
+ bool update_cache;
};
-
int
-__mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+__mdc_inode_ctx_get(xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
{
- int ret = 0;
- struct md_cache *mdc = NULL;
- uint64_t mdc_int = 0;
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+ uint64_t mdc_int = 0;
- ret = __inode_ctx_get (inode, this, &mdc_int);
- mdc = (void *) (long) (mdc_int);
- if (ret == 0 && mdc_p)
- *mdc_p = mdc;
+ ret = __inode_ctx_get(inode, this, &mdc_int);
+ mdc = (void *)(long)(mdc_int);
+ if (ret == 0 && mdc_p)
+ *mdc_p = mdc;
- return ret;
+ return ret;
}
-
int
-mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+mdc_inode_ctx_get(xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
{
- int ret;
+ int ret = -1;
+
+ if (!inode)
+ goto out;
- LOCK(&inode->lock);
- {
- ret = __mdc_inode_ctx_get (this, inode, mdc_p);
- }
- UNLOCK(&inode->lock);
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_get(this, inode, mdc_p);
+ }
+ UNLOCK(&inode->lock);
- return ret;
+out:
+ return ret;
}
+uint64_t
+__mdc_inc_generation(xlator_t *this, struct md_cache *mdc)
+{
+ uint64_t gen = 0, rollover;
+ struct mdc_conf *conf = NULL;
-int
-__mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc)
+ conf = this->private;
+
+ gen = GF_ATOMIC_INC(conf->generation);
+ if (gen == 0) {
+ mdc->gen_rollover = !mdc->gen_rollover;
+ gen = GF_ATOMIC_INC(conf->generation);
+ mdc->ia_time = 0;
+ mdc->generation = 0;
+ }
+
+ rollover = mdc->gen_rollover;
+ gen |= (rollover << 32);
+ return gen;
+}
+
+uint64_t
+mdc_inc_generation(xlator_t *this, inode_t *inode)
{
- int ret = 0;
- uint64_t mdc_int = 0;
+ struct mdc_conf *conf = NULL;
+ uint64_t gen = 0;
+ struct md_cache *mdc = NULL;
+
+ conf = this->private;
- mdc_int = (long) mdc;
- ret = __inode_ctx_set2 (inode, this, &mdc_int, 0);
+ mdc_inode_ctx_get(this, inode, &mdc);
- return ret;
+ if (mdc) {
+ LOCK(&mdc->lock);
+ {
+ gen = __mdc_inc_generation(this, mdc);
+ }
+ UNLOCK(&mdc->lock);
+ } else {
+ gen = GF_ATOMIC_INC(conf->generation);
+ if (gen == 0) {
+ gen = GF_ATOMIC_INC(conf->generation);
+ }
+ }
+
+ return gen;
}
+uint64_t
+mdc_get_generation(xlator_t *this, inode_t *inode)
+{
+ struct mdc_conf *conf = NULL;
+ uint64_t gen = 0;
+ struct md_cache *mdc = NULL;
+
+ conf = this->private;
+
+ mdc_inode_ctx_get(this, inode, &mdc);
+
+ if (mdc) {
+ LOCK(&mdc->lock);
+ {
+ gen = mdc->generation;
+ }
+ UNLOCK(&mdc->lock);
+ } else
+ gen = GF_ATOMIC_GET(conf->generation);
+
+ return gen;
+}
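
(Illustrative aside, not part of the patch: the generation value carries a one-bit rollover flag packed above the 32-bit counter. The standalone sketch below, with invented variable names, shows the packing done in __mdc_inc_generation() and the matching unpack performed later on the incident time in mdc_inode_iatt_set_validate().)

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t gen = 7;      /* low 32 bits: the running generation counter */
    uint64_t rollover = 1; /* flips each time the 32-bit counter wraps */

    /* Pack, as in __mdc_inc_generation(): gen |= (rollover << 32). */
    uint64_t incident = gen | (rollover << 32);

    /* Unpack, as the validate path does on its incident time. */
    uint32_t got_rollover = incident >> 32;
    uint64_t got_gen = incident & 0xffffffff;

    printf("incident=0x%" PRIx64 " rollover=%" PRIu32 " gen=%" PRIu64 "\n",
           incident, got_rollover, got_gen);
    return 0;
}
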
int
-mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc)
+__mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc)
{
- int ret;
+ int ret = 0;
+ uint64_t mdc_int = 0;
- LOCK(&inode->lock);
- {
- ret = __mdc_inode_ctx_set (this, inode, mdc);
- }
- UNLOCK(&inode->lock);
+ mdc_int = (long)mdc;
+ ret = __inode_ctx_set(inode, this, &mdc_int);
- return ret;
+ return ret;
}
+int
+mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+ int ret;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_set(this, inode, mdc);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
mdc_local_t *
-mdc_local_get (call_frame_t *frame)
+mdc_local_get(call_frame_t *frame, inode_t *inode)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
- if (local)
- goto out;
+ local = frame->local;
+ if (local)
+ goto out;
- local = GF_CALLOC (sizeof (*local), 1, gf_mdc_mt_mdc_local_t);
- if (!local)
- goto out;
+ local = GF_CALLOC(sizeof(*local), 1, gf_mdc_mt_mdc_local_t);
+ if (!local)
+ goto out;
- frame->local = local;
+ local->incident_time = mdc_get_generation(frame->this, inode);
+ frame->local = local;
out:
- return local;
+ return local;
}
-
void
-mdc_local_wipe (xlator_t *this, mdc_local_t *local)
+mdc_local_wipe(xlator_t *this, mdc_local_t *local)
{
- if (!local)
- return;
+ if (!local)
+ return;
- loc_wipe (&local->loc);
+ loc_wipe(&local->loc);
- loc_wipe (&local->loc2);
+ loc_wipe(&local->loc2);
- if (local->fd)
- fd_unref (local->fd);
+ if (local->fd)
+ fd_unref(local->fd);
- GF_FREE (local->linkname);
+ GF_FREE(local->linkname);
- if (local->xattr)
- dict_unref (local->xattr);
+ GF_FREE(local->key);
- GF_FREE (local);
- return;
-}
+ if (local->xattr)
+ dict_unref(local->xattr);
+ GF_FREE(local);
+ return;
+}
int
-mdc_inode_wipe (xlator_t *this, inode_t *inode)
+mdc_inode_wipe(xlator_t *this, inode_t *inode)
{
- int ret = 0;
- uint64_t mdc_int = 0;
- struct md_cache *mdc = NULL;
+ int ret = 0;
+ uint64_t mdc_int = 0;
+ struct md_cache *mdc = NULL;
- ret = inode_ctx_del (inode, this, &mdc_int);
- if (ret != 0)
- goto out;
+ ret = inode_ctx_del(inode, this, &mdc_int);
+ if (ret != 0)
+ goto out;
- mdc = (void *) (long) mdc_int;
+ mdc = (void *)(long)mdc_int;
- if (mdc->xattr)
- dict_unref (mdc->xattr);
+ if (mdc->xattr)
+ dict_unref(mdc->xattr);
- GF_FREE (mdc->linkname);
+ GF_FREE(mdc->linkname);
- GF_FREE (mdc);
+ GF_FREE(mdc);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
struct md_cache *
-mdc_inode_prep (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
- struct md_cache *mdc = NULL;
-
- LOCK (&inode->lock);
- {
- ret = __mdc_inode_ctx_get (this, inode, &mdc);
- if (ret == 0)
- goto unlock;
-
- mdc = GF_CALLOC (sizeof (*mdc), 1, gf_mdc_mt_md_cache_t);
- if (!mdc) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto unlock;
- }
+mdc_inode_prep(xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_get(this, inode, &mdc);
+ if (ret == 0)
+ goto unlock;
+
+ mdc = GF_CALLOC(sizeof(*mdc), 1, gf_mdc_mt_md_cache_t);
+ if (!mdc) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ goto unlock;
+ }
- LOCK_INIT (&mdc->lock);
+ LOCK_INIT(&mdc->lock);
- ret = __mdc_inode_ctx_set (this, inode, mdc);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- GF_FREE (mdc);
- mdc = NULL;
- }
+ ret = __mdc_inode_ctx_set(this, inode, mdc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ GF_FREE(mdc);
+ mdc = NULL;
}
+ }
unlock:
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
- return mdc;
+ return mdc;
}
-
+/* Cache is valid if:
+ * - It is not cached before any brick was down. Brick down case is handled by
+ * invalidating all the cache when any brick went down.
+ * - The cache time is not expired
+ */
static gf_boolean_t
-is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
-{
- struct mdc_conf *conf = NULL;
- time_t now = 0;
- gf_boolean_t ret = _gf_true;
- conf = this->private;
+__is_cache_valid(xlator_t *this, time_t mdc_time)
+{
+ gf_boolean_t ret = _gf_true;
+ struct mdc_conf *conf = NULL;
+ uint32_t timeout = 0;
+ time_t last_child_down = 0;
+
+ conf = this->private;
+
+ /* conf->lock is deliberately not taken here, so that multi-threaded
+ * IO doesn't contend on a global lock. The lock is taken while
+ * updating the variable, so that at least the writes stay intact.
+ * The read of last_child_down may return junk, but only for a very
+ * short period of time.
+ */
+ last_child_down = conf->last_child_down;
+ timeout = conf->timeout;
+
+ if ((mdc_time == 0) ||
+ ((last_child_down != 0) && (mdc_time < last_child_down))) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ if (gf_time() >= (mdc_time + timeout)) {
+ ret = _gf_false;
+ }
- time (&now);
+out:
+ return ret;
+}
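
(Illustrative aside, not part of the patch: the validity rule in the comment above — drop anything cached before the latest child-down, or anything older than the timeout — reduces to the few checks below. The helper name and timestamps are hypothetical; the logic mirrors __is_cache_valid().)

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool
cache_valid(time_t mdc_time, time_t last_child_down, time_t timeout, time_t now)
{
    if (mdc_time == 0)
        return false;
    if (last_child_down != 0 && mdc_time < last_child_down)
        return false; /* cached before a brick went down */
    return now < mdc_time + timeout; /* not yet expired */
}

int
main(void)
{
    time_t now = 1000;

    printf("%d\n", cache_valid(now - 5, 0, 600, now));   /* fresh: 1 */
    printf("%d\n", cache_valid(now - 5, 998, 600, now)); /* pre-child-down: 0 */
    printf("%d\n", cache_valid(now - 700, 0, 600, now)); /* timed out: 0 */
    return 0;
}
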
- LOCK (&mdc->lock);
- {
- if (now >= (mdc->ia_time + conf->timeout))
- ret = _gf_false;
+static gf_boolean_t
+is_md_cache_iatt_valid(xlator_t *this, struct md_cache *mdc)
+{
+ gf_boolean_t ret = _gf_true;
+
+ LOCK(&mdc->lock);
+ {
+ if (mdc->valid == _gf_false) {
+ ret = mdc->valid;
+ } else {
+ ret = __is_cache_valid(this, mdc->ia_time);
+ if (ret == _gf_false) {
+ mdc->ia_time = 0;
+ mdc->generation = 0;
+ }
}
- UNLOCK (&mdc->lock);
+ }
+ UNLOCK(&mdc->lock);
- return ret;
+ return ret;
}
-
static gf_boolean_t
-is_md_cache_xatt_valid (xlator_t *this, struct md_cache *mdc)
+is_md_cache_xatt_valid(xlator_t *this, struct md_cache *mdc)
{
- struct mdc_conf *conf = NULL;
- time_t now = 0;
- gf_boolean_t ret = _gf_true;
-
- conf = this->private;
+ gf_boolean_t ret = _gf_true;
- time (&now);
-
- LOCK (&mdc->lock);
- {
- if (now >= (mdc->xa_time + conf->timeout))
- ret = _gf_false;
- }
- UNLOCK (&mdc->lock);
+ LOCK(&mdc->lock);
+ {
+ ret = __is_cache_valid(this, mdc->xa_time);
+ if (ret == _gf_false)
+ mdc->xa_time = 0;
+ }
+ UNLOCK(&mdc->lock);
- return ret;
+ return ret;
}
-
void
-mdc_from_iatt (struct md_cache *mdc, struct iatt *iatt)
-{
- mdc->md_prot = iatt->ia_prot;
- mdc->md_nlink = iatt->ia_nlink;
- mdc->md_uid = iatt->ia_uid;
- mdc->md_gid = iatt->ia_gid;
- mdc->md_atime = iatt->ia_atime;
- mdc->md_atime_nsec = iatt->ia_atime_nsec;
- mdc->md_mtime = iatt->ia_mtime;
- mdc->md_mtime_nsec = iatt->ia_mtime_nsec;
- mdc->md_ctime = iatt->ia_ctime;
- mdc->md_ctime_nsec = iatt->ia_ctime_nsec;
- mdc->md_rdev = iatt->ia_rdev;
- mdc->md_size = iatt->ia_size;
- mdc->md_blocks = iatt->ia_blocks;
+mdc_from_iatt(struct md_cache *mdc, struct iatt *iatt)
+{
+ mdc->md_prot = iatt->ia_prot;
+ mdc->md_nlink = iatt->ia_nlink;
+ mdc->md_uid = iatt->ia_uid;
+ mdc->md_gid = iatt->ia_gid;
+ mdc->md_atime = iatt->ia_atime;
+ mdc->md_atime_nsec = iatt->ia_atime_nsec;
+ mdc->md_mtime = iatt->ia_mtime;
+ mdc->md_mtime_nsec = iatt->ia_mtime_nsec;
+ mdc->md_ctime = iatt->ia_ctime;
+ mdc->md_ctime_nsec = iatt->ia_ctime_nsec;
+ mdc->md_rdev = iatt->ia_rdev;
+ mdc->md_size = iatt->ia_size;
+ mdc->md_blocks = iatt->ia_blocks;
}
-
void
-mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt)
+mdc_to_iatt(struct md_cache *mdc, struct iatt *iatt)
{
- iatt->ia_prot = mdc->md_prot;
- iatt->ia_nlink = mdc->md_nlink;
- iatt->ia_uid = mdc->md_uid;
- iatt->ia_gid = mdc->md_gid;
- iatt->ia_atime = mdc->md_atime;
- iatt->ia_atime_nsec = mdc->md_atime_nsec;
- iatt->ia_mtime = mdc->md_mtime;
- iatt->ia_mtime_nsec = mdc->md_mtime_nsec;
- iatt->ia_ctime = mdc->md_ctime;
- iatt->ia_ctime_nsec = mdc->md_ctime_nsec;
- iatt->ia_rdev = mdc->md_rdev;
- iatt->ia_size = mdc->md_size;
- iatt->ia_blocks = mdc->md_blocks;
+ iatt->ia_prot = mdc->md_prot;
+ iatt->ia_nlink = mdc->md_nlink;
+ iatt->ia_uid = mdc->md_uid;
+ iatt->ia_gid = mdc->md_gid;
+ iatt->ia_atime = mdc->md_atime;
+ iatt->ia_atime_nsec = mdc->md_atime_nsec;
+ iatt->ia_mtime = mdc->md_mtime;
+ iatt->ia_mtime_nsec = mdc->md_mtime_nsec;
+ iatt->ia_ctime = mdc->md_ctime;
+ iatt->ia_ctime_nsec = mdc->md_ctime_nsec;
+ iatt->ia_rdev = mdc->md_rdev;
+ iatt->ia_size = mdc->md_size;
+ iatt->ia_blocks = mdc->md_blocks;
}
-
int
mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
- struct iatt *iatt)
-{
- int ret = -1;
- struct md_cache *mdc = NULL;
+ struct iatt *iatt, gf_boolean_t update_time,
+ uint64_t incident_time)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+ uint32_t rollover = 0;
+ uint64_t gen = 0;
+ gf_boolean_t update_xa_time = _gf_false;
+ struct mdc_conf *conf = this->private;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc) {
+ ret = -1;
+ goto out;
+ }
+
+ rollover = incident_time >> 32;
+ incident_time = (incident_time & 0xffffffff);
+
+ LOCK(&mdc->lock);
+ {
+ if (!iatt || !iatt->ia_ctime) {
+ gf_msg_callingfn("md-cache", GF_LOG_TRACE, 0, 0,
+ "invalidating iatt(NULL)"
+ "(%s)",
+ uuid_utoa(inode->gfid));
+ mdc->ia_time = 0;
+ mdc->valid = 0;
+
+ gen = __mdc_inc_generation(this, mdc);
+ mdc->generation = (gen & 0xffffffff);
+ goto unlock;
+ }
- mdc = mdc_inode_prep (this, inode);
- if (!mdc)
- goto out;
+        /* There could be a race in invalidation, where invalidations
+         * issued in the order A, B reach md-cache in the order B, A.
+         * Hence, make sure invalidation A is discarded if it arrives
+         * after B. The ctime of a file only ever moves forward, unlike
+         * atime and mtime (which the user can set to any date), and
+         * ctime is also updated whenever atime/mtime changes; hence
+         * check ctime only.
+         */
+ if (mdc->md_ctime > iatt->ia_ctime) {
+ gf_msg_callingfn(this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request (%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
+ if ((mdc->md_ctime == iatt->ia_ctime) &&
+ (mdc->md_ctime_nsec > iatt->ia_ctime_nsec)) {
+ gf_msg_callingfn(this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request(ctime_nsec) (%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
- LOCK (&mdc->lock);
- {
- if (!iatt || !iatt->ia_ctime) {
- mdc->ia_time = 0;
- goto unlock;
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (iatt->ia_ctime != mdc->md_ctime) ||
+ (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) {
+ if (conf->global_invalidation &&
+ (!prebuf || (prebuf->ia_mtime != mdc->md_mtime) ||
+ (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_ctime_nsec != mdc->md_ctime_nsec))) {
+ if (IA_ISREG(inode->ia_type)) {
+ gf_msg("md-cache", GF_LOG_TRACE, 0,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "prebuf doesn't match the value we have cached,"
+ " invalidate the inode(%s)",
+ uuid_utoa(inode->gfid));
+
+ inode_invalidate(inode);
}
+ } else {
+ update_xa_time = _gf_true;
+ }
+ }
- /*
- * Invalidate the inode if the mtime or ctime has changed
- * and the prebuf doesn't match the value we have cached.
- * TODO: writev returns with a NULL iatt due to
- * performance/write-behind, causing invalidation on writes.
- */
- if (IA_ISREG(inode->ia_type) &&
- ((iatt->ia_mtime != mdc->md_mtime) ||
- (iatt->ia_ctime != mdc->md_ctime)))
- if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) ||
- (prebuf->ia_mtime != mdc->md_mtime))
- inode_invalidate(inode);
-
- mdc_from_iatt (mdc, iatt);
-
- time (&mdc->ia_time);
+ if ((mdc->gen_rollover == rollover) &&
+ (incident_time >= mdc->generation)) {
+ mdc_from_iatt(mdc, iatt);
+ mdc->valid = _gf_true;
+ if (update_time) {
+ mdc->ia_time = gf_time();
+ if (mdc->xa_time && update_xa_time)
+ mdc->xa_time = mdc->ia_time;
+ }
+
+ gf_msg_callingfn(
+ "md-cache", GF_LOG_TRACE, 0, MD_CACHE_MSG_CACHE_UPDATE,
+ "Updated iatt(%s)"
+ " time:%lld generation=%lld",
+ uuid_utoa(iatt->ia_gfid), (unsigned long long)mdc->ia_time,
+ (unsigned long long)mdc->generation);
+ } else {
+ gf_msg_callingfn("md-cache", GF_LOG_TRACE, 0, 0,
+ "not updating cache (%s)"
+ "mdc-rollover=%u rollover=%u "
+ "mdc-generation=%llu "
+ "mdc-ia_time=%llu incident_time=%llu ",
+ uuid_utoa(iatt->ia_gfid), mdc->gen_rollover,
+ rollover, (unsigned long long)mdc->generation,
+ (unsigned long long)mdc->ia_time,
+ (unsigned long long)incident_time);
}
+ }
unlock:
- UNLOCK (&mdc->lock);
- ret = 0;
+ UNLOCK(&mdc->lock);
+
out:
- return ret;
+ return ret;
}
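/* mdc_inode_iatt_set_validate() above applies a reply to the cache only if
 * its 64-bit incident_time belongs to the current rollover and is not older
 * than the generation recorded by the last invalidation. A minimal sketch of
 * how that value is packed and checked, mirroring the shifts used above;
 * illustrative only, not part of the patch, and the helper names are
 * hypothetical. */
#include <stdbool.h>
#include <stdint.h>

static inline uint64_t
pack_incident(uint32_t rollover, uint32_t generation)
{
    return ((uint64_t)rollover << 32) | generation;
}

static inline bool
incident_is_fresh(uint64_t incident, uint32_t cur_rollover,
                  uint32_t cur_generation)
{
    uint32_t rollover = incident >> 32;          /* upper 32 bits */
    uint32_t generation = incident & 0xffffffff; /* lower 32 bits */

    /* stale if the generation counter has rolled over since the fop was
     * issued, or if an invalidation bumped the generation past it */
    return (rollover == cur_rollover) && (generation >= cur_generation);
}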
-int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt)
+int
+mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt,
+ uint64_t incident_time)
{
- return mdc_inode_iatt_set_validate(this, inode, NULL, iatt);
+ return mdc_inode_iatt_set_validate(this, inode, NULL, iatt, _gf_true,
+ incident_time);
}
int
-mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt)
+mdc_inode_iatt_get(xlator_t *this, inode_t *inode, struct iatt *iatt)
{
- int ret = -1;
- struct md_cache *mdc = NULL;
+ int ret = -1;
+ struct md_cache *mdc = NULL;
- if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
- goto out;
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0) {
+ gf_msg_trace("md-cache", 0, "mdc_inode_ctx_get failed (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
- if (!is_md_cache_iatt_valid (this, mdc))
- goto out;
+ if (!is_md_cache_iatt_valid(this, mdc)) {
+ gf_msg_trace("md-cache", 0, "iatt cache not valid for (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
- LOCK (&mdc->lock);
- {
- mdc_to_iatt (mdc, iatt);
- }
- UNLOCK (&mdc->lock);
+ LOCK(&mdc->lock);
+ {
+ mdc_to_iatt(mdc, iatt);
+ }
+ UNLOCK(&mdc->lock);
- uuid_copy (iatt->ia_gfid, inode->gfid);
- iatt->ia_ino = gfid_to_ino (inode->gfid);
- iatt->ia_dev = 42;
- iatt->ia_type = inode->ia_type;
+ gf_uuid_copy(iatt->ia_gfid, inode->gfid);
+ iatt->ia_ino = gfid_to_ino(inode->gfid);
+ iatt->ia_dev = 42;
+ iatt->ia_type = inode->ia_type;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
struct updatedict {
- dict_t *dict;
- int ret;
+ dict_t *dict;
+ int ret;
};
static int
+is_mdc_key_satisfied(xlator_t *this, const char *key)
+{
+ int ret = 0;
+ char *pattern = NULL;
+ struct mdc_conf *conf = this->private;
+ char *mdc_xattr_str = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+
+ if (!key)
+ goto out;
+
+    /* conf->mdc_xattr_str is never freed and can hence be safely used
+     * outside of the lock */
+ tmp1 = conf->mdc_xattr_str;
+ if (!tmp1)
+ goto out;
+
+ mdc_xattr_str = gf_strdup(tmp1);
+ if (!mdc_xattr_str)
+ goto out;
+
+ pattern = strtok_r(mdc_xattr_str, ",", &tmp);
+ while (pattern) {
+ gf_strTrim(&pattern);
+ if (fnmatch(pattern, key, 0) == 0) {
+ ret = 1;
+ break;
+ } else {
+ gf_msg_trace("md-cache", 0,
+ "xattr key %s doesn't satisfy "
+ "caching requirements",
+ key);
+ }
+ pattern = strtok_r(NULL, ",", &tmp);
+ }
+ GF_FREE(mdc_xattr_str);
+out:
+ return ret;
+}
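/* is_mdc_key_satisfied() above walks the comma-separated xattr cache list and
 * fnmatch()es each pattern against the key. A standalone sketch of the same
 * idea follows; it is illustrative only, not part of the patch, the pattern
 * string is made up, and the real code additionally trims whitespace around
 * each pattern with gf_strTrim(). */
#include <fnmatch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
key_matches(const char *patterns, const char *key)
{
    int hit = 0;
    char *save = NULL;
    char *copy = strdup(patterns); /* strtok_r modifies its input */

    if (!copy)
        return 0;

    for (char *p = strtok_r(copy, ",", &save); p != NULL && !hit;
         p = strtok_r(NULL, ",", &save)) {
        if (fnmatch(p, key, 0) == 0)
            hit = 1;
    }

    free(copy);
    return hit;
}

int
main(void)
{
    /* hypothetical comma-separated pattern list, not a recommended setting */
    printf("%d\n", key_matches("user.*,security.capability", "user.foo"));  /* 1 */
    printf("%d\n", key_matches("user.*,security.capability", "trusted.x")); /* 0 */
    return 0;
}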
+
+static int
updatefn(dict_t *dict, char *key, data_t *value, void *data)
{
- struct updatedict *u = data;
- const char *mdc_key;
- int i = 0;
-
- for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) {
- if (!mdc_keys[i].check)
- continue;
- if (strcmp(mdc_key, key))
- continue;
-
- if (!u->dict) {
- u->dict = dict_new();
- if (!u->dict) {
- u->ret = -1;
- return -1;
- }
- }
-
- if (dict_set(u->dict, key, value) < 0) {
- u->ret = -1;
- return -1;
- }
-
- break;
- }
- return 0;
+ struct updatedict *u = data;
+
+ if (is_mdc_key_satisfied(THIS, key)) {
+ if (!u->dict) {
+ u->dict = dict_new();
+ if (!u->dict) {
+ u->ret = -1;
+ return -1;
+ }
+ }
+
+ if (dict_set(u->dict, key, value) < 0) {
+ u->ret = -1;
+ return -1;
+ }
+ }
+ return 0;
}
static int
mdc_dict_update(dict_t **tgt, dict_t *src)
{
- struct updatedict u = {
- .dict = *tgt,
- .ret = 0,
- };
+ struct updatedict u = {
+ .dict = *tgt,
+ .ret = 0,
+ };
- dict_foreach(src, updatefn, &u);
+ dict_foreach(src, updatefn, &u);
- if (*tgt)
- return u.ret;
+ if (*tgt)
+ return u.ret;
- if ((u.ret < 0) && u.dict) {
- dict_unref(u.dict);
- return u.ret;
- }
+ if ((u.ret < 0) && u.dict) {
+ dict_unref(u.dict);
+ return u.ret;
+ }
- *tgt = u.dict;
+ *tgt = u.dict;
- return u.ret;
+ return u.ret;
}
int
-mdc_inode_xatt_set (xlator_t *this, inode_t *inode, dict_t *dict)
+mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict)
{
- int ret = -1;
- struct md_cache *mdc = NULL;
- dict_t *newdict = NULL;
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+ dict_t *newdict = NULL;
- mdc = mdc_inode_prep (this, inode);
- if (!mdc)
- goto out;
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc)
+ goto out;
- if (!dict)
- goto out;
+ if (!dict) {
+ gf_msg_trace("md-cache", 0,
+ "mdc_inode_xatt_set failed (%s) "
+ "dict NULL",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
- LOCK (&mdc->lock);
- {
- if (mdc->xattr) {
- dict_unref (mdc->xattr);
- mdc->xattr = NULL;
- }
+ LOCK(&mdc->lock);
+ {
+ if (mdc->xattr) {
+ gf_msg_trace("md-cache", 0,
+ "deleting the old xattr "
+ "cache (%s)",
+ uuid_utoa(inode->gfid));
+ dict_unref(mdc->xattr);
+ mdc->xattr = NULL;
+ }
- ret = mdc_dict_update(&newdict, dict);
- if (ret < 0) {
- UNLOCK(&mdc->lock);
- goto out;
- }
+ ret = mdc_dict_update(&newdict, dict);
+ if (ret < 0) {
+ UNLOCK(&mdc->lock);
+ goto out;
+ }
- if (newdict)
- mdc->xattr = newdict;
+ if (newdict)
+ mdc->xattr = newdict;
- time (&mdc->xa_time);
- }
- UNLOCK (&mdc->lock);
- ret = 0;
+ mdc->xa_time = gf_time();
+ gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld",
+ uuid_utoa(inode->gfid), (long long)mdc->xa_time);
+ }
+ UNLOCK(&mdc->lock);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-mdc_inode_xatt_update (xlator_t *this, inode_t *inode, dict_t *dict)
+mdc_inode_xatt_update(xlator_t *this, inode_t *inode, dict_t *dict)
{
- int ret = -1;
- struct md_cache *mdc = NULL;
+ int ret = -1;
+ struct md_cache *mdc = NULL;
- mdc = mdc_inode_prep (this, inode);
- if (!mdc)
- goto out;
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc)
+ goto out;
- if (!dict)
- goto out;
+ if (!dict)
+ goto out;
- LOCK (&mdc->lock);
- {
- ret = mdc_dict_update(&mdc->xattr, dict);
- if (ret < 0) {
- UNLOCK(&mdc->lock);
- goto out;
- }
-
- time (&mdc->xa_time);
+ LOCK(&mdc->lock);
+ {
+ ret = mdc_dict_update(&mdc->xattr, dict);
+ if (ret < 0) {
+ UNLOCK(&mdc->lock);
+ goto out;
}
- UNLOCK (&mdc->lock);
+ }
+ UNLOCK(&mdc->lock);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
+int
+mdc_inode_xatt_unset(xlator_t *this, inode_t *inode, char *name)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!name || !mdc->xattr)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ dict_del(mdc->xattr, name);
+ }
+ UNLOCK(&mdc->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
int
-mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict)
+mdc_inode_xatt_get(xlator_t *this, inode_t *inode, dict_t **dict)
{
- int ret = -1;
- struct md_cache *mdc = NULL;
+ int ret = -1;
+ struct md_cache *mdc = NULL;
- if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
- goto out;
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0) {
+ gf_msg_trace("md-cache", 0, "mdc_inode_ctx_get failed (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
- if (!is_md_cache_xatt_valid (this, mdc))
- goto out;
+ if (!is_md_cache_xatt_valid(this, mdc)) {
+ gf_msg_trace("md-cache", 0, "xattr cache not valid for (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
- LOCK (&mdc->lock);
- {
- ret = 0;
- /* Missing xattr only means no keys were there, i.e
- a negative cache for the "loaded" keys
- */
- if (!mdc->xattr)
- goto unlock;
-
- if (dict)
- *dict = dict_ref (mdc->xattr);
+ LOCK(&mdc->lock);
+ {
+ ret = 0;
+        /* A missing xattr dict only means no keys were there, i.e.
+           a negative cache for the "loaded" keys.
+         */
+ if (!mdc->xattr) {
+ gf_msg_trace("md-cache", 0, "xattr not present (%s)",
+ uuid_utoa(inode->gfid));
+ goto unlock;
}
+
+ if (dict)
+ *dict = dict_ref(mdc->xattr);
+ }
unlock:
- UNLOCK (&mdc->lock);
+ UNLOCK(&mdc->lock);
out:
- return ret;
+ return ret;
}
+gf_boolean_t
+mdc_inode_reset_need_lookup(xlator_t *this, inode_t *inode)
+{
+ struct md_cache *mdc = NULL;
+ gf_boolean_t need = _gf_false;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ need = mdc->need_lookup;
+ mdc->need_lookup = _gf_false;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return need;
+}
void
-mdc_load_reqs (xlator_t *this, dict_t *dict)
+mdc_inode_set_need_lookup(xlator_t *this, inode_t *inode, gf_boolean_t need)
{
- const char *mdc_key = NULL;
- int i = 0;
- int ret = 0;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
- for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) {
- if (!mdc_keys[i].load)
- continue;
- ret = dict_set_int8 (dict, (char *)mdc_key, 0);
- if (ret)
- return;
- }
+ LOCK(&mdc->lock);
+ {
+ mdc->need_lookup = need;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return;
}
+void
+mdc_inode_iatt_invalidate(xlator_t *this, inode_t *inode)
+{
+ struct md_cache *mdc = NULL;
+ uint32_t gen = 0;
-struct checkpair {
- int ret;
- dict_t *rsp;
-};
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
+ gen = mdc_inc_generation(this, inode) & 0xffffffff;
-static int
-is_mdc_key_satisfied (const char *key)
+ LOCK(&mdc->lock);
+ {
+ mdc->ia_time = 0;
+ mdc->valid = _gf_false;
+ mdc->generation = gen;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return;
+}
+
+int
+mdc_inode_xatt_invalidate(xlator_t *this, inode_t *inode)
{
- const char *mdc_key = NULL;
- int i = 0;
+ int ret = -1;
+ struct md_cache *mdc = NULL;
- if (!key)
- return 0;
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
- for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) {
- if (!mdc_keys[i].load)
- continue;
- if (strcmp (mdc_key, key) == 0)
- return 1;
- }
+ LOCK(&mdc->lock);
+ {
+ mdc->xa_time = 0;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return ret;
+}
+
+static int
+mdc_update_gfid_stat(xlator_t *this, struct iatt *iatt)
+{
+ int ret = 0;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, iatt->ia_gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+ ret = mdc_inode_iatt_set_validate(this, inode, NULL, iatt, _gf_true,
+ mdc_inc_generation(this, inode));
+out:
+ return ret;
+}
+
+static bool
+mdc_load_reqs(xlator_t *this, dict_t *dict)
+{
+ struct mdc_conf *conf = this->private;
+ char *pattern = NULL;
+ char *mdc_xattr_str = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ int ret = 0;
+ bool loaded = false;
+
+ tmp1 = conf->mdc_xattr_str;
+ if (!tmp1)
+ goto out;
+
+ mdc_xattr_str = gf_strdup(tmp1);
+ if (!mdc_xattr_str)
+ goto out;
+
+ pattern = strtok_r(mdc_xattr_str, ",", &tmp);
+ while (pattern) {
+ gf_strTrim(&pattern);
+ ret = dict_set_int8(dict, pattern, 0);
+ if (ret) {
+ conf->mdc_xattr_str = NULL;
+ gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for xattrs, dict_set failed");
+ goto out;
+ }
+ pattern = strtok_r(NULL, ",", &tmp);
+ }
+
+ loaded = true;
+
+out:
+ GF_FREE(mdc_xattr_str);
- return 0;
+ return loaded;
}
+struct checkpair {
+ int ret;
+ dict_t *rsp;
+};
static int
-checkfn (dict_t *this, char *key, data_t *value, void *data)
+checkfn(dict_t *this, char *key, data_t *value, void *data)
{
- struct checkpair *pair = data;
+ struct checkpair *pair = data;
- if (!is_mdc_key_satisfied (key))
- pair->ret = 0;
+ if (!is_mdc_key_satisfied(THIS, key))
+ pair->ret = 0;
- return 0;
+ return 0;
}
-
int
-mdc_xattr_satisfied (xlator_t *this, dict_t *req, dict_t *rsp)
+mdc_xattr_satisfied(xlator_t *this, dict_t *req, dict_t *rsp)
{
- struct checkpair pair = {
- .ret = 1,
- .rsp = rsp,
- };
+ struct checkpair pair = {
+ .ret = 1,
+ .rsp = rsp,
+ };
- dict_foreach (req, checkfn, &pair);
+ dict_foreach(req, checkfn, &pair);
- return pair.ret;
+ return pair.ret;
}
+static void
+mdc_cache_statfs(xlator_t *this, struct statvfs *buf)
+{
+ struct mdc_conf *conf = this->private;
+
+ pthread_mutex_lock(&conf->statfs_cache.lock);
+ {
+ memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs));
+ conf->statfs_cache.last_refreshed = gf_time();
+ }
+ pthread_mutex_unlock(&conf->statfs_cache.lock);
+}
int
-mdc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
+mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
{
- mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+ uint32_t cache_age = 0;
+ int ret = 0;
+
+ if (!buf || !conf) {
+ ret = -1;
+ goto err;
+ }
+
+ *buf = NULL;
+
+ pthread_mutex_lock(&conf->statfs_cache.lock);
+ {
+ /* Skip if the cache is not initialized. */
+ if (conf->statfs_cache.last_refreshed == (time_t)-1) {
+ ret = -1;
+ goto unlock;
+ }
+
+ cache_age = (gf_time() - conf->statfs_cache.last_refreshed);
+
+ gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs",
+ cache_age);
+ if (cache_age > conf->timeout) {
+ /* Expire the cache. */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Cache age %u secs exceeded timeout %u secs", cache_age,
+ conf->timeout);
+ ret = -1;
+ goto unlock;
+ }
+
+ *buf = &conf->statfs_cache.buf;
+ }
+unlock:
+ pthread_mutex_unlock(&conf->statfs_cache.lock);
+err:
+ return ret;
+}
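/* The statfs cache above is a plain buffer plus a refresh timestamp: replies
 * are served from it while the age stays within conf->timeout, and
 * last_refreshed == (time_t)-1 marks a cache that was never populated. A
 * standalone sketch of that pattern with the mutex left out; illustrative
 * only, not part of the patch, and the struct and function names are
 * hypothetical. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/statvfs.h>
#include <time.h>

struct statfs_cache {
    time_t last_refreshed; /* (time_t)-1 until the first refresh */
    struct statvfs buf;
};

static void
statfs_cache_store(struct statfs_cache *c, const struct statvfs *fresh)
{
    memcpy(&c->buf, fresh, sizeof(*fresh));
    c->last_refreshed = time(NULL);
}

static bool
statfs_cache_lookup(struct statfs_cache *c, uint32_t timeout,
                    struct statvfs *out)
{
    if (c->last_refreshed == (time_t)-1)
        return false; /* never populated */

    if ((time(NULL) - c->last_refreshed) > timeout)
        return false; /* expired */

    *out = c->buf;
    return true;
}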
- local = frame->local;
+static dict_t *
+mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata)
+{
+ if (xdata != NULL) {
+ dict_ref(xdata);
+ }
- if (op_ret != 0)
- goto out;
+ if (local == NULL) {
+ return xdata;
+ }
- if (!local)
- goto out;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ local->update_cache = false;
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ return NULL;
}
+ }
+
+ local->update_cache = mdc_load_reqs(this, xdata);
+
+ return xdata;
+}
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, local->loc.inode, stbuf);
- mdc_inode_xatt_set (this, local->loc.inode, dict);
+int
+mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ struct mdc_conf *conf = this->private;
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
}
+
+ goto out;
+ }
+
+ if (conf && conf->cache_statfs) {
+ mdc_cache_statfs(this, buf);
+ }
+
out:
- MDC_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf,
- dict, postparent);
- return 0;
+ MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
}
+int
+mdc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = 0, op_ret = 0, op_errno = 0;
+ struct statvfs *buf = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if (!conf) {
+ goto uncached;
+ }
+
+ if (!conf->cache_statfs) {
+ goto uncached;
+ }
+
+ ret = mdc_load_statfs_info_from_cache(this, &buf);
+ if (ret == 0 && buf) {
+ op_ret = 0;
+ op_errno = 0;
+ goto out;
+ }
+
+uncached:
+ STACK_WIND(frame, mdc_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+
+out:
+ MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
int
-mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
+mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
{
- int ret = 0;
- struct iatt stbuf = {0, };
- struct iatt postparent = {0, };
- dict_t *xattr_rsp = NULL;
- dict_t *xattr_alloc = NULL;
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+ local = frame->local;
- local = mdc_local_get (frame);
- if (!local)
- goto uncached;
+ if (!local)
+ goto out;
- loc_copy (&local->loc, loc);
+ if (op_ret != 0) {
+ if (op_errno == ENOENT)
+ GF_ATOMIC_INC(conf->mdc_counter.negative_lookup);
- ret = mdc_inode_iatt_get (this, loc->inode, &stbuf);
- if (ret != 0)
- goto uncached;
+ if (op_errno == ESTALE) {
+ /* if op_errno is ENOENT, fuse-bridge will unlink the
+ * dentry
+ */
+ if (local->loc.parent)
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ else
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ }
- if (xdata) {
- ret = mdc_inode_xatt_get (this, loc->inode, &xattr_rsp);
- if (ret != 0)
- goto uncached;
+ goto out;
+ }
- if (!mdc_xattr_satisfied (this, xdata, xattr_rsp))
- goto uncached;
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, dict);
+ }
+ }
+out:
+ MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict,
+ postparent);
+ return 0;
+}
+
+int
+mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if (!inode_is_linked(loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ if (mdc_inode_reset_need_lookup(this, loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.need_lookup);
+ goto uncached;
+ }
+
+ ret = mdc_inode_iatt_get(this, loc->inode, &stbuf);
+ if (ret != 0) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ if (xdata) {
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr_rsp);
+ if (ret != 0) {
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ goto uncached;
}
- MDC_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &stbuf,
- xattr_rsp, &postparent);
+ if (!mdc_xattr_satisfied(this, xdata, xattr_rsp)) {
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ goto uncached;
+ }
+ }
- if (xattr_rsp)
- dict_unref (xattr_rsp);
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(lookup, frame, 0, 0, loc->inode, &stbuf, xattr_rsp,
+ &postparent);
- return 0;
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
uncached:
- if (!xdata)
- xdata = xattr_alloc = dict_new ();
- if (xdata)
- mdc_load_reqs (this, xdata);
-
- STACK_WIND (frame, mdc_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xdata);
-
- if (xattr_rsp)
- dict_unref (xattr_rsp);
- if (xattr_alloc)
- dict_unref (xattr_alloc);
- return 0;
-}
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
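/* The cached fast path in mdc_lookup() above boils down to a chain of checks;
 * only when all of them pass is the lookup answered locally, otherwise the
 * request is wound down with the load-request xdata prepared by
 * mdc_prepare_request(). A condensed sketch of that decision; illustrative
 * only, not part of the patch, and the struct and its fields are hypothetical
 * stand-ins for the real helpers. */
#include <stdbool.h>

struct lookup_checks {
    bool have_local;       /* frame->local could be allocated */
    bool inode_linked;     /* inode_is_linked() returned true */
    bool need_lookup;      /* a previous op forced the next lookup */
    bool iatt_valid;       /* mdc_inode_iatt_get() succeeded */
    bool wants_xattrs;     /* the caller passed xdata */
    bool xattrs_satisfied; /* cached xattrs cover every requested key */
};

static bool
can_serve_lookup_from_cache(const struct lookup_checks *c)
{
    if (!c->have_local || !c->inode_linked || c->need_lookup)
        return false;
    if (!c->iatt_valid)
        return false;
    if (c->wants_xattrs && !c->xattrs_satisfied)
        return false;
    return true;
}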
int
-mdc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- local = frame->local;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ }
- mdc_inode_iatt_set (this, local->loc.inode, buf);
+ goto out;
+ }
+
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ }
out:
- MDC_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int ret;
- struct iatt stbuf;
- mdc_local_t *local = NULL;
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
- local = mdc_local_get (frame);
- if (!local)
- goto uncached;
+ local = mdc_local_get(frame, loc->inode);
+ if (!local)
+ goto uncached;
- loc_copy (&local->loc, loc);
+ loc_copy(&local->loc, loc);
- ret = mdc_inode_iatt_get (this, loc->inode, &stbuf);
- if (ret != 0)
- goto uncached;
+ if (!inode_is_linked(loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
- MDC_STACK_UNWIND (stat, frame, 0, 0, &stbuf, xdata);
+ ret = mdc_inode_iatt_get(this, loc->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
- return 0;
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(stat, frame, 0, 0, &stbuf, xdata);
+
+ return 0;
uncached:
- STACK_WIND (frame, mdc_stat_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
- loc, xdata);
- return 0;
-}
+ xdata = mdc_prepare_request(this, local, xdata);
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
int
-mdc_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- local = frame->local;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ }
+
+ goto out;
+ }
- mdc_inode_iatt_set (this, local->fd->inode, buf);
+ mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ }
out:
- MDC_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int ret;
- struct iatt stbuf;
- mdc_local_t *local = NULL;
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
- local = mdc_local_get (frame);
- if (!local)
- goto uncached;
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto uncached;
- local->fd = fd_ref (fd);
+ local->fd = __fd_ref(fd);
- ret = mdc_inode_iatt_get (this, fd->inode, &stbuf);
- if (ret != 0)
- goto uncached;
+ ret = mdc_inode_iatt_get(this, fd->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
- MDC_STACK_UNWIND (fstat, frame, 0, 0, &stbuf, xdata);
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(fstat, frame, 0, 0, &stbuf, xdata);
- return 0;
+ return 0;
uncached:
- STACK_WIND (frame, mdc_fstat_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
- fd, xdata);
- return 0;
-}
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
int
-mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+mdc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret != 0)
- goto out;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
- mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
out:
- MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset, dict_t *xdata)
+mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ local->loc.inode = inode_ref(loc->inode);
+ }
- local->loc.inode = inode_ref (loc->inode);
-
- STACK_WIND (frame, mdc_truncate_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
- loc, offset, xdata);
- return 0;
+ STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
-
int
-mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+mdc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
- local = frame->local;
+ if (!local)
+ goto out;
- if (op_ret != 0)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
- if (!local)
- goto out;
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
out:
- MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- return 0;
+ return 0;
}
-
int
-mdc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
+mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
- local->fd = fd_ref (fd);
-
- STACK_WIND (frame, mdc_ftruncate_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
- fd, offset, xdata);
- return 0;
+ STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
-
int
-mdc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+mdc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
- if (!local)
- goto out;
+ if (!local)
+ goto out;
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
}
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, local->loc.inode, buf);
- mdc_inode_xatt_set (this, local->loc.inode, local->xattr);
- }
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
out:
- MDC_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
+int
+mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
+
+ STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
+}
int
-mdc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+mdc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = frame->local;
- loc_copy (&local->loc, loc);
- local->xattr = dict_ref (xdata);
+ if (!local)
+ goto out;
- STACK_WIND (frame, mdc_mknod_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
+int
+mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
+
+ STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+}
int
-mdc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
+mdc_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret != 0)
- goto out;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+        /* If errno is ESTALE, the parent is not present, which implies
+         * the child is not present either. Also, unlink(2) states that
+         * unlink can return ENOENT if a component in the pathname does
+         * not exist or is a dangling symbolic link. So invalidate both
+         * parent and child for both errnos.
+         */
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
}
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, local->loc.inode, buf);
- mdc_inode_xatt_set (this, local->loc.inode, local->xattr);
- }
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
+ }
+
out:
- MDC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
}
-
int
-mdc_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata)
+mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = mdc_local_get (frame);
+ mdc_local_t *local = NULL;
- loc_copy (&local->loc, loc);
- local->xattr = dict_ref (xdata);
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
- STACK_WIND (frame, mdc_mkdir_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
+ STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
-
int
-mdc_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+mdc_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret != 0)
- goto out;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+        /* If errno is ESTALE, the parent is not present, which implies
+         * the child is not present either. Also, rmdir(2) states that
+         * rmdir can return ENOENT if a directory component in the
+         * pathname does not exist or is a dangling symbolic link. So
+         * invalidate both parent and child for both errnos.
+         */
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
}
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, local->loc.inode, NULL);
- }
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
out:
- MDC_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
}
-
int
-mdc_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
- dict_t *xdata)
+mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = mdc_local_get (frame);
+ mdc_local_t *local = NULL;
- loc_copy (&local->loc, loc);
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
- STACK_WIND (frame, mdc_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
+ STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata);
+ return 0;
}
-
int
-mdc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+mdc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
- if (!local)
- goto out;
+ if (!local)
+ goto out;
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
}
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
out:
- MDC_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-mdc_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
- dict_t *xdata)
+mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+ char *name;
- local = mdc_local_get (frame);
+ name = gf_strdup(linkname);
+ if (name == NULL) {
+ goto wind;
+ }
+ local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ GF_FREE(name);
+ goto wind;
+ }
- loc_copy (&local->loc, loc);
+ loc_copy(&local->loc, loc);
+ local->linkname = name;
- STACK_WIND (frame, mdc_rmdir_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
- loc, flag, xdata);
- return 0;
+wind:
+ STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+ return 0;
}
-
int
-mdc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+mdc_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (op_ret != 0)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc2.parent);
+ }
- if (!local)
- goto out;
+ goto out;
+ }
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
- }
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postoldparent,
+ local->incident_time);
+ }
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, local->loc.inode, buf);
- }
+ if (local->loc.inode) {
+ /* TODO: fix dht_rename() not to return linkfile
+ attributes before setting attributes here
+ */
+
+ mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
+ }
+
+ if (local->loc2.parent) {
+ mdc_inode_iatt_set(this, local->loc2.parent, postnewparent,
+ local->incident_time);
+ }
out:
- MDC_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
}
-
int
-mdc_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
+mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = mdc_local_get (frame);
+ mdc_local_t *local = NULL;
- loc_copy (&local->loc, loc);
+ local = mdc_local_get(frame, oldloc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ }
- local->linkname = gf_strdup (linkname);
-
- STACK_WIND (frame, mdc_symlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
- linkname, loc, umask, xdata);
- return 0;
+ STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
}
-
int
-mdc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
+mdc_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret != 0)
- goto out;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
-
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postoldparent);
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc2.parent);
}
- if (local->loc.inode) {
- /* TODO: fix dht_rename() not to return linkfile
- attributes before setting attributes here
- */
+ goto out;
+ }
- mdc_inode_iatt_set (this, local->loc.inode, NULL);
- }
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
- if (local->loc2.parent) {
- mdc_inode_iatt_set (this, local->loc2.parent, postnewparent);
- }
+ if (local->loc2.parent) {
+ mdc_inode_iatt_set(this, local->loc2.parent, postparent,
+ local->incident_time);
+ }
out:
- MDC_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent, prenewparent,
- postnewparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-mdc_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, oldloc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ }
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->loc2, newloc);
-
- STACK_WIND (frame, mdc_rename_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
+ STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
}
-
int
-mdc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+mdc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
- if (!local)
- goto out;
+ if (!local)
+ goto out;
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, local->loc.inode, buf);
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
}
- if (local->loc2.parent) {
- mdc_inode_iatt_set (this, local->loc2.parent, postparent);
- }
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, inode, buf, local->incident_time);
+ }
out:
- MDC_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-mdc_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->loc2, newloc);
-
- STACK_WIND (frame, mdc_link_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
+ STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
}
-
-int
-mdc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+mdc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret != 0)
- goto out;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
- if (local->loc.parent) {
- mdc_inode_iatt_set (this, local->loc.parent, postparent);
- }
+ if (local->fd->flags & O_TRUNC) {
+        /* O_TRUNC modifies the file size, hence invalidate the
+         * cache entry so that the latest attributes are fetched. */
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ }
- if (local->loc.inode) {
- mdc_inode_iatt_set (this, inode, buf);
- mdc_inode_xatt_set (this, local->loc.inode, local->xattr);
- }
out:
- MDC_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ MDC_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-
-int
-mdc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+static int
+mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ if (!fd || !IA_ISREG(fd->inode->ia_type) || !(fd->flags & O_TRUNC)) {
+ goto out;
+ }
- loc_copy (&local->loc, loc);
- local->xattr = dict_ref (xdata);
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
- STACK_WIND (frame, mdc_create_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
+out:
+ STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
-
int
-mdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+mdc_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (op_ret != 0)
- goto out;
+ if (op_ret < 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- if (!local)
- goto out;
-
- mdc_inode_iatt_set (this, local->fd->inode, stbuf);
+ mdc_inode_iatt_set(this, local->fd->inode, stbuf, local->incident_time);
out:
- MDC_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
+ MDC_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+mdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
- local->fd = fd_ref (fd);
-
- STACK_WIND (frame, mdc_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+ STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+mdc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret == -1) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
out:
- MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
+mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = mdc_local_get (frame);
+ mdc_local_t *local = NULL;
- local->fd = fd_ref (fd);
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
- STACK_WIND (frame, mdc_writev_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
+ STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
}
-
int
-mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0) {
- mdc_inode_iatt_set (this, local->loc.inode, NULL);
- goto out;
- }
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+ mdc_inode_xatt_update(this, local->loc.inode, xdata);
out:
- MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int valid, dict_t *xdata)
+mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int valid, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+ dict_t *xattr_alloc = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = this->private;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ goto wind;
+ }
- loc_copy (&local->loc, loc);
+ loc_copy(&local->loc, loc);
- STACK_WIND (frame, mdc_setattr_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
-}
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->loc.inode);
+ }
+ }
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_posix_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->loc.inode);
+ }
+ }
+
+wind:
+ STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+
+ if (xattr_alloc)
+ dict_unref(xattr_alloc);
+ return 0;
+}
int
-mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+mdc_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+ mdc_inode_xatt_update(this, local->fd->inode, xdata);
out:
- MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int valid, dict_t *xdata)
+mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int valid, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+ dict_t *xattr_alloc = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = this->private;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, fd->inode);
+ if (local == NULL) {
+ goto wind;
+ }
- local->fd = fd_ref (fd);
+ local->fd = __fd_ref(fd);
- STACK_WIND (frame, mdc_fsetattr_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
-}
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->fd->inode);
+ }
+ }
+
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_posix_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->fd->inode);
+ }
+ }
+
+wind:
+ STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ if (xattr_alloc)
+ dict_unref(xattr_alloc);
+ return 0;
+}
int
-mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+mdc_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
out:
- MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
+int
+mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
+}
int
-mdc_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
- dict_t *xdata)
+mdc_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
- local = mdc_local_get (frame);
+ local = frame->local;
+ if (!local)
+ goto out;
- local->fd = fd_ref (fd);
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
- STACK_WIND (frame, mdc_fsync_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
- fd, datasync, xdata);
- return 0;
-}
+ mdc_inode_xatt_update(this, local->loc.inode, local->xattr);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
int
-mdc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xattr);
+ }
- if (op_ret != 0)
- goto out;
+ STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
- if (!local)
- goto out;
+ return 0;
+}
- mdc_inode_xatt_update (this, local->loc.inode, local->xattr);
+int
+mdc_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_xatt_update(this, local->fd->inode, local->xattr);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
out:
- MDC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ MDC_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr, int flags, dict_t *xdata)
+mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ local->xattr = dict_ref(xattr);
+ }
- loc_copy (&local->loc, loc);
- local->xattr = dict_ref (xattr);
+ STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata);
- STACK_WIND (frame, mdc_setxattr_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
- loc, xattr, flags, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (op_ret != 0)
- goto out;
+ if (op_ret < 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
- if (!local)
- goto out;
+ if (dict_get(xattr, "glusterfs.skip-cache")) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
+ "Skipping xattr update due to empty value");
+ goto out;
+ }
- mdc_inode_xatt_update (this, local->fd->inode, local->xattr);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ }
out:
- MDC_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *xattr, int flags, dict_t *xdata)
+mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ int ret;
+ int op_errno = ENODATA;
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+ gf_boolean_t key_satisfied = _gf_false;
- local = mdc_local_get (frame);
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ goto uncached;
+ }
- local->fd = fd_ref (fd);
- local->xattr = dict_ref (xattr);
+ loc_copy(&local->loc, loc);
- STACK_WIND (frame, mdc_fsetxattr_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
- fd, xattr, flags, xdata);
- return 0;
+ if (!is_mdc_key_satisfied(this, key)) {
+ goto uncached;
+ }
+ key_satisfied = _gf_true;
+
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ if (!xattr || !dict_get(xattr, (char *)key)) {
+ ret = -1;
+ op_errno = ENODATA;
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+ MDC_STACK_UNWIND(getxattr, frame, ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+uncached:
+ if (key_satisfied) {
+ xdata = mdc_prepare_request(this, local, xdata);
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+
+ if (key_satisfied && (xdata != NULL)) {
+ dict_unref(xdata);
+ }
+
+ return 0;
}
int
-mdc_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
+mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- local = frame->local;
- if (!local)
- goto out;
+ if (op_ret < 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- mdc_inode_xatt_update (this, local->loc.inode, xattr);
+ if (dict_get(xattr, "glusterfs.skip-cache")) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
+ "Skipping xattr update due to empty value");
+ goto out;
+ }
+
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ }
out:
- MDC_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+ MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
dict_t *xdata)
{
- int ret;
- int op_errno = ENODATA;
- mdc_local_t *local = NULL;
- dict_t *xattr = NULL;
+ int ret;
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ int op_errno = ENODATA;
+ struct mdc_conf *conf = this->private;
+ gf_boolean_t key_satisfied = _gf_true;
- local = mdc_local_get (frame);
- if (!local)
- goto uncached;
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto uncached;
- loc_copy (&local->loc, loc);
+ local->fd = __fd_ref(fd);
- if (!is_mdc_key_satisfied (key))
- goto uncached;
+ if (!is_mdc_key_satisfied(this, key)) {
+ key_satisfied = _gf_false;
+ goto uncached;
+ }
- ret = mdc_inode_xatt_get (this, loc->inode, &xattr);
- if (ret != 0)
- goto uncached;
+ ret = mdc_inode_xatt_get(this, fd->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
- if (!xattr || dict_get (xattr, (char *)key)) {
- ret = -1;
- op_errno = ENODATA;
- }
+ if (!xattr || !dict_get(xattr, (char *)key)) {
+ ret = -1;
+ op_errno = ENODATA;
+ }
- MDC_STACK_UNWIND (getxattr, frame, ret, op_errno, xattr, xdata);
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+ MDC_STACK_UNWIND(fgetxattr, frame, ret, op_errno, xattr, xdata);
- return 0;
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
uncached:
- STACK_WIND (frame, mdc_getxattr_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
- loc, key, xdata);
- return 0;
-}
+ if (key_satisfied) {
+ xdata = mdc_prepare_request(this, local, xdata);
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+
+ if (key_satisfied && (xdata != NULL)) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+int
+mdc_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
+
+ if (local->key)
+ mdc_inode_xatt_unset(this, local->loc.inode, local->key);
+ else
+ mdc_inode_xatt_invalidate(this, local->loc.inode);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+out:
+ MDC_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-mdc_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- mdc_local_t *local = NULL;
+ mdc_local_t *local = NULL;
+ int op_errno = ENODATA;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+ char *name2;
+
+ name2 = gf_strdup(name);
+ if (name2 == NULL) {
+ goto uncached;
+ }
- if (op_ret != 0)
- goto out;
+ local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ GF_FREE(name2);
+ goto uncached;
+ }
- local = frame->local;
- if (!local)
- goto out;
+ loc_copy(&local->loc, loc);
+ local->key = name2;
- mdc_inode_xatt_update (this, local->fd->inode, xattr);
+ if (!is_mdc_key_satisfied(this, name))
+ goto uncached;
+
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+
+ if (!xattr || !dict_get(xattr, (char *)name)) {
+ ret = -1;
+ op_errno = ENODATA;
+
+ MDC_STACK_UNWIND(removexattr, frame, ret, op_errno, xdata);
+ } else {
+ STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ }
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+uncached:
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
+
+int
+mdc_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ if (local->key)
+ mdc_inode_xatt_unset(this, local->fd->inode, local->key);
+ else
+ mdc_inode_xatt_invalidate(this, local->fd->inode);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
out:
- MDC_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ MDC_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
-
int
-mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
- dict_t *xdata)
+mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int ret;
- mdc_local_t *local = NULL;
- dict_t *xattr = NULL;
- int op_errno = ENODATA;
+ mdc_local_t *local = NULL;
+ int op_errno = ENODATA;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+ char *name2;
- local = mdc_local_get (frame);
- if (!local)
- goto uncached;
+ name2 = gf_strdup(name);
+ if (name2 == NULL) {
+ goto uncached;
+ }
- local->fd = fd_ref (fd);
+ local = mdc_local_get(frame, fd->inode);
+ if (local == NULL) {
+ GF_FREE(name2);
+ goto uncached;
+ }
- if (!is_mdc_key_satisfied (key))
- goto uncached;
+ local->fd = __fd_ref(fd);
+ local->key = name2;
- ret = mdc_inode_xatt_get (this, fd->inode, &xattr);
- if (ret != 0)
- goto uncached;
+ if (!is_mdc_key_satisfied(this, name))
+ goto uncached;
- if (!xattr || dict_get (xattr, (char *)key)) {
- ret = -1;
- op_errno = ENODATA;
- }
+ ret = mdc_inode_xatt_get(this, fd->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
- MDC_STACK_UNWIND (fgetxattr, frame, ret, op_errno, xattr, xdata);
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
- return 0;
+ if (!xattr || !dict_get(xattr, (char *)name)) {
+ ret = -1;
+ op_errno = ENODATA;
+
+ MDC_STACK_UNWIND(fremovexattr, frame, ret, op_errno, xdata);
+ } else {
+ STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ }
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
uncached:
- STACK_WIND (frame, mdc_fgetxattr_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr,
- fd, key, xdata);
- return 0;
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
}
+int32_t
+mdc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
+
+ /* Tell readdir-ahead to include these keys in xdata when it
+     * internally issues readdirp() in its opendir_cbk */
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
int
-mdc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
+ gf_dirent_t *entry = NULL;
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto unwind;
- if (op_ret <= 0)
- goto unwind;
+ if (op_ret <= 0) {
+ if ((op_ret == -1) && ((op_errno == ENOENT) || (op_errno == ESTALE)))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto unwind;
+ }
- list_for_each_entry (entry, &entries->list, list) {
- if (!entry->inode)
- continue;
- mdc_inode_iatt_set (this, entry->inode, &entry->d_stat);
- mdc_inode_xatt_set (this, entry->inode, entry->dict);
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode)
+ continue;
+ mdc_inode_iatt_set(this, entry->inode, &entry->d_stat,
+ local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, entry->inode, entry->dict);
}
+ }
unwind:
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
+ MDC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
-
int
-mdc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, dict_t *xdata)
+mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- dict_t *xattr_alloc = NULL;
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto out;
+
+ local->fd = __fd_ref(fd);
+
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- if (!xdata)
- xdata = xattr_alloc = dict_new ();
- if (xdata)
- mdc_load_reqs (this, xdata);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
- STACK_WIND (frame, mdc_readdirp_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp,
- fd, size, offset, xdata);
- if (xattr_alloc)
- dict_unref (xattr_alloc);
- return 0;
+ return 0;
+out:
+ MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
}
int
mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *entries, dict_t *xdata)
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+out:
+ MDC_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
int
-mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, dict_t *xdata)
+mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- int need_unref = 0;
- struct mdc_conf *conf = this->private;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
- if (!conf->force_readdirp) {
- STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, offset,
- xdata);
- return 0;
- }
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto unwind;
- if (!xdata) {
- xdata = dict_new ();
- need_unref = 1;
- }
+ local->fd = __fd_ref(fd);
+
+ if (!conf->force_readdirp) {
+ STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+ return 0;
+ }
- if (xdata)
- mdc_load_reqs (this, xdata);
+ xdata = mdc_prepare_request(this, local, xdata);
- STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
- xdata);
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- if (need_unref && xdata)
- dict_unref (xdata);
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
- return 0;
+ return 0;
+unwind:
+ MDC_STACK_UNWIND(readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
}
int
mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
out:
- MDC_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- return 0;
+ return 0;
}
-int mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
+int
+mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- mdc_local_t *local;
+ mdc_local_t *local;
- local = mdc_local_get(frame);
- local->fd = fd_ref(fd);
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
- STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
- xdata);
+ STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
- return 0;
+ return 0;
}
int
mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_local_t *local = NULL;
-
- local = frame->local;
+ mdc_local_t *local = NULL;
- if (op_ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local)
- goto out;
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
- mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
out:
- MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
+int
+mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- mdc_local_t *local;
+ mdc_local_t *local;
- local = mdc_local_get(frame);
- local->fd = fd_ref(fd);
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
- STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->discard, fd, offset, len,
- xdata);
+ STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
- return 0;
+ return 0;
}
int
-mdc_forget (xlator_t *this, inode_t *inode)
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- mdc_inode_wipe (this, inode);
+ mdc_local_t *local = NULL;
- return 0;
-}
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
int
-is_strpfx (const char *str1, const char *str2)
+mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- /* is one of the string a prefix of the other? */
- int i;
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int32_t
+mdc_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
- for (i = 0; str1[i] == str2[i]; i++) {
- if (!str1[i] || !str2[i])
- break;
- }
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
- return !(str1[i] && str2[i]);
+out:
+ MDC_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, buf, xdata);
+ return 0;
+}
+
+int32_t
+mdc_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local)
+ goto unwind;
+
+ loc_copy(&local->loc, loc);
+
+ STACK_WIND(frame, mdc_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
+
+unwind:
+ MDC_STACK_UNWIND(readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
}
+int32_t
+mdc_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+
+out:
+ MDC_STACK_UNWIND(fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto unwind;
+
+ local->fd = __fd_ref(fd);
+
+ STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+ return 0;
+
+unwind:
+ MDC_STACK_UNWIND(fsyncdir, frame, -1, ENOMEM, NULL);
+ return 0;
+}
+
+int32_t
+mdc_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+mdc_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local)
+ goto unwind;
+
+ loc_copy(&local->loc, loc);
+
+ STACK_WIND(frame, mdc_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
+
+unwind:
+ MDC_STACK_UNWIND(access, frame, -1, ENOMEM, NULL);
+ return 0;
+}
+
+int
+mdc_priv_dump(xlator_t *this)
+{
+ struct mdc_conf *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ conf = this->private;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("stat_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_hit));
+ gf_proc_dump_write("stat_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_miss));
+ gf_proc_dump_write("xattr_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_hit));
+ gf_proc_dump_write("xattr_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_miss));
+ gf_proc_dump_write("nameless_lookup_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.nameless_lookup));
+ gf_proc_dump_write("negative_lookup_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.negative_lookup));
+ gf_proc_dump_write("stat_invalidations_received", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_invals));
+ gf_proc_dump_write("xattr_invalidations_received", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_invals));
+
+ return 0;
+}
+
+static int32_t
+mdc_dump_metrics(xlator_t *this, int fd)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ dprintf(fd, "%s.stat_cache_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_hit));
+ dprintf(fd, "%s.stat_cache_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_miss));
+ dprintf(fd, "%s.xattr_cache_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_hit));
+ dprintf(fd, "%s.xattr_cache_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_miss));
+ dprintf(fd, "%s.nameless_lookup_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.nameless_lookup));
+ dprintf(fd, "%s.negative_lookup_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.negative_lookup));
+ dprintf(fd, "%s.stat_cache_invalidations_received %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->mdc_counter.stat_invals));
+ dprintf(fd, "%s.xattr_cache_invalidations_received %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->mdc_counter.xattr_invals));
+out:
+ return 0;
+}
+
+int
+mdc_forget(xlator_t *this, inode_t *inode)
+{
+ mdc_inode_wipe(this, inode);
+
+ return 0;
+}
int
-mdc_key_load_set (struct mdc_key *keys, char *pattern, gf_boolean_t val)
+is_strpfx(const char *str1, const char *str2)
{
- struct mdc_key *key = NULL;
+    /* is one of the strings a prefix of the other? */
+ int i;
- for (key = keys; key->name; key++) {
- if (is_strpfx (key->name, pattern))
- key->load = val;
- }
+ for (i = 0; str1[i] == str2[i]; i++) {
+ if (!str1[i] || !str2[i])
+ break;
+ }
- return 0;
+ return !(str1[i] && str2[i]);
}
+static int
+mdc_key_unload_all(struct mdc_conf *conf)
+{
+ conf->mdc_xattr_str = NULL;
+
+ return 0;
+}
int
-reconfigure (xlator_t *this, dict_t *options)
+mdc_xattr_list_populate(struct mdc_conf *conf, char *tmp_str)
{
- struct mdc_conf *conf = NULL;
+ char *mdc_xattr_str = NULL;
+ size_t max_size = 0;
+ int ret = 0;
+
+ max_size = SLEN(
+ "security.capability,security.selinux,security."
+ "ima," POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR
+ "," GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT
+ ","
+ "user.swift.metadata,user.DOSATTRIB,user.DosStream.*"
+ ",user.org.netatalk.Metadata,security.NTACL,"
+ "user.org.netatalk.ResourceFork") +
+ strlen(tmp_str) + 5; /*Some buffer bytes*/
+
+ mdc_xattr_str = GF_MALLOC(max_size, gf_common_mt_char);
+ GF_CHECK_ALLOC(mdc_xattr_str, ret, out);
+ mdc_xattr_str[0] = '\0';
+
+ if (conf->cache_capability)
+ strcat(mdc_xattr_str, "security.capability,");
+
+ if (conf->cache_selinux)
+ strcat(mdc_xattr_str, "security.selinux,");
- conf = this->private;
+ if (conf->cache_ima)
+ strcat(mdc_xattr_str, "security.ima,");
- GF_OPTION_RECONF ("md-cache-timeout", conf->timeout, options, int32, out);
+ if (conf->cache_posix_acl)
+ strcat(mdc_xattr_str,
+ POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR ",");
- GF_OPTION_RECONF ("cache-selinux", conf->cache_selinux, options, bool, out);
- mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
+ if (conf->cache_glusterfs_acl)
+ strcat(mdc_xattr_str, GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT ",");
- GF_OPTION_RECONF ("cache-posix-acl", conf->cache_posix_acl, options, bool, out);
- mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl);
+ if (conf->cache_swift_metadata)
+ strcat(mdc_xattr_str, "user.swift.metadata,");
- GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool, out);
+ if (conf->cache_samba_metadata)
+ strcat(mdc_xattr_str,
+ "user.DOSATTRIB,user.DosStream.*,"
+ "user.org.netatalk.Metadata,user.org.netatalk."
+ "ResourceFork,security.NTACL,");
+
+ strcat(mdc_xattr_str, tmp_str);
+
+ LOCK(&conf->lock);
+ {
+        /* The old string is not freed; otherwise is_mdc_key_satisfied(),
+         * which is called by every fop, would have to take the lock,
+         * leading to lock contention.
+         */
+ conf->mdc_xattr_str = mdc_xattr_str;
+ }
+ UNLOCK(&conf->lock);
out:
- return 0;
+ return ret;
}
-int32_t
-mem_acct_init (xlator_t *this)
+struct set {
+ inode_t *inode;
+ xlator_t *this;
+};
+
+static int
+mdc_inval_xatt(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ struct set *tmp1 = NULL;
+ int ret = 0;
+
+ tmp1 = (struct set *)tmp;
+ ret = mdc_inode_xatt_unset(tmp1->this, tmp1->inode, k);
+ return ret;
+}
+
+static int
+mdc_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ struct set tmp = {
+ 0,
+ };
+ inode_table_t *itable = NULL;
+ struct mdc_conf *conf = this->private;
+ uint64_t gen = 0;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if (up_ci->flags & UP_PARENT_DENTRY_FLAGS) {
+ mdc_update_gfid_stat(this, &up_ci->p_stat);
+ if (up_ci->flags & UP_RENAME_FLAGS)
+ mdc_update_gfid_stat(this, &up_ci->oldp_stat);
+ }
+
+ if (up_ci->flags & UP_EXPLICIT_LOOKUP) {
+ mdc_inode_set_need_lookup(this, inode, _gf_true);
+ goto out;
+ }
+
+ if (up_ci->flags &
+ (UP_NLINK | UP_RENAME_FLAGS | UP_FORGET | UP_INVAL_ATTR)) {
+ mdc_inode_iatt_invalidate(this, inode);
+ mdc_inode_xatt_invalidate(this, inode);
+ GF_ATOMIC_INC(conf->mdc_counter.stat_invals);
+ goto out;
+ }
+
+ if (up_ci->flags & IATT_UPDATE_FLAGS) {
+ gen = mdc_inc_generation(this, inode);
+ ret = mdc_inode_iatt_set_validate(this, inode, NULL, &up_ci->stat,
+ _gf_false, gen);
+        /* One scenario where ret < 0 is when this invalidation is older
+         * than the current stat; in that case do not update the xattrs
+         * either.
+         */
+ if (ret < 0)
+ goto out;
+ GF_ATOMIC_INC(conf->mdc_counter.stat_invals);
+ }
+
+ if (up_ci->flags & UP_XATTR) {
+ if (up_ci->dict)
+ ret = mdc_inode_xatt_update(this, inode, up_ci->dict);
+ else
+ ret = mdc_inode_xatt_invalidate(this, inode);
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_invals);
+ } else if (up_ci->flags & UP_XATTR_RM) {
+ tmp.inode = inode;
+ tmp.this = this;
+ ret = dict_foreach(up_ci->dict, mdc_inval_xatt, &tmp);
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_invals);
+ }
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ return ret;
+}
+
+struct mdc_ipc {
+ xlator_t *this;
+ dict_t *xattr;
+};
+
+static int
+mdc_send_xattrs_cbk(int ret, call_frame_t *frame, void *data)
{
- int ret = -1;
+ struct mdc_ipc *tmp = data;
- ret = xlator_mem_acct_init (this, gf_mdc_mt_end + 1);
- return ret;
+ if (ret < 0) {
+ mdc_key_unload_all(THIS->private);
+ gf_msg("md-cache", GF_LOG_INFO, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for all xattrs, as registering for "
+ "xattr cache invalidation failed");
+ }
+ STACK_DESTROY(frame->root);
+ dict_unref(tmp->xattr);
+ GF_FREE(tmp);
+
+ return 0;
+}
+
+static int
+mdc_send_xattrs(void *data)
+{
+ int ret = 0;
+ struct mdc_ipc *tmp = data;
+
+ ret = syncop_ipc(FIRST_CHILD(tmp->this), GF_IPC_TARGET_UPCALL, tmp->xattr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret < 0) {
+ gf_msg(tmp->this->name, GF_LOG_WARNING, errno,
+ MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ "Registering the list "
+               "of xattrs that need invalidation, with upcall, failed");
+ }
+
+ return ret;
+}
+
+static int
+mdc_register_xattr_inval(xlator_t *this)
+{
+ dict_t *xattr = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+ call_frame_t *frame = NULL;
+ struct mdc_ipc *data = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ if (!conf->mdc_invalidation) {
+ UNLOCK(&conf->lock);
+ goto out;
+ }
+ }
+ UNLOCK(&conf->lock);
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (!mdc_load_reqs(this, xattr)) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to populate cache entries");
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to create the frame");
+ ret = -1;
+ goto out;
+ }
+
+ data = GF_CALLOC(1, sizeof(struct mdc_ipc), gf_mdc_mt_mdc_ipc);
+ if (!data) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to allocate memory");
+ ret = -1;
+ goto out;
+ }
+
+ data->this = this;
+ data->xattr = xattr;
+ ret = synctask_new(this->ctx->env, mdc_send_xattrs, mdc_send_xattrs_cbk,
+ frame, data);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ "Registering the list "
+               "of xattrs that need invalidation, with upcall, failed");
+ }
+
+out:
+ if (ret < 0) {
+ mdc_key_unload_all(conf);
+ if (xattr)
+ dict_unref(xattr);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ GF_FREE(data);
+ gf_msg(this->name, GF_LOG_INFO, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for all xattrs, as registering for "
+ "xattr cache invalidation failed");
+ }
+
+ return ret;
}
int
-init (xlator_t *this)
+mdc_reconfigure(xlator_t *this, dict_t *options)
{
- struct mdc_conf *conf = NULL;
+ struct mdc_conf *conf = NULL;
+ int timeout = 0, ret = 0;
+ char *tmp_str = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("md-cache-timeout", timeout, options, int32, out);
+
+ GF_OPTION_RECONF("cache-selinux", conf->cache_selinux, options, bool, out);
+
+ GF_OPTION_RECONF("cache-capability-xattrs", conf->cache_capability, options,
+ bool, out);
+
+ GF_OPTION_RECONF("cache-ima-xattrs", conf->cache_ima, options, bool, out);
- conf = GF_CALLOC (sizeof (*conf), 1, gf_mdc_mt_mdc_conf_t);
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- return -1;
- }
+ GF_OPTION_RECONF("cache-posix-acl", conf->cache_posix_acl, options, bool,
+ out);
- GF_OPTION_INIT ("md-cache-timeout", conf->timeout, int32, out);
+ GF_OPTION_RECONF("cache-glusterfs-acl", conf->cache_glusterfs_acl, options,
+ bool, out);
- GF_OPTION_INIT ("cache-selinux", conf->cache_selinux, bool, out);
- mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
+ GF_OPTION_RECONF("cache-swift-metadata", conf->cache_swift_metadata,
+ options, bool, out);
- GF_OPTION_INIT ("cache-posix-acl", conf->cache_posix_acl, bool, out);
- mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl);
+ GF_OPTION_RECONF("cache-samba-metadata", conf->cache_samba_metadata,
+ options, bool, out);
- GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+ GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool,
+ out);
+
+ GF_OPTION_RECONF("cache-invalidation", conf->mdc_invalidation, options,
+ bool, out);
+
+ GF_OPTION_RECONF("global-cache-invalidation", conf->global_invalidation,
+ options, bool, out);
+
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+ GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out);
+
+ GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out);
+
+ ret = mdc_xattr_list_populate(conf, tmp_str);
+ if (ret < 0)
+ goto out;
+
+    /* A timeout greater than 60s (the maximum before cache-invalidation
+     * support was added) requires md-cache's cache-invalidation feature
+     * to be enabled; if it is not enabled, cap the timeout at the
+     * previous maximum of 60s.
+     */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
+ ret = mdc_register_xattr_inval(this);
out:
- this->private = conf;
+ return ret;
+}
- return 0;
+int32_t
+mdc_mem_acct_init(xlator_t *this)
+{
+ return xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
}
+int
+mdc_init(xlator_t *this)
+{
+ struct mdc_conf *conf = NULL;
+ uint32_t timeout = 0;
+ char *tmp_str = NULL;
+
+ conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t);
+ if (!conf) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ return -1;
+ }
+
+ LOCK_INIT(&conf->lock);
+
+ GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out);
+
+ GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out);
+
+ GF_OPTION_INIT("cache-capability-xattrs", conf->cache_capability, bool,
+ out);
+
+ GF_OPTION_INIT("cache-ima-xattrs", conf->cache_ima, bool, out);
+
+ GF_OPTION_INIT("cache-posix-acl", conf->cache_posix_acl, bool, out);
+
+ GF_OPTION_INIT("cache-glusterfs-acl", conf->cache_glusterfs_acl, bool, out);
+
+ GF_OPTION_INIT("cache-swift-metadata", conf->cache_swift_metadata, bool,
+ out);
+
+ GF_OPTION_INIT("cache-samba-metadata", conf->cache_samba_metadata, bool,
+ out);
+
+ GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+
+ GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out);
+
+ GF_OPTION_INIT("global-cache-invalidation", conf->global_invalidation, bool,
+ out);
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+ pthread_mutex_init(&conf->statfs_cache.lock, NULL);
+ GF_OPTION_INIT("md-cache-statfs", conf->cache_statfs, bool, out);
+
+ GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out);
+ mdc_xattr_list_populate(conf, tmp_str);
+
+ conf->last_child_down = gf_time();
+ conf->statfs_cache.last_refreshed = (time_t)-1;
+
+ /* initialize gf_atomic_t counters */
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_hit, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_miss, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.negative_lookup, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.nameless_lookup, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_invals, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_invals, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.need_lookup, 0);
+ GF_ATOMIC_INIT(conf->generation, 0);
+
+    /* A timeout greater than 60s (the maximum before cache-invalidation
+     * support was added) requires md-cache's cache-invalidation feature
+     * to be enabled; if it is not enabled, cap the timeout at the
+     * previous maximum of 60s.
+     */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
+out:
+ this->private = conf;
+
+ return 0;
+}
void
-fini (xlator_t *this)
+mdc_update_child_down_time(xlator_t *this, time_t now)
{
- return;
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ conf->last_child_down = now;
+ }
+ UNLOCK(&conf->lock);
}
+int
+mdc_notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ mdc_update_child_down_time(this, gf_time());
+ break;
+ case GF_EVENT_UPCALL:
+ if (conf->mdc_invalidation)
+ ret = mdc_invalidate(this, data);
+ break;
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ ret = mdc_register_xattr_inval(this);
+ break;
+ default:
+ break;
+ }
+
+ if (default_notify(this, event, data) != 0)
+ ret = -1;
+
+ return ret;
+}
-struct xlator_fops fops = {
- .lookup = mdc_lookup,
- .stat = mdc_stat,
- .fstat = mdc_fstat,
- .truncate = mdc_truncate,
- .ftruncate = mdc_ftruncate,
- .mknod = mdc_mknod,
- .mkdir = mdc_mkdir,
- .unlink = mdc_unlink,
- .rmdir = mdc_rmdir,
- .symlink = mdc_symlink,
- .rename = mdc_rename,
- .link = mdc_link,
- .create = mdc_create,
- .readv = mdc_readv,
- .writev = mdc_writev,
- .setattr = mdc_setattr,
- .fsetattr = mdc_fsetattr,
- .fsync = mdc_fsync,
- .setxattr = mdc_setxattr,
- .fsetxattr = mdc_fsetxattr,
- .getxattr = mdc_getxattr,
- .fgetxattr = mdc_fgetxattr,
- .readdirp = mdc_readdirp,
- .readdir = mdc_readdir,
- .fallocate = mdc_fallocate,
- .discard = mdc_discard,
+void
+mdc_fini(xlator_t *this)
+{
+ GF_FREE(this->private);
+}
+
+struct xlator_fops mdc_fops = {
+ .lookup = mdc_lookup,
+ .stat = mdc_stat,
+ .fstat = mdc_fstat,
+ .truncate = mdc_truncate,
+ .ftruncate = mdc_ftruncate,
+ .mknod = mdc_mknod,
+ .mkdir = mdc_mkdir,
+ .unlink = mdc_unlink,
+ .rmdir = mdc_rmdir,
+ .symlink = mdc_symlink,
+ .rename = mdc_rename,
+ .link = mdc_link,
+ .create = mdc_create,
+ .open = mdc_open,
+ .readv = mdc_readv,
+ .writev = mdc_writev,
+ .setattr = mdc_setattr,
+ .fsetattr = mdc_fsetattr,
+ .fsync = mdc_fsync,
+ .setxattr = mdc_setxattr,
+ .fsetxattr = mdc_fsetxattr,
+ .getxattr = mdc_getxattr,
+ .fgetxattr = mdc_fgetxattr,
+ .removexattr = mdc_removexattr,
+ .fremovexattr = mdc_fremovexattr,
+ .opendir = mdc_opendir,
+ .readdirp = mdc_readdirp,
+ .readdir = mdc_readdir,
+ .fallocate = mdc_fallocate,
+ .discard = mdc_discard,
+ .zerofill = mdc_zerofill,
+ .statfs = mdc_statfs,
+ .readlink = mdc_readlink,
+ .fsyncdir = mdc_fsyncdir,
+ .access = mdc_access,
};
+struct xlator_cbks mdc_cbks = {
+ .forget = mdc_forget,
+};
+
+struct xlator_dumpops mdc_dumpops = {
+ .priv = mdc_priv_dump,
+};
-struct xlator_cbks cbks = {
- .forget = mdc_forget,
+struct volume_options mdc_options[] = {
+ {
+ .key = {"md-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable md-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"cache-selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache selinux xattr(security.selinux) on client side",
+ },
+ {
+ .key = {"cache-capability-xattrs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache capability xattr(security.capability) on "
+ "client side",
+ },
+ {
+ .key = {"cache-ima-xattrs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache Linux integrity subsystem xattr(security.ima) "
+ "on client side",
+ },
+ {
+ .key = {"cache-swift-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache swift metadata (user.swift.metadata xattr)",
+ },
+ {
+ .key = {"cache-samba-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL,"
+ " org.netatalk.Metadata, org.netatalk.ResourceFork, "
+ "and user.DosStream. xattrs)",
+ },
+ {
+ .key = {"cache-posix-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache posix ACL xattrs (system.posix_acl_access, "
+ "system.posix_acl_default) on client side",
+ },
+ {
+ .key = {"cache-glusterfs-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache virtual glusterfs ACL xattrs "
+ "(glusterfs.posix.acl, glusterfs.posix.default_acl) "
+ "on client side",
+ },
+ {
+ .key = {"md-cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 600,
+ .default_value = SITE_H_MD_CACHE_TIMEOUT,
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Time period after which cache has to be refreshed",
+ },
+ {
+ .key = {"force-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Convert all readdir requests to readdirplus to "
+ "collect stat info on each entry.",
+ },
+ {
+ .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+        .description = "When \"on\", invalidates/updates the metadata cache"
+                       " on receiving cache-invalidation notifications",
+ },
+ {
+ .key = {"global-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description =
+ "When \"on\", purges all read caches in kernel and glusterfs stack "
+ "whenever a stat change is detected. Stat changes can be detected "
+ "while processing responses to file operations (fop) or through "
+ "upcall notifications. Since purging caches can be an expensive "
+ "operation, it's advised to have this option \"on\" only when a "
+ "file "
+ "can be accessed from multiple different Glusterfs mounts and "
+ "caches across these different mounts are required to be coherent. "
+ "If a file is not accessed across different mounts "
+        "(a simple example is having only one mount for a volume), it is "
+        "advised to keep "
+        "this option \"off\", as all file modifications go through caches "
+        "keeping them "
+        "coherent. This option overrides the value of "
+ "performance.cache-invalidation.",
+ },
+ {
+ .key = {"md-cache-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache statfs information of filesystem on the client",
+ },
+ {
+ .key = {"xattr-cache-list"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "A comma separated list of xattrs that shall be "
+ "cached by md-cache. The only wildcard allowed is '*'",
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"md-cache"},
+ .description = "Enable/Disable md cache translator"},
+ {.key = {NULL}},
};
-struct volume_options options[] = {
- { .key = {"cache-selinux"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
- },
- { .key = {"cache-posix-acl"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
- },
- { .key = {"md-cache-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 60,
- .default_value = "1",
- .description = "Time period after which cache has to be refreshed",
- },
- { .key = {"force-readdirp"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Convert all readdir requests to readdirplus to "
- "collect stat info on each entry.",
- },
- { .key = {NULL} },
+xlator_api_t xlator_api = {
+ .init = mdc_init,
+ .fini = mdc_fini,
+ .notify = mdc_notify,
+ .reconfigure = mdc_reconfigure,
+ .mem_acct_init = mdc_mem_acct_init,
+ .dump_metrics = mdc_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &mdc_dumpops,
+ .fops = &mdc_fops,
+ .cbks = &mdc_cbks,
+ .options = mdc_options,
+ .identifier = "md-cache",
+ .category = GF_MAINTAINED,
};
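A minimal sketch of the timeout rule applied in mdc_init() and mdc_reconfigure() above: a requested md-cache-timeout above 60s is honored only when cache-invalidation is enabled, otherwise it is capped at the previous maximum of 60s. The helper name mdc_effective_timeout() is hypothetical and not part of the patch.

#include <stdint.h>

/* Without upcall-based invalidation, cached metadata can only age out,
 * so the cache lifetime stays capped at the pre-invalidation maximum. */
static uint32_t
mdc_effective_timeout(uint32_t requested, int invalidation_enabled)
{
    if ((requested > 60) && !invalidation_enabled)
        return 60;
    return requested;
}

For example, with cache-invalidation off a requested timeout of 600 is applied as 60, while with invalidation on the full 600 is used.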
diff --git a/xlators/performance/symlink-cache/Makefile.am b/xlators/performance/nl-cache/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/xlators/performance/symlink-cache/Makefile.am
+++ b/xlators/performance/nl-cache/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/performance/nl-cache/src/Makefile.am b/xlators/performance/nl-cache/src/Makefile.am
new file mode 100644
index 00000000000..c44ce871627
--- /dev/null
+++ b/xlators/performance/nl-cache/src/Makefile.am
@@ -0,0 +1,12 @@
+xlator_LTLIBRARIES = nl-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+nl_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+nl_cache_la_SOURCES = nl-cache.c nl-cache-helper.c
+nl_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+noinst_HEADERS = nl-cache.h nl-cache-mem-types.h nl-cache-messages.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/timer-wheel
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+CLEANFILES =
diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c
new file mode 100644
index 00000000000..29b99b5b8ea
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache-helper.c
@@ -0,0 +1,1201 @@
+/*
+ * Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "nl-cache.h"
+#include "timer-wheel.h"
+#include <glusterfs/statedump.h>
+
+/* Caching guidelines:
+ * This xlator serves negative lookups (ENOENT lookups) from the cache,
+ * thereby making create faster.
+ * What is cached?
+ * Negative lookup cache is stored for each directory, and has 2 entries:
+ * - Negative entries: Populated only when lookup/stat returns ENOENT.
+ * Fuse mostly sends only one lookup before create, hence negative entry
+ * cache is almost useless. But for SMB access, multiple lookups/stats
+ * are sent before creating the file. Hence the negative entry cache.
+ * It can exist even when the positive entry cache is invalid. It also
+ * has the entries that were deleted from this directory.
+ * Freed on receiving upcall(with dentry change flag) or on expiring
+ * timeout of the cache.
+ *
+ * - Positive entries: Populated as a part of readdirp, and as a part of
+ * mkdir followed by creates inside that directory. Lookups and other
+ *     fops do not populate the positive entry cache (as it can grow long
+ *     and adds little value).
+ * Freed on receiving upcall(with dentry change flag) or on expiring
+ * timeout of the cache.
+ *
+ * Data structures to store cache?
+ * The cache of any directory is stored in the inode_ctx of the directory.
+ * Negative entries are stored as list of strings.
+ * Search - O(n)
+ * Add - O(1)
+ * Delete - O(n) - as it has to be searched before deleting
+ * Positive entries are stored as a list, each list node has a pointer
+ * to the inode of the positive entry or the name of the entry.
+ * Since the client side inode table already will have inodes for
+ * positive entries, we just take a ref of that inode and store as
+ * positive entry cache. In cases like hardlinks and readdirp where
+ * inode is NULL, we store the names.
+ * Name Search - O(n)
+ * Inode Search - O(1) - Actually complexity of inode_find()
+ * Name/inode Add - O(1)
+ * Name Delete - O(n)
+ * Inode Delete - O(1)
+ *
+ * Locking order:
+ *
+ * TODO:
+ * - Fill Positive entries on readdir/p, after which in lookup_cbk check if the
+ * name is in PE and replace it with inode.
+ * - fini, PARENT_DOWN, disable caching
+ * - Virtual setxattr to dump the inode_ctx, to ease debugging
+ * - Handle dht_nuke xattr: clear all cache
+ * - Special handling for .meta and .trashcan?
+ */
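+
+/* In short (a summary of the code in this patch): nlc_lookup() first
+ * consults nlc_is_negative_lookup() and, on a hit, unwinds ENOENT without
+ * winding down; nlc_lookup_cbk() adds a negative entry via nlc_dir_add_ne()
+ * whenever the wound lookup returns ENOENT. Entry fops (create, mkdir,
+ * unlink, rename, ...) keep the cache consistent through nlc_dentry_op(). */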
+
+int
+__nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx);
+int
+__nlc_add_to_lru(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx);
+void
+nlc_remove_from_lru(xlator_t *this, inode_t *inode);
+void
+__nlc_inode_ctx_timer_delete(xlator_t *this, nlc_ctx_t *nlc_ctx);
+gf_boolean_t
+__nlc_search_ne(nlc_ctx_t *nlc_ctx, const char *name);
+void
+__nlc_free_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe);
+void
+__nlc_free_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne);
+
+static int32_t
+nlc_get_cache_timeout(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+    /* Cache timeout is generally not meant to be changed often once set,
+     * hence it is read without taking locks */
+ return conf->cache_timeout;
+}
+
+static gf_boolean_t
+__nlc_is_cache_valid(xlator_t *this, nlc_ctx_t *nlc_ctx)
+{
+ nlc_conf_t *conf = NULL;
+ time_t last_val_time;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO(this->name, nlc_ctx, out);
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ last_val_time = conf->last_child_down;
+ }
+ UNLOCK(&conf->lock);
+
+ if ((last_val_time <= nlc_ctx->cache_time) && (nlc_ctx->cache_time != 0))
+ ret = _gf_true;
+out:
+ return ret;
+}
+
+void
+nlc_update_child_down_time(xlator_t *this, time_t now)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ conf->last_child_down = now;
+ }
+ UNLOCK(&conf->lock);
+
+ return;
+}
+
+void
+nlc_disable_cache(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ conf->disable_cache = _gf_true;
+ }
+ UNLOCK(&conf->lock);
+
+ return;
+}
+
+static int
+__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+{
+ int ret = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+ uint64_t nlc_ctx_int = 0;
+ uint64_t nlc_pe_int = 0;
+
+ ret = __inode_ctx_get2(inode, this, &nlc_ctx_int, &nlc_pe_int);
+ if (ret == 0 && nlc_ctx_p) {
+ nlc_ctx = (void *)(long)(nlc_ctx_int);
+ *nlc_ctx_p = nlc_ctx;
+ }
+ return ret;
+}
+
+static int
+nlc_inode_ctx_set(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx,
+ nlc_pe_t *nlc_pe_p)
+{
+ uint64_t ctx1, ctx2;
+ int ret = -1;
+
+ ctx1 = (uint64_t)(uintptr_t)nlc_ctx;
+ ctx2 = (uint64_t)(uintptr_t)nlc_pe_p;
+
+    /* The caller may choose to set only one of the ctxs, hence check
+     * whether ctx1/2 is non-zero and only then send its address. If we
+     * blindly send the addresses of both ctxs, it may reset the ctx for
+     * which the caller had sent NULL (intended as "leave untouched"). */
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set2(inode, this, ctx1 ? &ctx1 : 0, ctx2 ? &ctx2 : 0);
+ }
+ UNLOCK(&inode->lock);
+ return ret;
+}
+
+static void
+nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+{
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0,
+ "inode ctx get failed for "
+ "inode:%p",
+ inode);
+ }
+ UNLOCK(&inode->lock);
+
+ return;
+}
+
+static void
+__nlc_inode_clear_entries(xlator_t *this, nlc_ctx_t *nlc_ctx)
+{
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp1 = NULL;
+
+ if (!nlc_ctx)
+ goto out;
+
+ if (IS_PE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ __nlc_free_pe(this, nlc_ctx, pe);
+ }
+
+ if (IS_NE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(ne, tmp1, &nlc_ctx->ne, list)
+ {
+ __nlc_free_ne(this, nlc_ctx, ne);
+ }
+
+ nlc_ctx->cache_time = 0;
+ nlc_ctx->state = 0;
+ GF_ASSERT(nlc_ctx->cache_size == sizeof(*nlc_ctx));
+ GF_ASSERT(nlc_ctx->refd_inodes == 0);
+out:
+ return;
+}
+
+static void
+nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
+{
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ /* The cache/nlc_ctx can be invalid for 2 reasons:
+ * - Because of a child-down/timer expiry, cache is
+ * invalid but the nlc_ctx is not yet cleaned up.
+ * - nlc_ctx is cleaned up, because of invalidations
+ * or lru prune etc.*/
+
+ /* If the cache is present but invalid, clear the cache and
+ * reset the timer. */
+ __nlc_inode_clear_entries(this, nlc_ctx);
+
+ /* If timer is present, then it is already part of lru as well
+ * Hence reset the timer and return.*/
+ if (nlc_ctx->timer) {
+ gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer,
+ conf->cache_timeout);
+ nlc_ctx->cache_time = gf_time();
+ goto unlock;
+ }
+
+        /* If the timer was NULL, the nlc_ctx is already cleaned up,
+         * and we need to start the timer and add to the lru, so that it
+         * is ready to cache entries afresh */
+ ret = __nlc_inode_ctx_timer_start(this, inode, nlc_ctx);
+ if (ret < 0)
+ goto unlock;
+
+ ret = __nlc_add_to_lru(this, inode, nlc_ctx);
+ if (ret < 0) {
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+static nlc_ctx_t *
+nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+{
+ uint64_t ctx;
+ int ret = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&inode->lock);
+ {
+ ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (nlc_ctx)
+ goto unlock;
+
+ nlc_ctx = GF_CALLOC(sizeof(*nlc_ctx), 1, gf_nlc_mt_nlc_ctx_t);
+ if (!nlc_ctx)
+ goto unlock;
+
+ LOCK_INIT(&nlc_ctx->lock);
+ INIT_LIST_HEAD(&nlc_ctx->pe);
+ INIT_LIST_HEAD(&nlc_ctx->ne);
+
+ ret = __nlc_inode_ctx_timer_start(this, inode, nlc_ctx);
+ if (ret < 0)
+ goto unlock;
+
+ ret = __nlc_add_to_lru(this, inode, nlc_ctx);
+ if (ret < 0) {
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+ goto unlock;
+ }
+
+ ctx = (uint64_t)(uintptr_t)nlc_ctx;
+ ret = __inode_ctx_set2(inode, this, &ctx, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, NLC_MSG_NO_MEMORY,
+ "inode ctx set failed");
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+ nlc_remove_from_lru(this, inode);
+ goto unlock;
+ }
+
+ /*TODO: also sizeof (gf_tw_timer_list) + nlc_timer_data_t ?*/
+ nlc_ctx->cache_size = sizeof(*nlc_ctx);
+ GF_ATOMIC_ADD(conf->current_cache_size, nlc_ctx->cache_size);
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (ret == 0 && nlc_ctx_p) {
+ *nlc_ctx_p = nlc_ctx;
+ nlc_init_invalid_ctx(this, inode, nlc_ctx);
+ }
+
+ if (ret < 0 && nlc_ctx) {
+ LOCK_DESTROY(&nlc_ctx->lock);
+ GF_FREE(nlc_ctx);
+ nlc_ctx = NULL;
+ goto out;
+ }
+
+out:
+ return nlc_ctx;
+}
+
+nlc_local_t *
+nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, loc_t *loc2)
+{
+ nlc_local_t *local = NULL;
+
+ local = GF_CALLOC(sizeof(*local), 1, gf_nlc_mt_nlc_local_t);
+ if (!local)
+ goto out;
+
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (loc2)
+ loc_copy(&local->loc2, loc2);
+
+ local->fop = fop;
+ frame->local = local;
+out:
+ return local;
+}
+
+void
+nlc_local_wipe(xlator_t *this, nlc_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe(&local->loc);
+
+ loc_wipe(&local->loc2);
+
+ GF_FREE(local);
+out:
+ return;
+}
+
+static void
+__nlc_set_dir_state(nlc_ctx_t *nlc_ctx, uint64_t new_state)
+{
+ nlc_ctx->state |= new_state;
+
+ return;
+}
+
+void
+nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ __nlc_set_dir_state(nlc_ctx, state);
+ }
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+static void
+nlc_cache_timeout_handler(struct gf_tw_timer_list *timer, void *data,
+ unsigned long calltime)
+{
+ nlc_timer_data_t *tmp = data;
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+    /* Taking nlc_ctx->lock here would lead to a deadlock, hence the
+     * cache is marked invalid outside the lock (by resetting cache_time)
+     * instead of clearing it. Since cache_time is assigned outside the
+     * lock, the value can be stale for a short time; this may result in
+     * a false negative, which is better than a deadlock */
+ nlc_ctx->cache_time = 0;
+out:
+ return;
+}
+
+void
+__nlc_inode_ctx_timer_delete(xlator_t *this, nlc_ctx_t *nlc_ctx)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (nlc_ctx->timer)
+ gf_tw_del_timer(conf->timer_wheel, nlc_ctx->timer);
+
+ if (nlc_ctx->timer_data) {
+ inode_unref(nlc_ctx->timer_data->inode);
+ GF_FREE(nlc_ctx->timer_data);
+ nlc_ctx->timer_data = NULL;
+ }
+
+ GF_FREE(nlc_ctx->timer);
+ nlc_ctx->timer = NULL;
+
+ return;
+}
+
+int
+__nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
+{
+ struct gf_tw_timer_list *timer = NULL;
+ nlc_timer_data_t *tmp = NULL;
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+    /* We are taking inode_table->lock within inode->lock,
+     * as the only other caller that takes inode->lock within
+     * inode_table->lock, and could cause a deadlock, is
+     * inode_table_destroy. Hopefully, no fop can be in flight
+     * when inode_table_destroy is being called. */
+ tmp = GF_CALLOC(1, sizeof(*tmp), gf_nlc_mt_nlc_timer_data_t);
+ if (!tmp)
+ goto out;
+ tmp->inode = inode_ref(inode);
+ tmp->this = this;
+
+ timer = GF_CALLOC(1, sizeof(*timer), gf_common_mt_tw_timer_list);
+ if (!timer)
+ goto out;
+
+ INIT_LIST_HEAD(&timer->entry);
+ timer->expires = nlc_get_cache_timeout(this);
+ timer->function = nlc_cache_timeout_handler;
+ timer->data = tmp;
+ nlc_ctx->timer = timer;
+ nlc_ctx->timer_data = tmp;
+ gf_tw_add_timer(conf->timer_wheel, timer);
+
+ nlc_ctx->cache_time = gf_time();
+ gf_msg_trace(this->name, 0,
+ "Registering timer:%p, inode:%p, "
+ "gfid:%s",
+ timer, inode, uuid_utoa(inode->gfid));
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ if (tmp && tmp->inode)
+ inode_unref(tmp->inode);
+ GF_FREE(tmp);
+ GF_FREE(timer);
+ }
+
+ return ret;
+}
+
+int
+__nlc_add_to_lru(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
+{
+ nlc_lru_node_t *lru_ino = NULL;
+ uint64_t nlc_pe_int = 0;
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+ lru_ino = GF_CALLOC(1, sizeof(*lru_ino), gf_nlc_mt_nlc_lru_node);
+ if (!lru_ino)
+ goto out;
+
+ INIT_LIST_HEAD(&lru_ino->list);
+ lru_ino->inode = inode_ref(inode);
+ LOCK(&conf->lock);
+ {
+ list_add_tail(&lru_ino->list, &conf->lru);
+ }
+ UNLOCK(&conf->lock);
+
+ nlc_ctx->refd_inodes = 0;
+ ret = __inode_ctx_get2(inode, this, NULL, &nlc_pe_int);
+ if (nlc_pe_int == 0)
+ GF_ATOMIC_ADD(conf->refd_inodes, 1);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+void
+nlc_remove_from_lru(xlator_t *this, inode_t *inode)
+{
+ nlc_lru_node_t *lru_node = NULL;
+ nlc_lru_node_t *tmp = NULL;
+ nlc_lru_node_t *tmp1 = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ list_for_each_entry_safe(lru_node, tmp, &conf->lru, list)
+ {
+ if (inode == lru_node->inode) {
+ list_del(&lru_node->list);
+ tmp1 = lru_node;
+ break;
+ }
+ }
+ }
+ UNLOCK(&conf->lock);
+
+ if (tmp1) {
+ inode_unref(tmp1->inode);
+ GF_FREE(tmp1);
+ }
+
+ return;
+}
+
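+/* Prune at most one LRU entry per call, and only when either the ref'd
+ * inode count or the total cache size has reached its configured limit. */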
+void
+nlc_lru_prune(xlator_t *this, inode_t *inode)
+{
+ nlc_lru_node_t *lru_node = NULL;
+ nlc_lru_node_t *prune_node = NULL;
+ nlc_lru_node_t *tmp = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ if ((GF_ATOMIC_GET(conf->refd_inodes) < conf->inode_limit) &&
+ (GF_ATOMIC_GET(conf->current_cache_size) < conf->cache_size))
+ goto unlock;
+
+ list_for_each_entry_safe(lru_node, tmp, &conf->lru, list)
+ {
+ list_del(&lru_node->list);
+ prune_node = lru_node;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&conf->lock);
+
+ if (prune_node) {
+ nlc_inode_clear_cache(this, prune_node->inode, NLC_LRU_PRUNE);
+ inode_unref(prune_node->inode);
+ GF_FREE(prune_node);
+ }
+ return;
+}
+
+void
+nlc_clear_all_cache(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+ struct list_head clear_list;
+ nlc_lru_node_t *prune_node = NULL;
+ nlc_lru_node_t *tmp = NULL;
+
+ conf = this->private;
+
+ INIT_LIST_HEAD(&clear_list);
+
+ LOCK(&conf->lock);
+ {
+ list_replace_init(&conf->lru, &clear_list);
+ }
+ UNLOCK(&conf->lock);
+
+ list_for_each_entry_safe(prune_node, tmp, &clear_list, list)
+ {
+ list_del(&prune_node->list);
+ nlc_inode_clear_cache(this, prune_node->inode, NLC_LRU_PRUNE);
+ inode_unref(prune_node->inode);
+ GF_FREE(prune_node);
+ }
+
+ return;
+}
+
+void
+__nlc_free_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe)
+{
+ uint64_t pe_int = 0;
+ nlc_conf_t *conf = NULL;
+ uint64_t nlc_ctx_int = 0;
+
+ conf = this->private;
+
+ if (pe->inode) {
+ inode_ctx_reset1(pe->inode, this, &pe_int);
+ inode_ctx_get2(pe->inode, this, &nlc_ctx_int, NULL);
+ inode_unref(pe->inode);
+ }
+ list_del(&pe->list);
+
+ nlc_ctx->cache_size -= sizeof(*pe) + sizeof(pe->name);
+ GF_ATOMIC_SUB(conf->current_cache_size, (sizeof(*pe) + sizeof(pe->name)));
+
+ nlc_ctx->refd_inodes -= 1;
+ if (nlc_ctx_int == 0)
+ GF_ATOMIC_SUB(conf->refd_inodes, 1);
+
+ GF_FREE(pe->name);
+ GF_FREE(pe);
+
+ return;
+}
+
+void
+__nlc_free_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ list_del(&ne->list);
+ GF_FREE(ne->name);
+ GF_FREE(ne);
+
+ nlc_ctx->cache_size -= sizeof(*ne) + sizeof(ne->name);
+ GF_ATOMIC_SUB(conf->current_cache_size, (sizeof(*ne) + sizeof(ne->name)));
+
+ return;
+}
+
+void
+nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+
+ __nlc_inode_clear_entries(this, nlc_ctx);
+ }
+ UNLOCK(&nlc_ctx->lock);
+
+ if (reason != NLC_LRU_PRUNE)
+ nlc_remove_from_lru(this, inode);
+
+out:
+ return;
+}
+
+static void
+__nlc_del_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, inode_t *entry_ino,
+ const char *name, gf_boolean_t multilink)
+{
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+ gf_boolean_t found = _gf_false;
+ uint64_t pe_int = 0;
+
+ if (!IS_PE_VALID(nlc_ctx->state))
+ goto out;
+
+ if (!entry_ino)
+ goto name_search;
+
+ /* If there are hardlinks first search names, followed by inodes */
+ if (multilink) {
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = _gf_true;
+ goto out;
+ }
+ }
+ inode_ctx_reset1(entry_ino, this, &pe_int);
+ if (pe_int) {
+ pe = (void *)(long)(pe_int);
+ found = _gf_true;
+ goto out;
+ }
+ goto out;
+ }
+
+ inode_ctx_reset1(entry_ino, this, &pe_int);
+ if (pe_int) {
+ pe = (void *)(long)(pe_int);
+ found = _gf_true;
+ goto out;
+ }
+
+name_search:
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = _gf_true;
+ break;
+ /* TODO: can there be duplicates? */
+ }
+ }
+
+out:
+ if (found)
+ __nlc_free_pe(this, nlc_ctx, pe);
+
+ return;
+}
+
+static void
+__nlc_del_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, const char *name)
+{
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp = NULL;
+
+ if (!IS_NE_VALID(nlc_ctx->state))
+ goto out;
+
+ list_for_each_entry_safe(ne, tmp, &nlc_ctx->ne, list)
+ {
+ if (strcmp(ne->name, name) == 0) {
+ __nlc_free_ne(this, nlc_ctx, ne);
+ break;
+ }
+ }
+out:
+ return;
+}
+
+static void
+__nlc_add_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, inode_t *entry_ino,
+ const char *name)
+{
+ nlc_pe_t *pe = NULL;
+ int ret = -1;
+ nlc_conf_t *conf = NULL;
+ uint64_t nlc_ctx_int = 0;
+
+ conf = this->private;
+
+    /* TODO: There can be no duplicate entries, as entries are added only
+       during create. If duplicate entries ever arise, search the PE list
+       first: found = __nlc_search (entries, name, _gf_false);
+       A bit vector could give a simpler search than the sequential one */
+
+ pe = GF_CALLOC(sizeof(*pe), 1, gf_nlc_mt_nlc_pe_t);
+ if (!pe)
+ goto out;
+
+ if (entry_ino) {
+ pe->inode = inode_ref(entry_ino);
+ nlc_inode_ctx_set(this, entry_ino, NULL, pe);
+ } else if (name) {
+ pe->name = gf_strdup(name);
+ if (!pe->name)
+ goto out;
+ }
+
+ list_add(&pe->list, &nlc_ctx->pe);
+
+ nlc_ctx->cache_size += sizeof(*pe) + sizeof(pe->name);
+ GF_ATOMIC_ADD(conf->current_cache_size, (sizeof(*pe) + sizeof(pe->name)));
+
+ nlc_ctx->refd_inodes += 1;
+ inode_ctx_get2(entry_ino, this, &nlc_ctx_int, NULL);
+ if (nlc_ctx_int == 0)
+ GF_ATOMIC_ADD(conf->refd_inodes, 1);
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(pe);
+
+ return;
+}
+
+static void
+__nlc_add_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, const char *name)
+{
+ nlc_ne_t *ne = NULL;
+ int ret = -1;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+    /* TODO: search the NE list before adding, to avoid duplicate entries:
+       found = __nlc_search (entries, name, _gf_false);
+       A bit vector could give a faster search than the sequential one */
+
+ ne = GF_CALLOC(sizeof(*ne), 1, gf_nlc_mt_nlc_ne_t);
+ if (!ne)
+ goto out;
+
+ ne->name = gf_strdup(name);
+ if (!ne->name)
+ goto out;
+
+ list_add(&ne->list, &nlc_ctx->ne);
+
+ nlc_ctx->cache_size += sizeof(*ne) + sizeof(ne->name);
+ GF_ATOMIC_ADD(conf->current_cache_size, (sizeof(*ne) + sizeof(ne->name)));
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(ne);
+
+ return;
+}
+
+void
+nlc_dir_add_ne(xlator_t *this, inode_t *inode, const char *name)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+        /* There is one case where we need to search before adding an
+         * NE: when there are two parallel lookups on a non-existent
+         * file */
+ if (!__nlc_search_ne(nlc_ctx, name)) {
+ __nlc_add_ne(this, nlc_ctx, name);
+ __nlc_set_dir_state(nlc_ctx, NLC_NE_VALID);
+ }
+ }
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+void
+nlc_dir_remove_pe(xlator_t *this, inode_t *parent, inode_t *entry_ino,
+ const char *name, gf_boolean_t multilink)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (parent->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get(this, parent, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (!__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ __nlc_del_pe(this, nlc_ctx, entry_ino, name, multilink);
+ __nlc_add_ne(this, nlc_ctx, name);
+ __nlc_set_dir_state(nlc_ctx, NLC_NE_VALID);
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+void
+nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ const char *name)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ __nlc_del_ne(this, nlc_ctx, name);
+ __nlc_add_pe(this, nlc_ctx, entry_ino, name);
+ if (!IS_PE_VALID(nlc_ctx->state))
+ __nlc_set_dir_state(nlc_ctx, NLC_PE_PARTIAL);
+ }
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+gf_boolean_t
+__nlc_search_ne(nlc_ctx_t *nlc_ctx, const char *name)
+{
+ gf_boolean_t found = _gf_false;
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp = NULL;
+
+ if (!IS_NE_VALID(nlc_ctx->state))
+ goto out;
+
+ list_for_each_entry_safe(ne, tmp, &nlc_ctx->ne, list)
+ {
+ if (strcmp(ne->name, name) == 0) {
+ found = _gf_true;
+ break;
+ }
+ }
+out:
+ return found;
+}
+
+static gf_boolean_t
+__nlc_search_pe(nlc_ctx_t *nlc_ctx, const char *name)
+{
+ gf_boolean_t found = _gf_false;
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+
+ if (!IS_PE_VALID(nlc_ctx->state))
+ goto out;
+
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = _gf_true;
+ break;
+ }
+ }
+out:
+ return found;
+}
+
+static char *
+__nlc_get_pe(nlc_ctx_t *nlc_ctx, const char *name,
+ gf_boolean_t case_insensitive)
+{
+ char *found = NULL;
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+
+ if (!IS_PE_VALID(nlc_ctx->state))
+ goto out;
+
+ if (case_insensitive) {
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcasecmp(pe->name, name) == 0)) {
+ found = pe->name;
+ break;
+ }
+ }
+ } else {
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = pe->name;
+ break;
+ }
+ }
+ }
+out:
+ return found;
+}
+
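+/* A lookup is known-negative if the name is on the parent's NE list, or
+ * if the parent's PE cache is complete (NLC_PE_FULL) and the name is not
+ * on it. */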
+gf_boolean_t
+nlc_is_negative_lookup(xlator_t *this, loc_t *loc)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t neg_entry = _gf_false;
+
+ inode = loc->parent;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (!__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ if (__nlc_search_ne(nlc_ctx, loc->name)) {
+ neg_entry = _gf_true;
+ goto unlock;
+ }
+ if ((nlc_ctx->state & NLC_PE_FULL) &&
+ !__nlc_search_pe(nlc_ctx, loc->name)) {
+ neg_entry = _gf_true;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+
+out:
+ return neg_entry;
+}
+
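+/* Serve the virtual GF_XATTR_GET_REAL_FILENAME_KEY getxattr from the
+ * positive-entry cache: a case-insensitive match returns the cached
+ * name, and a miss on a directory with a full PE cache is answered
+ * with ENOENT without winding down. */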
+gf_boolean_t
+nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname,
+ int32_t *op_ret, int32_t *op_errno, dict_t *dict)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t hit = _gf_false;
+ char *found_file = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, fname, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_ret, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ inode = loc->inode;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (!__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ found_file = __nlc_get_pe(nlc_ctx, fname, _gf_true);
+ if (found_file) {
+ ret = dict_set_dynstr(dict, GF_XATTR_GET_REAL_FILENAME_KEY,
+ gf_strdup(found_file));
+ if (ret < 0)
+ goto unlock;
+ *op_ret = strlen(found_file) + 1;
+ hit = _gf_true;
+ goto unlock;
+ }
+ if (!found_file && (nlc_ctx->state & NLC_PE_FULL)) {
+ *op_ret = -1;
+ *op_errno = ENOENT;
+ hit = _gf_true;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+
+out:
+ return hit;
+}
+
+void
+nlc_dump_inodectx(xlator_t *this, inode_t *inode)
+{
+ int32_t ret = -1;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char uuid_str[64] = {
+ 0,
+ };
+ nlc_ctx_t *nlc_ctx = NULL;
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp1 = NULL;
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+
+ if (!nlc_ctx)
+ goto out;
+
+ ret = TRY_LOCK(&nlc_ctx->lock);
+ if (!ret) {
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.nl-cache",
+ "nlc_inode");
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ __inode_path(inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
+
+ uuid_utoa_r(inode->gfid, uuid_str);
+
+ gf_proc_dump_write("inode", "%p", inode);
+ gf_proc_dump_write("gfid", "%s", uuid_str);
+
+ gf_proc_dump_write("state", "%" PRIu64, nlc_ctx->state);
+ gf_proc_dump_write("timer", "%p", nlc_ctx->timer);
+ gf_proc_dump_write("cache-time", "%ld", nlc_ctx->cache_time);
+ gf_proc_dump_write("cache-size", "%zu", nlc_ctx->cache_size);
+ gf_proc_dump_write("refd-inodes", "%" PRIu64, nlc_ctx->refd_inodes);
+
+ if (IS_PE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ gf_proc_dump_write("pe", "%p, %p, %s", pe, pe->inode, pe->name);
+ }
+
+ if (IS_NE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(ne, tmp1, &nlc_ctx->ne, list)
+ {
+ gf_proc_dump_write("ne", "%s", ne->name);
+ }
+
+ UNLOCK(&nlc_ctx->lock);
+ }
+
+ if (ret && nlc_ctx)
+ gf_proc_dump_write("Unable to dump the inode information",
+ "(Lock acquisition failed) %p (gfid: %s)", nlc_ctx,
+ uuid_str);
+out:
+ return;
+}
diff --git a/xlators/performance/nl-cache/src/nl-cache-mem-types.h b/xlators/performance/nl-cache/src/nl-cache-mem-types.h
new file mode 100644
index 00000000000..93a17b3fd5a
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __NL_CACHE_MEM_TYPES_H__
+#define __NL_CACHE_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_nlc_mem_types_ {
+ gf_nlc_mt_nlc_conf_t = gf_common_mt_end + 1,
+ gf_nlc_mt_nlc_ctx_t,
+ gf_nlc_mt_nlc_local_t,
+ gf_nlc_mt_nlc_pe_t,
+ gf_nlc_mt_nlc_ne_t,
+ gf_nlc_mt_nlc_timer_data_t,
+ gf_nlc_mt_nlc_lru_node,
+ gf_nlc_mt_end
+};
+
+#endif /* __NL_CACHE_MEM_TYPES_H__ */
diff --git a/xlators/performance/nl-cache/src/nl-cache-messages.h b/xlators/performance/nl-cache/src/nl-cache-messages.h
new file mode 100644
index 00000000000..222d709e133
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache-messages.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __NL_CACHE_MESSAGES_H__
+#define __NL_CACHE_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(NLC, NLC_MSG_NO_MEMORY, NLC_MSG_EINVAL, NLC_MSG_NO_TIMER_WHEEL,
+ NLC_MSG_DICT_FAILURE);
+
+#endif /* __NL_CACHE_MESSAGES_H__ */
diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
new file mode 100644
index 00000000000..33a7c471663
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache.c
@@ -0,0 +1,840 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "nl-cache.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/upcall-utils.h>
+
+static void
+nlc_dentry_op(call_frame_t *frame, xlator_t *this, gf_boolean_t multilink)
+{
+ nlc_local_t *local = frame->local;
+
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+
+ switch (local->fop) {
+ case GF_FOP_MKDIR:
+ nlc_set_dir_state(this, local->loc.inode, NLC_PE_FULL);
+ /*fall-through*/
+ case GF_FOP_MKNOD:
+ case GF_FOP_CREATE:
+ case GF_FOP_SYMLINK:
+ nlc_dir_add_pe(this, local->loc.parent, local->loc.inode,
+ local->loc.name);
+ break;
+ case GF_FOP_LINK:
+ nlc_dir_add_pe(this, local->loc2.parent, NULL, local->loc2.name);
+ break;
+ case GF_FOP_RMDIR:
+ nlc_inode_clear_cache(this, local->loc.inode, _gf_false);
+ /*fall-through*/
+ case GF_FOP_UNLINK:
+ nlc_dir_remove_pe(this, local->loc.parent, local->loc.inode,
+ local->loc.name, multilink);
+ break;
+ case GF_FOP_RENAME:
+            /* TBD: Should these be atomic? In case of rename, the
+             * newloc->inode can be NULL, hence oldloc->inode is used */
+ nlc_dir_remove_pe(this, local->loc2.parent, local->loc2.inode,
+ local->loc2.name, _gf_false);
+
+ /*TODO: Remove old dentry from destination before adding this pe*/
+ nlc_dir_add_pe(this, local->loc.parent, local->loc2.inode,
+ local->loc.name);
+
+ default:
+ return;
+ }
+
+ nlc_lru_prune(this, NULL);
+out:
+ return;
+}
+
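+/* NLC_FOP wraps an entry fop: when positive-entry caching is enabled it
+ * allocates a local (carrying the locs and the fop type) and winds to the
+ * child with nlc_<fop>_cbk as the callback; otherwise it falls back to the
+ * default resume. nlc_mkdir() below is a typical caller. */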
+#define NLC_FOP(_name, _op, loc1, loc2, frame, this, args...) \
+ do { \
+ nlc_local_t *__local = NULL; \
+ nlc_conf_t *conf = NULL; \
+ \
+ conf = this->private; \
+ \
+ if (!IS_PEC_ENABLED(conf)) \
+ goto disabled; \
+ \
+ __local = nlc_local_init(frame, this, _op, loc1, loc2); \
+ GF_VALIDATE_OR_GOTO(this->name, __local, err); \
+ \
+ STACK_WIND(frame, nlc_##_name##_cbk, FIRST_CHILD(this), \
+ FIRST_CHILD(this)->fops->_name, args); \
+ break; \
+ disabled: \
+ default_##_name##_resume(frame, this, args); \
+ break; \
+ err: \
+ default_##_name##_failure_cbk(frame, ENOMEM); \
+ break; \
+ } while (0)
+
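+/* NLC_FOP_CBK is the matching unwind side: on success of the wound fop
+ * (op_ret == 0), and only while positive-entry caching is enabled, it
+ * updates the dentry cache via nlc_dentry_op() before unwinding with the
+ * original return values. */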
+#define NLC_FOP_CBK(_name, multilink, frame, cookie, this, op_ret, op_errno, \
+ args...) \
+ do { \
+ nlc_conf_t *conf = NULL; \
+ \
+ if (op_ret != 0) \
+ goto out; \
+ \
+ conf = this->private; \
+ \
+ if (op_ret < 0 || !IS_PEC_ENABLED(conf)) \
+ goto out; \
+ nlc_dentry_op(frame, this, multilink); \
+ out: \
+ NLC_STACK_UNWIND(_name, frame, op_ret, op_errno, args); \
+ } while (0)
+
+static int32_t
+nlc_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ NLC_FOP_CBK(rename, _gf_false, frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent, postnewparent,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ NLC_FOP(rename, GF_FOP_RENAME, newloc, oldloc, frame, this, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(mknod, _gf_false, frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ NLC_FOP(mknod, GF_FOP_MKNOD, loc, NULL, frame, this, loc, mode, rdev, umask,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(create, _gf_false, frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ NLC_FOP(create, GF_FOP_CREATE, loc, NULL, frame, this, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(mkdir, _gf_false, frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ NLC_FOP(mkdir, GF_FOP_MKDIR, loc, NULL, frame, this, loc, mode, umask,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ nlc_local_t *local = NULL;
+ nlc_conf_t *conf = NULL;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (!local)
+ goto out;
+
+    /* Do not add to the PE here; it may lead to duplicate entries and
+     * would require a search before adding to the list of strings */
+ if (op_ret < 0 && op_errno == ENOENT) {
+ nlc_dir_add_ne(this, local->loc.parent, local->loc.name);
+ GF_ATOMIC_INC(conf->nlc_counter.nlc_miss);
+ }
+
+out:
+ NLC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+static int32_t
+nlc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ nlc_local_t *local = NULL;
+ nlc_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ if (loc_is_nameless(loc))
+ goto wind;
+
+ local = nlc_local_init(frame, this, GF_FOP_LOOKUP, loc, NULL);
+ if (!local)
+ goto err;
+
+ conf = this->private;
+
+ inode = inode_grep(loc->inode->table, loc->parent, loc->name);
+ if (inode) {
+ inode_unref(inode);
+ goto wind;
+ }
+
+ if (nlc_is_negative_lookup(this, loc)) {
+ GF_ATOMIC_INC(conf->nlc_counter.nlc_hit);
+ gf_msg_trace(this->name, 0,
+ "Serving negative lookup from "
+ "cache:%s",
+ loc->name);
+ goto unwind;
+ }
+
+wind:
+ STACK_WIND(frame, nlc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+unwind:
+ NLC_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
+ return 0;
+err:
+ NLC_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+nlc_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ NLC_FOP_CBK(rmdir, _gf_false, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ NLC_FOP(rmdir, GF_FOP_RMDIR, loc, NULL, frame, this, loc, flags, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (!IS_PEC_ENABLED(conf))
+ goto out;
+
+ if (op_ret < 0 && op_errno == ENOENT) {
+ GF_ATOMIC_INC(conf->nlc_counter.getrealfilename_miss);
+ }
+
+out:
+ NLC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ dict_t *dict = NULL;
+ nlc_local_t *local = NULL;
+ gf_boolean_t hit = _gf_false;
+ const char *fname = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!IS_PEC_ENABLED(conf))
+ goto wind;
+
+ if (!key || (strncmp(key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) != 0))
+ goto wind;
+
+ local = nlc_local_init(frame, this, GF_FOP_GETXATTR, loc, NULL);
+ if (!local)
+ goto err;
+
+ if (loc->inode && key) {
+ dict = dict_new();
+ if (!dict)
+ goto err;
+
+ fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY);
+ hit = nlc_get_real_file_name(this, loc, fname, &op_ret, &op_errno,
+ dict);
+ if (hit)
+ goto unwind;
+ else
+ dict_unref(dict);
+ }
+
+ STACK_WIND(frame, nlc_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
+wind:
+ STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
+unwind:
+ GF_ATOMIC_INC(conf->nlc_counter.getrealfilename_hit);
+ NLC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, NULL);
+ dict_unref(dict);
+ return 0;
+err:
+ NLC_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+nlc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(symlink, _gf_false, frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ NLC_FOP(symlink, GF_FOP_SYMLINK, loc, NULL, frame, this, linkpath, loc,
+ umask, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(link, _gf_false, frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ NLC_FOP(link, GF_FOP_LINK, oldloc, newloc, frame, this, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ uint32_t link_count = 0;
+ gf_boolean_t multilink = _gf_false;
+
+ if (xdata && !dict_get_uint32(xdata, GET_LINK_COUNT, &link_count)) {
+ if (link_count > 1)
+ multilink = _gf_true;
+ } else {
+ /* Don't touch cache if we don't know enough */
+ gf_msg(this->name, GF_LOG_WARNING, 0, NLC_MSG_DICT_FAILURE,
+ "Failed to get GET_LINK_COUNT from dict");
+ NLC_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+ }
+
+ NLC_FOP_CBK(unlink, multilink, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ nlc_conf_t *conf = NULL;
+ gf_boolean_t new_dict = _gf_false;
+
+ conf = this->private;
+
+ if (!IS_PEC_ENABLED(conf))
+ goto do_fop;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (xdata)
+ new_dict = _gf_true;
+ }
+
+ if (xdata && dict_set_uint32(xdata, GET_LINK_COUNT, 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, NLC_MSG_DICT_FAILURE,
+ "Failed to set GET_LINK_COUNT in dict");
+ goto err;
+ }
+
+do_fop:
+ NLC_FOP(unlink, GF_FOP_UNLINK, loc, NULL, frame, this, loc, flags, xdata);
+
+ if (new_dict)
+ dict_unref(xdata);
+ return 0;
+}
+
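+/* Upcall handler: on a cache-invalidation upcall, drop the nl-cache of
+ * the affected directory and of the parent(s) named in the upcall, so
+ * that dentry changes made from other clients become visible. */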
+static int32_t
+nlc_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ inode_t *parent1 = NULL;
+ inode_t *parent2 = NULL;
+ int ret = 0;
+ inode_table_t *itable = NULL;
+ nlc_conf_t *conf = NULL;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+    /*TODO: Add the inodes found as a member in gf_upcall_cache_invalidation
+     * so that subsequent xlators are prevented from doing inode_find again
+     */
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if ((!((up_ci->flags & UP_TIMES) && inode->ia_type == IA_IFDIR)) &&
+ (!(up_ci->flags & UP_PARENT_DENTRY_FLAGS))) {
+ goto out;
+ }
+
+ if (!gf_uuid_is_null(up_ci->p_stat.ia_gfid)) {
+ parent1 = inode_find(itable, up_ci->p_stat.ia_gfid);
+ if (!parent1) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (!gf_uuid_is_null(up_ci->oldp_stat.ia_gfid)) {
+ parent2 = inode_find(itable, up_ci->oldp_stat.ia_gfid);
+ if (!parent2) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* TODO: get enough data in upcall so that we do not invalidate but
+ * update */
+ if (inode && inode->ia_type == IA_IFDIR)
+ nlc_inode_clear_cache(this, inode, NLC_NONE);
+ if (parent1)
+ nlc_inode_clear_cache(this, parent1, NLC_NONE);
+ if (parent2)
+ nlc_inode_clear_cache(this, parent2, NLC_NONE);
+
+ GF_ATOMIC_INC(conf->nlc_counter.nlc_invals);
+
+out:
+ if (inode)
+ inode_unref(inode);
+ if (parent1)
+ inode_unref(parent1);
+ if (parent2)
+ inode_unref(parent2);
+
+ return ret;
+}
+
+int
+nlc_notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ nlc_update_child_down_time(this, gf_time());
+ /* TODO: nlc_clear_all_cache (this); else
+ lru prune will lazily clear it*/
+ break;
+ case GF_EVENT_UPCALL:
+ ret = nlc_invalidate(this, data);
+ break;
+ case GF_EVENT_PARENT_DOWN:
+ nlc_disable_cache(this);
+ nlc_clear_all_cache(this);
+ default:
+ break;
+ }
+
+ if (default_notify(this, event, data) != 0)
+ ret = -1;
+
+ return ret;
+}
+
+static int32_t
+nlc_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t pe_int = 0;
+ uint64_t nlc_ctx_int = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ inode_ctx_reset1(inode, this, &pe_int);
+ GF_ASSERT(pe_int == 0);
+
+ nlc_inode_clear_cache(this, inode, NLC_NONE);
+ inode_ctx_reset0(inode, this, &nlc_ctx_int);
+ nlc_ctx = (void *)(long)nlc_ctx_int;
+ if (nlc_ctx) {
+ GF_FREE(nlc_ctx);
+ GF_ATOMIC_SUB(conf->current_cache_size, sizeof(*nlc_ctx));
+ }
+
+ return 0;
+}
+
+static int32_t
+nlc_inodectx(xlator_t *this, inode_t *inode)
+{
+ nlc_dump_inodectx(this, inode);
+ return 0;
+}
+
+static int32_t
+nlc_priv_dump(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ conf = this->private;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("negative_lookup_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_hit));
+ gf_proc_dump_write("negative_lookup_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_miss));
+ gf_proc_dump_write("get_real_filename_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_hit));
+ gf_proc_dump_write("get_real_filename_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_miss));
+ gf_proc_dump_write("nameless_lookup_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nameless_lookup));
+ gf_proc_dump_write("inodes_with_positive_dentry_cache", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.pe_inode_cnt));
+ gf_proc_dump_write("inodes_with_negative_dentry_cache", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.ne_inode_cnt));
+ gf_proc_dump_write("dentry_invalidations_received", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_invals));
+ gf_proc_dump_write("cache_limit", "%" PRIu64, conf->cache_size);
+ gf_proc_dump_write("consumed_cache_size", "%" PRId64,
+ GF_ATOMIC_GET(conf->current_cache_size));
+ gf_proc_dump_write("inode_limit", "%" PRIu64, conf->inode_limit);
+ gf_proc_dump_write("consumed_inodes", "%" PRId64,
+ GF_ATOMIC_GET(conf->refd_inodes));
+
+ return 0;
+}
+
+static int32_t
+nlc_dump_metrics(xlator_t *this, int fd)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ dprintf(fd, "%s.negative_lookup_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_hit));
+ dprintf(fd, "%s.negative_lookup_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_miss));
+ dprintf(fd, "%s.get_real_filename_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_hit));
+ dprintf(fd, "%s.get_real_filename_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_miss));
+ dprintf(fd, "%s.nameless_lookup_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nameless_lookup));
+ dprintf(fd, "%s.inodes_with_positive_dentry_cache %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->nlc_counter.pe_inode_cnt));
+ dprintf(fd, "%s.inodes_with_negative_dentry_cache %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->nlc_counter.ne_inode_cnt));
+ dprintf(fd, "%s.dentry_invalidations_received %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_invals));
+ dprintf(fd, "%s.cache_limit %" PRIu64 "\n", this->name, conf->cache_size);
+ dprintf(fd, "%s.consumed_cache_size %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->current_cache_size));
+ dprintf(fd, "%s.inode_limit %" PRIu64 "\n", this->name, conf->inode_limit);
+ dprintf(fd, "%s.consumed_inodes %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->refd_inodes));
+
+ return 0;
+}
+
+void
+nlc_fini(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+ GF_FREE(conf);
+
+ glusterfs_ctx_tw_put(this->ctx);
+
+ return;
+}
+
+int32_t
+nlc_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_nlc_mt_end + 1);
+ return ret;
+}
+
+int32_t
+nlc_reconfigure(xlator_t *this, dict_t *options)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("nl-cache-timeout", conf->cache_timeout, options, int32,
+ out);
+ GF_OPTION_RECONF("nl-cache-positive-entry", conf->positive_entry_cache,
+ options, bool, out);
+ GF_OPTION_RECONF("nl-cache-limit", conf->cache_size, options, size_uint64,
+ out);
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+out:
+ return 0;
+}
+
+int32_t
+nlc_init(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+ inode_table_t *itable = NULL;
+
+ conf = GF_CALLOC(sizeof(*conf), 1, gf_nlc_mt_nlc_conf_t);
+ if (!conf)
+ goto out;
+
+ GF_OPTION_INIT("nl-cache-timeout", conf->cache_timeout, int32, out);
+ GF_OPTION_INIT("nl-cache-positive-entry", conf->positive_entry_cache, bool,
+ out);
+ GF_OPTION_INIT("nl-cache-limit", conf->cache_size, size_uint64, out);
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+    /* Since the positive entries are stored as a list of refs on
+     * existing inodes, we should not overflow the inode lru_limit.
+     * Hence cap the number of inodes ref'd by this xlator at 80% of
+     * inode_table->lru_limit. In fuse, where the limit is infinite,
+     * take 131072 as the lru limit (as in gfapi). */
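+    /* For example, with an lru_limit of 65536 the cap works out to
+     * 52428 inodes; with the 131072 fallback it is 104857. */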
+ itable = ((xlator_t *)this->graph->top)->itable;
+ if (itable && itable->lru_limit)
+ conf->inode_limit = itable->lru_limit * 80 / 100;
+ else
+ conf->inode_limit = 131072 * 80 / 100;
+
+ LOCK_INIT(&conf->lock);
+ GF_ATOMIC_INIT(conf->current_cache_size, 0);
+ GF_ATOMIC_INIT(conf->refd_inodes, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nlc_hit, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nlc_miss, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nameless_lookup, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.getrealfilename_hit, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.getrealfilename_miss, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.pe_inode_cnt, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.ne_inode_cnt, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0);
+
+ INIT_LIST_HEAD(&conf->lru);
+ conf->last_child_down = gf_time();
+
+ conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
+ if (!conf->timer_wheel) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, NLC_MSG_NO_TIMER_WHEEL,
+ "Initing the global timer wheel failed");
+ goto out;
+ }
+
+ this->private = conf;
+
+ ret = 0;
+out:
+ if (ret < 0)
+ GF_FREE(conf);
+
+ return ret;
+}
+
+struct xlator_fops nlc_fops = {
+ .rename = nlc_rename,
+ .mknod = nlc_mknod,
+ .create = nlc_create,
+ .mkdir = nlc_mkdir,
+ .lookup = nlc_lookup,
+ .rmdir = nlc_rmdir,
+ .getxattr = nlc_getxattr,
+ .symlink = nlc_symlink,
+ .link = nlc_link,
+ .unlink = nlc_unlink,
+ /* TODO:
+ .readdir = nlc_readdir,
+ .readdirp = nlc_readdirp,
+ .seek = nlc_seek,
+ .opendir = nlc_opendir, */
+};
+
+struct xlator_cbks nlc_cbks = {
+ .forget = nlc_forget,
+};
+
+struct xlator_dumpops nlc_dumpops = {
+ .inodectx = nlc_inodectx,
+ .priv = nlc_priv_dump,
+};
+
+struct volume_options nlc_options[] = {
+ {
+ .key = {"nl-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable nl-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"nl-cache-positive-entry"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache the name of the files/directories that was"
+ " looked up and are present in a directory",
+ },
+ {
+ .key = {"nl-cache-limit"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .default_value = "131072",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "the value over which caching will be disabled for"
+ "a while and the cache is cleared based on LRU",
+ },
+ {
+ .key = {"nl-cache-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .default_value = "60",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Time period after which cache has to be refreshed",
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"nl-cache"},
+ .description = "Enable/Disable nl cache translator"},
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = nlc_init,
+ .fini = nlc_fini,
+ .notify = nlc_notify,
+ .reconfigure = nlc_reconfigure,
+ .mem_acct_init = nlc_mem_acct_init,
+ .dump_metrics = nlc_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &nlc_dumpops,
+ .fops = &nlc_fops,
+ .cbks = &nlc_cbks,
+ .options = nlc_options,
+ .identifier = "nl-cache",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h
new file mode 100644
index 00000000000..85fcc176342
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __NL_CACHE_H__
+#define __NL_CACHE_H__
+
+#include "nl-cache-mem-types.h"
+#include "nl-cache-messages.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/atomic.h>
+
+#define NLC_INVALID 0x0000
+#define NLC_PE_FULL 0x0001
+#define NLC_PE_PARTIAL 0x0002
+#define NLC_NE_VALID 0x0004
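+
+/* nlc_ctx->state is a bitmask of the flags above: NLC_PE_FULL means the
+ * positive-entry list is a complete listing of the directory (set when a
+ * directory is freshly created via mkdir), NLC_PE_PARTIAL means only some
+ * entries were added, and NLC_NE_VALID means the negative-entry list can
+ * be trusted. */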
+
+#define IS_PE_VALID(state) \
+ ((state != NLC_INVALID) && (state & (NLC_PE_FULL | NLC_PE_PARTIAL)))
+#define IS_NE_VALID(state) ((state != NLC_INVALID) && (state & NLC_NE_VALID))
+
+#define IS_PEC_ENABLED(conf) (conf->positive_entry_cache)
+#define IS_CACHE_ENABLED(conf) ((!conf->disable_cache))
+
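+/* NLC_STACK_UNWIND detaches frame->local before unwinding, so that the
+ * local (and the locs it holds) can be wiped after the reply has been
+ * sent up the stack. */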
+#define NLC_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ nlc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ nlc_local_wipe(__xl, __local); \
+ } while (0)
+
+enum nlc_cache_clear_reason {
+ NLC_NONE = 0,
+ NLC_LRU_PRUNE,
+};
+
+struct nlc_ne {
+ struct list_head list;
+ char *name;
+};
+typedef struct nlc_ne nlc_ne_t;
+
+struct nlc_pe {
+ struct list_head list;
+ inode_t *inode;
+ char *name;
+};
+typedef struct nlc_pe nlc_pe_t;
+
+struct nlc_timer_data {
+ inode_t *inode;
+ xlator_t *this;
+};
+typedef struct nlc_timer_data nlc_timer_data_t;
+
+struct nlc_lru_node {
+ inode_t *inode;
+ struct list_head list;
+};
+typedef struct nlc_lru_node nlc_lru_node_t;
+
+struct nlc_ctx {
+ struct list_head pe; /* list of positive entries */
+ struct list_head ne; /* list of negative entries */
+ uint64_t state;
+ time_t cache_time;
+ struct gf_tw_timer_list *timer;
+ nlc_timer_data_t *timer_data;
+ size_t cache_size;
+ uint64_t refd_inodes;
+ gf_lock_t lock;
+};
+typedef struct nlc_ctx nlc_ctx_t;
+
+struct nlc_local {
+ loc_t loc;
+ loc_t loc2;
+ inode_t *inode;
+ inode_t *parent;
+ fd_t *fd;
+ char *linkname;
+ glusterfs_fop_t fop;
+};
+typedef struct nlc_local nlc_local_t;
+
+struct nlc_statistics {
+ gf_atomic_t nlc_hit; /* No. of times lookup/stat was served from this xl */
+ gf_atomic_t nlc_miss; /* No. of times negative lookups were sent to disk */
+ /* More granular counters */
+ gf_atomic_t nameless_lookup;
+ gf_atomic_t getrealfilename_hit;
+ gf_atomic_t getrealfilename_miss;
+ gf_atomic_t pe_inode_cnt;
+ gf_atomic_t ne_inode_cnt;
+ gf_atomic_t nlc_invals; /* No. of invalidates received from upcall*/
+};
+
+struct nlc_conf {
+ int32_t cache_timeout;
+ gf_boolean_t positive_entry_cache;
+ gf_boolean_t negative_entry_cache;
+ gf_boolean_t disable_cache;
+ uint64_t cache_size;
+ gf_atomic_t current_cache_size;
+ uint64_t inode_limit;
+ gf_atomic_t refd_inodes;
+ struct tvec_base *timer_wheel;
+ time_t last_child_down;
+ struct list_head lru;
+ gf_lock_t lock;
+ struct nlc_statistics nlc_counter;
+};
+typedef struct nlc_conf nlc_conf_t;
+
+gf_boolean_t
+nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname,
+ int32_t *op_ret, int32_t *op_errno, dict_t *dict);
+
+gf_boolean_t
+nlc_is_negative_lookup(xlator_t *this, loc_t *loc);
+
+void
+nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state);
+
+void
+nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ const char *name);
+
+void
+nlc_dir_remove_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ const char *name, gf_boolean_t multilink);
+
+void
+nlc_dir_add_ne(xlator_t *this, inode_t *inode, const char *name);
+
+void
+nlc_local_wipe(xlator_t *this, nlc_local_t *local);
+
+nlc_local_t *
+nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, loc_t *loc2);
+
+void
+nlc_update_child_down_time(xlator_t *this, time_t now);
+
+void
+nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason);
+
+void
+nlc_dump_inodectx(xlator_t *this, inode_t *inode);
+
+void
+nlc_clear_all_cache(xlator_t *this);
+
+void
+nlc_disable_cache(xlator_t *this);
+
+void
+nlc_lru_prune(xlator_t *this, inode_t *inode);
+
+#endif /* __NL_CACHE_H__ */
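
The directory state defined in this header is a bitmask, so positive and negative entries can be valid at the same time and the IS_*_VALID helpers only test the relevant bits. A minimal, hypothetical sketch of how those macros are meant to be read (not part of the patch itself; it assumes nl-cache.h is included and conf points to an nlc_conf_t):

    #include "nl-cache.h"

    static void
    nlc_describe_state(nlc_conf_t *conf, uint64_t state)
    {
        if (!IS_CACHE_ENABLED(conf))
            return; /* caching disabled for this volume, nothing to consult */
        if (IS_PE_VALID(state))
            gf_log("nl-cache", GF_LOG_DEBUG, "positive entries cached");
        if (IS_NE_VALID(state))
            gf_log("nl-cache", GF_LOG_DEBUG, "negative entries valid");
    }
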
diff --git a/xlators/performance/open-behind/src/Makefile.am b/xlators/performance/open-behind/src/Makefile.am
index 12528570783..41930dcd67d 100644
--- a/xlators/performance/open-behind/src/Makefile.am
+++ b/xlators/performance/open-behind/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = open-behind.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-open_behind_la_LDFLAGS = -module -avoid-version
+open_behind_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
open_behind_la_SOURCES = open-behind.c
open_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = open-behind-mem-types.h
+noinst_HEADERS = open-behind-mem-types.h open-behind-messages.h
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h
index 1e94296f424..6c1ab2e19d2 100644
--- a/xlators/performance/open-behind/src/open-behind-mem-types.h
+++ b/xlators/performance/open-behind/src/open-behind-mem-types.h
@@ -11,11 +11,12 @@
#ifndef __OB_MEM_TYPES_H__
#define __OB_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_ob_mem_types_ {
- gf_ob_mt_fd_t = gf_common_mt_end + 1,
- gf_ob_mt_conf_t,
- gf_ob_mt_end
+ gf_ob_mt_fd_t = gf_common_mt_end + 1,
+ gf_ob_mt_conf_t,
+ gf_ob_mt_inode_t,
+ gf_ob_mt_end
};
#endif
diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
new file mode 100644
index 00000000000..0e789177684
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind-messages.h
@@ -0,0 +1,32 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _OPEN_BEHIND_MESSAGES_H_
+#define _OPEN_BEHIND_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+ OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
+ OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
+
+#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
+#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
+
+#endif /* _OPEN_BEHIND_MESSAGES_H_ */
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
index 26b684e888c..600c3b62ffe 100644
--- a/xlators/performance/open-behind/src/open-behind.c
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -9,968 +9,1093 @@
*/
#include "open-behind-mem-types.h"
-#include "xlator.h"
-#include "statedump.h"
-#include "call-stub.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include "open-behind-messages.h"
+#include <glusterfs/glusterfs-acl.h>
+
+/* Note: The initial design of open-behind was made to cover the simple case
+ * of open, read, close for small files. This pattern combined with
+ * quick-read can do the whole operation without a single request to the
+ * bricks (except the initial lookup).
+ *
+ * The way to do this has been improved, but the logic remains the same.
+ * Basically, this means that any operation sent to the fd or the inode
+ * that is not a read causes the open request to be sent to the
+ * bricks, and all future operations will be executed synchronously,
+ * including opens (it's reset once all fds are closed).
+ */
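
As a purely illustrative example of the pattern described in the note above, the following application-side sequence (hypothetical mount path and file name) can be completed by open-behind plus quick-read without the open or the read ever reaching the bricks; only the initial lookup goes over the wire:

    #include <fcntl.h>
    #include <unistd.h>

    static void
    small_file_read(void)
    {
        char buf[4096];
        int fd = open("/mnt/gv0/small.conf", O_RDONLY); /* unwound locally by open-behind */
        if (fd >= 0) {
            (void)read(fd, buf, sizeof(buf)); /* typically answered from quick-read's cache */
            close(fd);                        /* the delayed backend open can then be dropped */
        }
    }
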
typedef struct ob_conf {
- gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
- e.g - fstat() readv()
-
- whereas for fops like writev(), lk(),
- the fd is important for side effects
- like mandatory locks
- */
- gf_boolean_t lazy_open; /* delay backend open as much as possible */
+ gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
+ e.g - fstat() readv()
+
+ whereas for fops like writev(), lk(),
+ the fd is important for side effects
+ like mandatory locks
+ */
+ gf_boolean_t lazy_open; /* delay backend open as much as possible */
+ gf_boolean_t read_after_open; /* instead of sending readvs on
+ anonymous fds, open the file
+ first and then send readv, i.e.
+ similar to what writev does
+ */
} ob_conf_t;
-
-typedef struct ob_fd {
- call_frame_t *open_frame;
- loc_t loc;
- dict_t *xdata;
- int flags;
- int op_errno;
- struct list_head list;
-} ob_fd_t;
-
-
-ob_fd_t *
-__ob_fd_ctx_get (xlator_t *this, fd_t *fd)
+/* A negative state represents an errno value negated. In this case the
+ * current operation cannot be processed. */
+typedef enum _ob_state {
+ /* There are no opens on the inode or the first open is already
+ * completed. The current operation can be sent directly. */
+ OB_STATE_READY = 0,
+
+ /* There's an open pending and it has been triggered. The current
+ * operation should be "stubbified" and processed with
+ * ob_stub_dispatch(). */
+ OB_STATE_OPEN_TRIGGERED,
+
+ /* There's an open pending but it has not been triggered. The current
+ * operation can be processed directly but using an anonymous fd. */
+ OB_STATE_OPEN_PENDING,
+
+ /* The current operation is the first open on the inode. */
+ OB_STATE_FIRST_OPEN
+} ob_state_t;
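
Negative ob_state_t values are negated errno codes, which is why the OB_POST_* macros further below fall through to a default case that unwinds the fop with -state. A hypothetical sketch of how a returned state is interpreted (the helper name and comments are illustrative only; the real dispatch is done by the macros):

    static void
    ob_handle_state(ob_state_t state)
    {
        switch (state) {
        case OB_STATE_READY:
            /* no pending open: forward the fop directly to the next xlator */
            break;
        case OB_STATE_OPEN_TRIGGERED:
            /* queue a stub; it is resumed once the first open completes */
            break;
        case OB_STATE_OPEN_PENDING:
            /* open not yet triggered: the fop may use an anonymous fd */
            break;
        case OB_STATE_FIRST_OPEN:
            /* this request is the first open itself */
            break;
        default:
            /* negative value: a negated errno such as -ENOMEM */
            break;
        }
    }
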
+
+typedef struct ob_inode {
+ /* List of stubs pending on the first open. Once the first open is
+ * complete, all these stubs will be resubmitted, and dependencies
+ * will be checked again. */
+ struct list_head resume_fops;
+
+ /* The inode this object references. */
+ inode_t *inode;
+
+ /* The fd from the first open sent to this inode. It will be set
+ * from the moment the open is processed until the open is fully
+ * executed, or the fd is closed before being actually opened. It's
+ * NULL in all other cases. */
+ fd_t *first_fd;
+
+ /* The stub from the first open operation. When the open fop starts
+ * being processed, it's assigned the OB_OPEN_PREPARING value
+ * until the actual stub is created. This is necessary to avoid
+ * creating the stub inside a locked region. Once the stub is
+ * successfully created, it's assigned here. This value is set
+ * to NULL once the stub is resumed. */
+ call_stub_t *first_open;
+
+ /* The total number of currently open fd's on this inode. */
+ int32_t open_count;
+
+ /* This flag is set as soon as we know that the open will be
+ * sent to the bricks, even before the stub is ready. */
+ bool triggered;
+} ob_inode_t;
+
+/* Dummy pointer used temporarily while the actual open stub is being created */
+#define OB_OPEN_PREPARING ((call_stub_t *)-1)
+
+#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \
+ case OB_STATE_FIRST_OPEN: \
+ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \
+ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \
+ default_##_fop##_failure_cbk(_frame, EINVAL); \
+ break; \
+ case OB_STATE_READY: \
+ default_##_fop(_frame, _xl, ##_args); \
+ break; \
+ case OB_STATE_OPEN_TRIGGERED: { \
+ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \
+ ##_args); \
+ if (__ob_stub != NULL) { \
+ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \
+ break; \
+ } \
+ __ob_state = -ENOMEM; \
+ } \
+ default: \
+ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \
+ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \
+ default_##_fop##_failure_cbk(_frame, -__ob_state)
+
+#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_fd( \
+ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ if (!(_trigger)) { \
+ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \
+ (_fd)->flags); \
+ if (__ob_fd != NULL) { \
+ default_##_fop(_frame, _xl, ##_args); \
+ fd_unref(__ob_fd); \
+ break; \
+ } \
+ __ob_state = -ENOMEM; \
+ } \
+ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_fd( \
+ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \
+ break; \
+ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_inode( \
+ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+static ob_inode_t *
+ob_inode_get_locked(xlator_t *this, inode_t *inode)
{
- uint64_t value = 0;
- int ret = -1;
- ob_fd_t *ob_fd = NULL;
-
- ret = __fd_ctx_get (fd, this, &value);
- if (ret)
- return NULL;
-
- ob_fd = (void *) ((long) value);
+ ob_inode_t *ob_inode = NULL;
+ uint64_t value = 0;
+
+ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) {
+ return (ob_inode_t *)(uintptr_t)value;
+ }
+
+ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
+ if (ob_inode != NULL) {
+ ob_inode->inode = inode;
+ INIT_LIST_HEAD(&ob_inode->resume_fops);
+
+ value = (uint64_t)(uintptr_t)ob_inode;
+ if (__inode_ctx_set(inode, this, &value) < 0) {
+ GF_FREE(ob_inode);
+ ob_inode = NULL;
+ }
+ }
- return ob_fd;
+ return ob_inode;
}
-
-ob_fd_t *
-ob_fd_ctx_get (xlator_t *this, fd_t *fd)
+static ob_state_t
+ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd,
+ int32_t open_count, bool synchronous, bool trigger,
+ ob_inode_t **pob_inode, fd_t **pfd)
{
- ob_fd_t *ob_fd = NULL;
+ ob_conf_t *conf;
+ ob_inode_t *ob_inode;
+ call_stub_t *open_stub;
- LOCK (&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get (this, fd);
- }
- UNLOCK (&fd->lock);
+ if (inode == NULL) {
+ return OB_STATE_READY;
+ }
- return ob_fd;
-}
+ conf = xl->private;
+ *pfd = NULL;
-int
-__ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
-{
- uint64_t value = 0;
- int ret = -1;
+ LOCK(&inode->lock);
+ {
+ ob_inode = ob_inode_get_locked(xl, inode);
+ if (ob_inode == NULL) {
+ UNLOCK(&inode->lock);
- value = (long) ((void *) ob_fd);
+ return -ENOMEM;
+ }
+ *pob_inode = ob_inode;
+
+ ob_inode->open_count += open_count;
+
+ /* If first_fd is not NULL, it means that there's a previous open not
+ * yet completed. */
+ if (ob_inode->first_fd != NULL) {
+ *pfd = ob_inode->first_fd;
+ /* If the current request doesn't trigger the open and it hasn't
+ * been triggered yet, we can continue without issuing the open
+ * only if the current request belongs to the same fd as the
+ * first one. */
+ if (!trigger && !ob_inode->triggered &&
+ (ob_inode->first_fd == fd)) {
+ UNLOCK(&inode->lock);
+
+ return OB_STATE_OPEN_PENDING;
+ }
+
+ /* We need to issue the open. It could have already been triggered
+ * before. In this case open_stub will be NULL. Or the initial open
+ * may not be completely ready yet. In this case open_stub will be
+ * OB_OPEN_PREPARING. */
+ open_stub = ob_inode->first_open;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = true;
+
+ UNLOCK(&inode->lock);
+
+ if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
+ call_resume(open_stub);
+ }
+
+ return OB_STATE_OPEN_TRIGGERED;
+ }
- ret = __fd_ctx_set (fd, this, value);
+ /* There's no pending open. Only opens can be non-synchronous, so all
+ * regular fops will be processed directly. For non-synchronous opens,
+ * we'll still process them normally (i.e. synchronously) if there are
+ * more file descriptors open. */
+ if (synchronous || (ob_inode->open_count > open_count)) {
+ UNLOCK(&inode->lock);
- return ret;
-}
+ return OB_STATE_READY;
+ }
+ *pfd = fd;
-int
-ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
-{
- int ret = -1;
+ /* This is the first open. We keep a reference on the fd and set
+ * first_open stub to OB_OPEN_PREPARING until the actual stub can
+ * be assigned (we don't create the stub here to avoid doing memory
+ * allocations inside the mutex). */
+ ob_inode->first_fd = __fd_ref(fd);
+ ob_inode->first_open = OB_OPEN_PREPARING;
- LOCK (&fd->lock);
- {
- ret = __ob_fd_ctx_set (this, fd, ob_fd);
- }
- UNLOCK (&fd->lock);
+ /* If lazy_open is not set, we'll need to immediately send the open,
+ * so we set triggered right now. */
+ ob_inode->triggered = !conf->lazy_open;
+ }
+ UNLOCK(&inode->lock);
- return ret;
+ return OB_STATE_FIRST_OPEN;
}
-
-ob_fd_t *
-ob_fd_new (void)
+static ob_state_t
+ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
+ bool synchronous, bool trigger, ob_inode_t **pob_inode,
+ fd_t **pfd)
{
- ob_fd_t *ob_fd = NULL;
-
- ob_fd = GF_CALLOC (1, sizeof (*ob_fd), gf_ob_mt_fd_t);
+ uint64_t err;
- INIT_LIST_HEAD (&ob_fd->list);
+ if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
+ return (ob_state_t)-err;
+ }
- return ob_fd;
+ return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
+ trigger, pob_inode, pfd);
}
-
-void
-ob_fd_free (ob_fd_t *ob_fd)
+static ob_state_t
+ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
+ fd_t **pfd)
{
- loc_wipe (&ob_fd->loc);
-
- if (ob_fd->xdata)
- dict_unref (ob_fd->xdata);
+ bool synchronous;
- if (ob_fd->open_frame)
- STACK_DESTROY (ob_fd->open_frame->root);
+ /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
+ * we also execute this open synchronously? */
+ synchronous = (flags & O_TRUNC) != 0;
- GF_FREE (ob_fd);
+ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd);
}
-
-int
-ob_wake_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd_ret, dict_t *xdata)
+static int32_t
+ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
{
- fd_t *fd = NULL;
- struct list_head list;
- ob_fd_t *ob_fd = NULL;
- call_stub_t *stub = NULL, *tmp = NULL;
-
- fd = frame->local;
- frame->local = NULL;
-
- INIT_LIST_HEAD (&list);
-
- LOCK (&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get (this, fd);
-
- list_splice_init (&ob_fd->list, &list);
-
- if (op_ret < 0) {
- /* mark fd BAD for ever */
- ob_fd->op_errno = op_errno;
- } else {
- __fd_ctx_del (fd, this, NULL);
- ob_fd_free (ob_fd);
- }
- }
- UNLOCK (&fd->lock);
-
- list_for_each_entry_safe (stub, tmp, &list, list) {
- list_del_init (&stub->list);
-
- if (op_ret < 0)
- call_unwind_error (stub, -1, op_errno);
- else
- call_resume (stub);
- }
-
- fd_unref (fd);
+ LOCK(&ob_inode->inode->lock);
+ {
+ /* We only queue a stub if the open has not been completed or
+ * cancelled. */
+ if (ob_inode->first_fd == fd) {
+ list_add_tail(&stub->list, &ob_inode->resume_fops);
+ stub = NULL;
+ }
+ }
+ UNLOCK(&ob_inode->inode->lock);
- STACK_DESTROY (frame->root);
+ if (stub != NULL) {
+ call_resume(stub);
+ }
- return 0;
+ return 0;
}
-
-int
-ob_fd_wake (xlator_t *this, fd_t *fd)
+static void
+ob_open_destroy(call_stub_t *stub, fd_t *fd)
{
- call_frame_t *frame = NULL;
- ob_fd_t *ob_fd = NULL;
-
- LOCK (&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get (this, fd);
- if (!ob_fd)
- goto unlock;
-
- frame = ob_fd->open_frame;
- ob_fd->open_frame = NULL;
- }
-unlock:
- UNLOCK (&fd->lock);
-
- if (frame) {
- frame->local = fd_ref (fd);
-
- STACK_WIND (frame, ob_wake_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open,
- &ob_fd->loc, ob_fd->flags, fd, ob_fd->xdata);
- }
-
- return 0;
+ stub->frame->local = NULL;
+ STACK_DESTROY(stub->frame->root);
+ call_stub_destroy(stub);
+ fd_unref(fd);
}
-
-int
-open_and_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+static int32_t
+ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
{
- ob_fd_t *ob_fd = NULL;
- int op_errno = 0;
-
- if (!fd)
- goto nofd;
-
- LOCK (&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get (this, fd);
- if (!ob_fd)
- goto unlock;
-
- if (ob_fd->op_errno) {
- op_errno = ob_fd->op_errno;
- goto unlock;
- }
-
- list_add_tail (&stub->list, &ob_fd->list);
- }
-unlock:
- UNLOCK (&fd->lock);
-
-nofd:
- if (op_errno)
- call_unwind_error (stub, -1, op_errno);
- else if (ob_fd)
- ob_fd_wake (this, fd);
- else
- call_resume (stub);
+ bool closed;
+
+ LOCK(&ob_inode->inode->lock);
+ {
+ closed = ob_inode->first_fd != fd;
+ if (!closed) {
+ if (ob_inode->triggered) {
+ ob_inode->first_open = NULL;
+ } else {
+ ob_inode->first_open = stub;
+ stub = NULL;
+ }
+ }
+ }
+ UNLOCK(&ob_inode->inode->lock);
+
+ if (stub != NULL) {
+ if (closed) {
+ ob_open_destroy(stub, fd);
+ } else {
+ call_resume(stub);
+ }
+ }
- return 0;
+ return 0;
}
-
-int
-ob_open_behind (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- fd_t *fd, dict_t *xdata)
+static void
+ob_resume_pending(struct list_head *list)
{
- ob_fd_t *ob_fd = NULL;
- int ret = -1;
- ob_conf_t *conf = NULL;
-
-
- conf = this->private;
-
- if (flags & O_TRUNC) {
- STACK_WIND (frame, default_open_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
- }
-
- ob_fd = ob_fd_new ();
- if (!ob_fd)
- goto enomem;
-
- ob_fd->open_frame = copy_frame (frame);
- if (!ob_fd->open_frame)
- goto enomem;
- ret = loc_copy (&ob_fd->loc, loc);
- if (ret)
- goto enomem;
-
- ob_fd->flags = flags;
- if (xdata)
- ob_fd->xdata = dict_ref (xdata);
+ call_stub_t *stub;
- ret = ob_fd_ctx_set (this, fd, ob_fd);
- if (ret)
- goto enomem;
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
- fd_ref (fd);
-
- STACK_UNWIND_STRICT (open, frame, 0, 0, fd, xdata);
-
- if (!conf->lazy_open)
- ob_fd_wake (this, fd);
-
- fd_unref (fd);
-
- return 0;
-enomem:
- if (ob_fd) {
- if (ob_fd->open_frame)
- STACK_DESTROY (ob_fd->open_frame->root);
- loc_wipe (&ob_fd->loc);
- if (ob_fd->xdata)
- dict_unref (ob_fd->xdata);
- GF_FREE (ob_fd);
- }
-
- return -1;
+ call_resume(stub);
+ }
}
-
-int
-ob_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- fd_t *fd, dict_t *xdata)
-{
- fd_t *old_fd = NULL;
- int ret = -1;
- int op_errno = 0;
- call_stub_t *stub = NULL;
-
- old_fd = fd_lookup (fd->inode, 0);
- if (old_fd) {
- /* open-behind only when this is the first FD */
- stub = fop_open_stub (frame, default_open_resume,
- loc, flags, fd, xdata);
- if (!stub) {
- op_errno = ENOMEM;
- fd_unref (old_fd);
- goto err;
- }
-
- open_and_resume (this, old_fd, stub);
-
- fd_unref (old_fd);
-
- return 0;
- }
-
- ret = ob_open_behind (frame, this, loc, flags, fd, xdata);
- if (ret) {
- op_errno = ENOMEM;
- goto err;
- }
-
- return 0;
-err:
- gf_log (this->name, GF_LOG_ERROR, "%s: %s", loc->path,
- strerror (op_errno));
-
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, 0, 0);
-
- return 0;
-}
-
-
-fd_t *
-ob_get_wind_fd (xlator_t *this, fd_t *fd)
+static void
+ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret,
+ int32_t op_errno)
{
- ob_conf_t *conf = NULL;
- ob_fd_t *ob_fd = NULL;
-
- conf = this->private;
-
- ob_fd = ob_fd_ctx_get (this, fd);
+ struct list_head list;
+
+ INIT_LIST_HEAD(&list);
+
+ if (op_ret < 0) {
+ fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);
+ }
+
+ LOCK(&ob_inode->inode->lock);
+ {
+ /* Only update the fields if the file has not been closed before
+ * getting here. */
+ if (ob_inode->first_fd == fd) {
+ list_splice_init(&ob_inode->resume_fops, &list);
+ ob_inode->first_fd = NULL;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = false;
+ }
+ }
+ UNLOCK(&ob_inode->inode->lock);
- if (ob_fd && conf->use_anonymous_fd)
- return fd_anonymous (fd->inode);
+ ob_resume_pending(&list);
- return fd_ref (fd);
+ fd_unref(fd);
}
-
-int
-ob_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+static int32_t
+ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *wind_fd = NULL;
+ ob_inode_t *ob_inode;
- wind_fd = ob_get_wind_fd (this, fd);
+ ob_inode = frame->local;
+ frame->local = NULL;
- stub = fop_readv_stub (frame, default_readv_resume, wind_fd,
- size, offset, flags, xdata);
- fd_unref (wind_fd);
+ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno);
- if (!stub)
- goto err;
+ STACK_DESTROY(frame->root);
- open_and_resume (this, wind_fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
-
- return 0;
+ return 0;
}
-
-int
-ob_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
- int count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
+static int32_t
+ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_writev_stub (frame, default_writev_resume, fd, iov, count,
- offset, flags, iobref, xdata);
- if (!stub)
- goto err;
+ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- open_and_resume (this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, 0, 0, 0);
-
- return 0;
+ return 0;
}
-
-int
-ob_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+static int32_t
+ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- fd_t *wind_fd = NULL;
+ ob_inode_t *ob_inode;
+ call_frame_t *open_frame;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd);
+ if (state == OB_STATE_READY) {
+ /* There's no pending open, but there are other file descriptors opened
+ * or the current flags require a synchronous open. */
+ return default_open(frame, this, loc, flags, fd, xdata);
+ }
+
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
- wind_fd = ob_get_wind_fd (this, fd);
+ state = -ENOMEM;
+ }
+
+ if (state == OB_STATE_FIRST_OPEN) {
+ /* We try to create a stub for the new open. A new frame needs to be
+ * used because the current one may be destroyed soon after sending
+ * the open's reply. */
+ open_frame = copy_frame(frame);
+ if (open_frame != NULL) {
+ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd,
+ xdata);
+ if (stub != NULL) {
+ open_frame->local = ob_inode;
+
+ /* TODO: Previous version passed xdata back to the caller, but
+ * probably this doesn't make sense since it won't contain
+ * any requested data. I think it would be better to pass
+ * NULL for xdata. */
+ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata);
+
+ return ob_open_dispatch(this, ob_inode, first_fd, stub);
+ }
+
+ STACK_DESTROY(open_frame->root);
+ }
- stub = fop_fstat_stub (frame, default_fstat_resume, wind_fd, xdata);
+ /* In case of error, simulate a regular completion but with an error
+ * code. */
+ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM);
- fd_unref (wind_fd);
+ state = -ENOMEM;
+ }
- if (!stub)
- goto err;
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
- open_and_resume (this, wind_fd, stub);
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
+ }
+ UNLOCK(&fd->inode->lock);
- return 0;
-err:
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, 0, 0);
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "open", "path=%s", loc->path, NULL);
- return 0;
+ return default_open_failure_cbk(frame, -state);
}
-
-int
-ob_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+static int32_t
+ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- ob_fd_t *ob_fd = NULL;
- gf_boolean_t unwind = _gf_false;
-
- LOCK (&fd->lock);
- {
- ob_fd = __ob_fd_ctx_get (this, fd);
- if (ob_fd && ob_fd->open_frame)
- /* if open() was never wound to backend,
- no need to wind flush() either.
- */
- unwind = _gf_true;
- }
- UNLOCK (&fd->lock);
-
- if (unwind)
- goto unwind;
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ /* Create requests are never delayed. We always send them synchronously. */
+ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode,
+ &first_fd);
+ if (state == OB_STATE_READY) {
+ /* There's no pending open, but there are other file descriptors opened
+ * so we simply forward the request synchronously. */
+ return default_create(frame, this, loc, flags, mode, umask, fd, xdata);
+ }
+
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd,
+ xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
- stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
- if (!stub)
- goto err;
+ state = -ENOMEM;
+ }
- open_and_resume (this, fd, stub);
+ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never
+ * be returned by ob_open_and_resume_fd(). If we are here it can only be
+ * because there has been a problem. */
- return 0;
-err:
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, 0);
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
- return 0;
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
+ }
+ UNLOCK(&fd->inode->lock);
-unwind:
- STACK_UNWIND_STRICT (flush, frame, 0, 0, 0);
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "create", "path=%s", loc->path, NULL);
- return 0;
+ return default_create_failure_cbk(frame, -state);
}
-
-int
-ob_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int flag,
- dict_t *xdata)
+static int32_t
+ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fsync_stub (frame, default_fsync_resume, fd, flag, xdata);
- if (!stub)
- goto err;
+ ob_conf_t *conf = this->private;
+ bool trigger = conf->read_after_open || !conf->use_anonymous_fd;
- open_and_resume (this, fd, stub);
+ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata);
- return 0;
-err:
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
-
- return 0;
+ return 0;
}
-
-int
-ob_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock, dict_t *xdata)
+static int32_t
+ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, flock, xdata);
- if (!stub)
- goto err;
-
- open_and_resume (this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, 0, 0);
+ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags,
+ iobref, xdata);
- return 0;
+ return 0;
}
-int
-ob_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+static int32_t
+ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset,
- xdata);
- if (!stub)
- goto err;
-
- open_and_resume (this, fd, stub);
+ ob_conf_t *conf = this->private;
+ bool trigger = !conf->use_anonymous_fd;
- return 0;
-err:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata);
- return 0;
+ return 0;
}
-
-int
-ob_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
- int flags, dict_t *xdata)
+static int32_t
+ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ ob_conf_t *conf = this->private;
+ bool trigger = !conf->use_anonymous_fd;
- stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr,
- flags, xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata);
- open_and_resume (this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, 0);
-
- return 0;
+ return 0;
}
-
-int
-ob_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
- dict_t *xdata)
+static int32_t
+ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name,
- xdata);
- if (!stub)
- goto err;
+ OB_POST_FLUSH(this, frame, fd, fd, xdata);
- open_and_resume (this, fd, stub);
-
- return 0;
-err:
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, 0, 0);
-
- return 0;
+ return 0;
}
-
-int
-ob_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+static int32_t
+ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fremovexattr_stub (frame, default_fremovexattr_resume, fd,
- name, xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata);
- open_and_resume (this, fd, stub);
+ return 0;
+}
- return 0;
-err:
- STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOMEM, 0);
+static int32_t
+ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata);
- return 0;
+ return 0;
}
-
-int
-ob_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- int cmd, struct gf_flock *flock, dict_t *xdata)
+static int32_t
+ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_finodelk_stub (frame, default_finodelk_resume, volume, fd,
- cmd, flock, xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata);
- open_and_resume (this, fd, stub);
+ return 0;
+}
- return 0;
-err:
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, 0);
+static int32_t
+ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata);
- return 0;
+ return 0;
}
-
-int
-ob_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
+static int32_t
+ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fentrylk_stub (frame, default_fentrylk_resume, volume, fd,
- basename, cmd, type, xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata);
- open_and_resume (this, fd, stub);
+ return 0;
+}
- return 0;
-err:
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, 0);
+static int32_t
+ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata);
- return 0;
+ return 0;
}
-
-int
-ob_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+static int32_t
+ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int cmd, struct gf_flock *flock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fxattrop_stub (frame, default_fxattrop_resume, fd, optype,
- xattr, xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata);
- open_and_resume (this, fd, stub);
+ return 0;
+}
- return 0;
-err:
- STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, 0, 0);
+static int32_t
+ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type,
+ xdata);
- return 0;
+ return 0;
}
-
-int
-ob_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *iatt, int valid, dict_t *xdata)
+static int32_t
+ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = fop_fsetattr_stub (frame, default_fsetattr_resume, fd,
- iatt, valid, xdata);
- if (!stub)
- goto err;
+ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata);
- open_and_resume (this, fd, stub);
+ return 0;
+}
- return 0;
-err:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, 0, 0, 0);
+static int32_t
+ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt,
+ int valid, dict_t *xdata)
+{
+ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata);
- return 0;
+ return 0;
}
-int
+static int32_t
ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
+ off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub;
-
- stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode,
- offset, len, xdata);
- if (!stub)
- goto err;
-
- open_and_resume(this, fd, stub);
+ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata);
- return 0;
-err:
- STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
+static int32_t
ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
+ size_t len, dict_t *xdata)
{
- call_stub_t *stub;
+ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata);
- stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len,
- xdata);
- if (!stub)
- goto err;
+ return 0;
+}
- open_and_resume(this, fd, stub);
+static int32_t
+ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata);
- return 0;
-err:
- STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
- dict_t *xdata)
+static int32_t
+ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
{
- fd_t *fd = NULL;
- call_stub_t *stub = NULL;
+ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata);
- stub = fop_unlink_stub (frame, default_unlink_resume, loc,
- xflags, xdata);
- if (!stub)
- goto err;
+ return 0;
+}
- fd = fd_lookup (loc->inode, 0);
+static int32_t
+ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
+ dict_t *xdata)
+{
+ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata);
- open_and_resume (this, fd, stub);
+ return 0;
+}
- return 0;
-err:
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, 0, 0, 0);
+static int32_t
+ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid,
+ xdata);
- return 0;
+ return 0;
}
-
-int
-ob_rename (call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
- dict_t *xdata)
+static int32_t
+ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- fd_t *fd = NULL;
- call_stub_t *stub = NULL;
+ if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) ||
+ dict_get(dict, POSIX_ACL_ACCESS_XATTR) ||
+ dict_get(dict, GF_SELINUX_XATTR_KEY)) {
+ return default_setxattr(frame, this, loc, dict, flags, xdata);
+ }
- stub = fop_rename_stub (frame, default_rename_resume, src, dst, xdata);
- if (!stub)
- goto err;
+ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags,
+ xdata);
- if (dst->inode)
- fd = fd_lookup (dst->inode, 0);
+ return 0;
+}
- open_and_resume (this, fd, stub);
+static void
+ob_fdclose(xlator_t *this, fd_t *fd)
+{
+ struct list_head list;
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+
+ INIT_LIST_HEAD(&list);
+ stub = NULL;
+
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode = ob_inode_get_locked(this, fd->inode);
+ if (ob_inode != NULL) {
+ ob_inode->open_count--;
+
+ /* If this fd is the same as ob_inode->first_fd, it means that
+ * the initial open has not fully completed. We'll try to cancel
+ * it. */
+ if (ob_inode->first_fd == fd) {
+ if (ob_inode->first_open == OB_OPEN_PREPARING) {
+ /* In this case ob_open_dispatch() has not been called yet.
+ * We clear first_fd and first_open to allow that function
+ * to know that the open is not really needed. This also
+ * allows other requests to work as expected if they
+ * arrive before the dispatch function is called. If there
+ * are pending fops, we can directly process them here.
+ * (note that there shouldn't be any fd related fops, but
+ * if there are, it's fine if they fail). */
+ ob_inode->first_fd = NULL;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = false;
+ list_splice_init(&ob_inode->resume_fops, &list);
+ } else if (!ob_inode->triggered) {
+ /* If the open has already been dispatched, we can only
+ * cancel it if it has not been triggered. Otherwise we
+ * simply wait until it completes. While it's not triggered,
+ * first_open must be a valid stub and there can't be any
+ * pending fops. */
+ GF_ASSERT((ob_inode->first_open != NULL) &&
+ list_empty(&ob_inode->resume_fops));
+
+ ob_inode->first_fd = NULL;
+ stub = ob_inode->first_open;
+ ob_inode->first_open = NULL;
+ }
+ }
+ }
+ }
+ UNLOCK(&fd->inode->lock);
- return 0;
-err:
- STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
+ if (stub != NULL) {
+ ob_open_destroy(stub, fd);
+ }
- return 0;
+ ob_resume_pending(&list);
}
-
int
-ob_release (xlator_t *this, fd_t *fd)
+ob_forget(xlator_t *this, inode_t *inode)
{
- ob_fd_t *ob_fd = NULL;
+ ob_inode_t *ob_inode;
+ uint64_t value = 0;
- ob_fd = ob_fd_ctx_get (this, fd);
+ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
+ ob_inode = (ob_inode_t *)(uintptr_t)value;
+ GF_FREE(ob_inode);
+ }
- ob_fd_free (ob_fd);
-
- return 0;
+ return 0;
}
-
int
-ob_priv_dump (xlator_t *this)
+ob_priv_dump(xlator_t *this)
{
- ob_conf_t *conf = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ ob_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
- conf = this->private;
+ conf = this->private;
- if (!conf)
- return -1;
+ if (!conf)
+ return -1;
- gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
- "priv");
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
+ "priv");
- gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
- gf_proc_dump_write ("use_anonymous_fd", "%d", conf->use_anonymous_fd);
+ gf_proc_dump_write("use_anonymous_fd", "%d", conf->use_anonymous_fd);
- gf_proc_dump_write ("lazy_open", "%d", conf->lazy_open);
+ gf_proc_dump_write("lazy_open", "%d", conf->lazy_open);
- return 0;
+ return 0;
}
-
int
-ob_fdctx_dump (xlator_t *this, fd_t *fd)
+ob_fdctx_dump(xlator_t *this, fd_t *fd)
{
- ob_fd_t *ob_fd = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- int ret = 0;
-
- ret = TRY_LOCK (&fd->lock);
- if (ret)
- return 0;
-
- ob_fd = __ob_fd_ctx_get (this, fd);
- if (!ob_fd) {
- UNLOCK (&fd->lock);
- return 0;
- }
-
- gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
- "file");
- gf_proc_dump_add_section (key_prefix);
-
- gf_proc_dump_write ("fd", "%p", fd);
-
- gf_proc_dump_write ("open_frame", "%p", ob_fd->open_frame);
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ uint64_t value = 0;
+ int ret = 0, error = 0;
+
+ ret = TRY_LOCK(&fd->lock);
+ if (ret)
+ return 0;
- gf_proc_dump_write ("open_frame.root.unique", "%p",
- ob_fd->open_frame->root->unique);
+ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) {
+ error = (int32_t)value;
+ }
- gf_proc_dump_write ("loc.path", "%s", ob_fd->loc.path);
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
+ "file");
+ gf_proc_dump_add_section("%s", key_prefix);
- gf_proc_dump_write ("loc.ino", "%s", uuid_utoa (ob_fd->loc.gfid));
+ gf_proc_dump_write("fd", "%p", fd);
- gf_proc_dump_write ("flags", "%p", ob_fd->open_frame);
+ gf_proc_dump_write("error", "%d", error);
- UNLOCK (&fd->lock);
+ UNLOCK(&fd->lock);
- return 0;
+ return 0;
}
-
int
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- ret = xlator_mem_acct_init (this, gf_ob_mt_end + 1);
+ ret = xlator_mem_acct_init(this, gf_ob_mt_end + 1);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting failed");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, OPEN_BEHIND_MSG_NO_MEMORY,
+ "Memory accounting failed");
- return ret;
+ return ret;
}
-
int
-reconfigure (xlator_t *this, dict_t *options)
+reconfigure(xlator_t *this, dict_t *options)
{
- ob_conf_t *conf = NULL;
- int ret = -1;
+ ob_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
- conf = this->private;
+ GF_OPTION_RECONF("use-anonymous-fd", conf->use_anonymous_fd, options, bool,
+ out);
- GF_OPTION_RECONF ("use-anonymous-fd", conf->use_anonymous_fd, options,
- bool, out);
+ GF_OPTION_RECONF("lazy-open", conf->lazy_open, options, bool, out);
- GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out);
+ GF_OPTION_RECONF("read-after-open", conf->read_after_open, options, bool,
+ out);
- ret = 0;
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-init (xlator_t *this)
+init(xlator_t *this)
{
- ob_conf_t *conf = NULL;
+ ob_conf_t *conf = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: volume (%s) not configured with exactly one "
+ "child",
+ this->name);
+ return -1;
+ }
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
- return -1;
- }
+ if (!this->parents)
+ gf_msg(this->name, GF_LOG_WARNING, 0, OPEN_BEHIND_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_ob_mt_conf_t);
+ if (!conf)
+ goto err;
- if (!this->parents)
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ GF_OPTION_INIT("use-anonymous-fd", conf->use_anonymous_fd, bool, err);
- conf = GF_CALLOC (1, sizeof (*conf), gf_ob_mt_conf_t);
- if (!conf)
- goto err;
+ GF_OPTION_INIT("lazy-open", conf->lazy_open, bool, err);
- GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err);
+ GF_OPTION_INIT("read-after-open", conf->read_after_open, bool, err);
- GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err);
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, err);
- this->private = conf;
+ this->private = conf;
- return 0;
+ return 0;
err:
- if (conf)
- GF_FREE (conf);
+ if (conf)
+ GF_FREE(conf);
- return -1;
+ return -1;
}
-
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- ob_conf_t *conf = NULL;
+ ob_conf_t *conf = NULL;
- conf = this->private;
+ conf = this->private;
- GF_FREE (conf);
+ GF_FREE(conf);
- return;
+ return;
}
-
struct xlator_fops fops = {
- .open = ob_open,
- .readv = ob_readv,
- .writev = ob_writev,
- .flush = ob_flush,
- .fsync = ob_fsync,
- .fstat = ob_fstat,
- .ftruncate = ob_ftruncate,
- .fsetxattr = ob_fsetxattr,
- .fgetxattr = ob_fgetxattr,
- .fremovexattr = ob_fremovexattr,
- .finodelk = ob_finodelk,
- .fentrylk = ob_fentrylk,
- .fxattrop = ob_fxattrop,
- .fsetattr = ob_fsetattr,
- .fallocate = ob_fallocate,
- .discard = ob_discard,
- .unlink = ob_unlink,
- .rename = ob_rename,
- .lk = ob_lk,
+ .open = ob_open,
+ .create = ob_create,
+ .readv = ob_readv,
+ .writev = ob_writev,
+ .flush = ob_flush,
+ .fsync = ob_fsync,
+ .fstat = ob_fstat,
+ .seek = ob_seek,
+ .ftruncate = ob_ftruncate,
+ .fsetxattr = ob_fsetxattr,
+ .setxattr = ob_setxattr,
+ .fgetxattr = ob_fgetxattr,
+ .fremovexattr = ob_fremovexattr,
+ .finodelk = ob_finodelk,
+ .fentrylk = ob_fentrylk,
+ .fxattrop = ob_fxattrop,
+ .fsetattr = ob_fsetattr,
+ .setattr = ob_setattr,
+ .fallocate = ob_fallocate,
+ .discard = ob_discard,
+ .zerofill = ob_zerofill,
+ .unlink = ob_unlink,
+ .rename = ob_rename,
+ .lk = ob_lk,
};
struct xlator_cbks cbks = {
- .release = ob_release,
+ .fdclose = ob_fdclose,
+ .forget = ob_forget,
};
struct xlator_dumpops dumpops = {
- .priv = ob_priv_dump,
- .fdctx = ob_fdctx_dump,
+ .priv = ob_priv_dump,
+ .fdctx = ob_fdctx_dump,
};
-
struct volume_options options[] = {
- { .key = {"use-anonymous-fd"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "yes",
- .description = "For read operations, use anonymous FD when "
- "original FD is open-behind and not yet opened in the backend.",
- },
- { .key = {"lazy-open"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "yes",
- .description = "Perform open in the backend only when a necessary "
- "FOP arrives (e.g writev on the FD, unlink of the file). When option "
- "is disabled, perform backend open right after unwinding open().",
- },
- { .key = {NULL} }
+ {
+ .key = {"open-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable open-behind",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"use-anonymous-fd"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description =
+ "For read operations, use anonymous FD when "
+ "original FD is open-behind and not yet opened in the backend.",
+ },
+ {
+ .key = {"lazy-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description =
+ "Perform open in the backend only when a necessary "
+ "FOP arrives (e.g writev on the FD, unlink of the file). When "
+ "option "
+ "is disabled, perform backend open right after unwinding open().",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {},
+ /* option_validation_fn validate_fn; */
+ },
+ {
+ .key = {"read-after-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description = "read is sent only after actual open happens and real "
+ "fd is obtained, instead of doing on anonymous fd "
+ "(similar to write)",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {},
+ /* option_validation_fn validate_fn; */
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"open-behind"},
+ .description = "Enable/Disable open behind translator"},
+ {.key = {NULL}}
+
+};
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "open-behind",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am
index 4906f408abc..8eb6cece738 100644
--- a/xlators/performance/quick-read/src/Makefile.am
+++ b/xlators/performance/quick-read/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = quick-read.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-quick_read_la_LDFLAGS = -module -avoid-version
+quick_read_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quick_read_la_SOURCES = quick-read.c
quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quick-read.h quick-read-mem-types.h
+noinst_HEADERS = quick-read.h quick-read-mem-types.h quick-read-messages.h
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h
index 78547f64116..e4aef8549ff 100644
--- a/xlators/performance/quick-read/src/quick-read-mem-types.h
+++ b/xlators/performance/quick-read/src/quick-read-mem-types.h
@@ -11,17 +11,13 @@
#ifndef __QR_MEM_TYPES_H__
#define __QR_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_qr_mem_types_ {
- gf_qr_mt_qr_inode_t = gf_common_mt_end + 1,
- gf_qr_mt_content_t,
- gf_qr_mt_qr_fd_ctx_t,
- gf_qr_mt_iovec,
- gf_qr_mt_qr_conf_t,
- gf_qr_mt_qr_priority_t,
- gf_qr_mt_qr_private_t,
- gf_qr_mt_qr_unlink_ctx_t,
- gf_qr_mt_end
+ gf_qr_mt_qr_inode_t = gf_common_mt_end + 1,
+ gf_qr_mt_content_t,
+ gf_qr_mt_qr_priority_t,
+ gf_qr_mt_qr_private_t,
+ gf_qr_mt_end
};
#endif
diff --git a/xlators/performance/quick-read/src/quick-read-messages.h b/xlators/performance/quick-read/src/quick-read-messages.h
new file mode 100644
index 00000000000..da9724a3c9c
--- /dev/null
+++ b/xlators/performance/quick-read/src/quick-read-messages.h
@@ -0,0 +1,31 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUICK_READ_MESSAGES_H_
+#define _QUICK_READ_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(QUICK_READ, QUICK_READ_MSG_ENFORCEMENT_FAILED,
+ QUICK_READ_MSG_INVALID_ARGUMENT,
+ QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED, QUICK_READ_MSG_NO_MEMORY,
+ QUICK_READ_MSG_VOL_MISCONFIGURED, QUICK_READ_MSG_DICT_SET_FAILED,
+ QUICK_READ_MSG_INVALID_CONFIG, QUICK_READ_MSG_LRU_NOT_EMPTY);
+
+#endif /* _QUICK_READ_MESSAGES_H_ */
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index 445ea8658ea..7fe4b3c3a4b 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -8,1140 +8,1637 @@
cases as published by the Free Software Foundation.
*/
+#include <math.h>
#include "quick-read.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
+#include "quick-read-messages.h"
+#include <glusterfs/upcall-utils.h>
+#include <glusterfs/atomic.h>
-qr_inode_t *qr_inode_ctx_get (xlator_t *this, inode_t *inode);
-void __qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode);
+typedef struct qr_local {
+ inode_t *inode;
+ uint64_t incident_gen;
+ fd_t *fd;
+} qr_local_t;
+qr_inode_t *
+qr_inode_ctx_get(xlator_t *this, inode_t *inode);
-int
-__qr_inode_ctx_set (xlator_t *this, inode_t *inode, qr_inode_t *qr_inode)
+void
+__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table,
+ qr_inode_t *qr_inode);
+
+void
+qr_local_wipe(qr_local_t *local)
{
- uint64_t value = 0;
- int ret = -1;
+ if (!local)
+ goto out;
- value = (long) qr_inode;
+ if (local->inode)
+ inode_unref(local->inode);
- ret = __inode_ctx_set (inode, this, &value);
+ if (local->fd)
+ fd_unref(local->fd);
- return ret;
+ GF_FREE(local);
+out:
+ return;
}
+uint64_t
+__qr_get_generation(xlator_t *this, qr_inode_t *qr_inode)
+{
+ uint64_t gen = 0, rollover;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+
+ gen = GF_ATOMIC_INC(priv->generation);
+ if (gen == 0) {
+ qr_inode->gen_rollover = !qr_inode->gen_rollover;
+ gen = GF_ATOMIC_INC(priv->generation);
+ __qr_inode_prune_data(this, table, qr_inode);
+ qr_inode->gen = qr_inode->invalidation_time = gen - 1;
+ }
+
+ rollover = qr_inode->gen_rollover;
+ gen |= (rollover << 32);
+ return gen;
+}
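
The value returned above packs two pieces of information: the low bits come from the shared generation counter and bit 32 carries the per-inode rollover parity, so generations issued before a counter wrap can be distinguished from those issued after it. A small, hypothetical illustration of that layout (it assumes priv->generation is a 32-bit counter; its declaration is outside this hunk):

    #include <stdint.h>
    #include <assert.h>

    static void
    qr_generation_layout_example(void)
    {
        uint64_t counter = 7;  /* as returned by GF_ATOMIC_INC(priv->generation) */
        uint64_t rollover = 1; /* qr_inode->gen_rollover after one counter wrap */
        uint64_t generation = counter | (rollover << 32);

        assert(generation == 0x100000007ULL);
    }
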
-qr_inode_t *
-__qr_inode_ctx_get (xlator_t *this, inode_t *inode)
+uint64_t
+qr_get_generation(xlator_t *this, inode_t *inode)
{
- qr_inode_t *qr_inode = NULL;
- uint64_t value = 0;
- int ret = -1;
+ qr_inode_t *qr_inode = NULL;
+ uint64_t gen = 0;
+ qr_inode_table_t *table = NULL;
+ qr_private_t *priv = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+
+ qr_inode = qr_inode_ctx_get(this, inode);
+
+ if (qr_inode) {
+ LOCK(&table->lock);
+ {
+ gen = __qr_get_generation(this, qr_inode);
+ }
+ UNLOCK(&table->lock);
+ } else {
+ gen = GF_ATOMIC_INC(priv->generation);
+ if (gen == 0) {
+ gen = GF_ATOMIC_INC(priv->generation);
+ }
+ }
+
+ return gen;
+}
- ret = __inode_ctx_get (inode, this, &value);
- if (ret)
- return NULL;
+qr_local_t *
+qr_local_get(xlator_t *this, inode_t *inode)
+{
+ qr_local_t *local = NULL;
- qr_inode = (void *) ((long) value);
+ local = GF_CALLOC(1, sizeof(*local), gf_common_mt_char);
+ if (!local)
+ goto out;
- return qr_inode;
+ local->incident_gen = qr_get_generation(this, inode);
+out:
+ return local;
}
+#define QR_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ qr_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ qr_local_wipe(__local); \
+ } while (0)
+
+void
+__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode,
+ uint64_t gen);
+
+int
+__qr_inode_ctx_set(xlator_t *this, inode_t *inode, qr_inode_t *qr_inode)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ value = (long)qr_inode;
+
+ ret = __inode_ctx_set(inode, this, &value);
+
+ return ret;
+}
qr_inode_t *
-qr_inode_ctx_get (xlator_t *this, inode_t *inode)
+__qr_inode_ctx_get(xlator_t *this, inode_t *inode)
{
- qr_inode_t *qr_inode = NULL;
+ qr_inode_t *qr_inode = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- LOCK (&inode->lock);
- {
- qr_inode = __qr_inode_ctx_get (this, inode);
- }
- UNLOCK (&inode->lock);
+ ret = __inode_ctx_get(inode, this, &value);
+ if (ret)
+ return NULL;
- return qr_inode;
+ qr_inode = (void *)((long)value);
+
+ return qr_inode;
}
+qr_inode_t *
+qr_inode_ctx_get(xlator_t *this, inode_t *inode)
+{
+ qr_inode_t *qr_inode = NULL;
+
+ if (inode == NULL)
+ goto out;
+
+ LOCK(&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get(this, inode);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return qr_inode;
+}
qr_inode_t *
-qr_inode_new (xlator_t *this, inode_t *inode)
+qr_inode_new(xlator_t *this, inode_t *inode)
{
- qr_inode_t *qr_inode = NULL;
+ qr_inode_t *qr_inode = NULL;
- qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t);
- if (!qr_inode)
- return NULL;
+ qr_inode = GF_CALLOC(1, sizeof(*qr_inode), gf_qr_mt_qr_inode_t);
+ if (!qr_inode)
+ return NULL;
- INIT_LIST_HEAD (&qr_inode->lru);
+ INIT_LIST_HEAD(&qr_inode->lru);
- qr_inode->priority = 0; /* initial priority */
+ qr_inode->priority = 0; /* initial priority */
- return qr_inode;
+ return qr_inode;
}
-
qr_inode_t *
-qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode)
+qr_inode_ctx_get_or_new(xlator_t *this, inode_t *inode)
{
- qr_inode_t *qr_inode = NULL;
- int ret = -1;
- qr_private_t *priv = NULL;
-
- priv = this->private;
-
- LOCK (&inode->lock);
- {
- qr_inode = __qr_inode_ctx_get (this, inode);
- if (qr_inode)
- goto unlock;
-
- qr_inode = qr_inode_new (this, inode);
- if (!qr_inode)
- goto unlock;
-
- ret = __qr_inode_ctx_set (this, inode, qr_inode);
- if (ret) {
- __qr_inode_prune (&priv->table, qr_inode);
- GF_FREE (qr_inode);
- }
- }
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ qr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ LOCK(&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get(this, inode);
+ if (qr_inode)
+ goto unlock;
+
+ qr_inode = qr_inode_new(this, inode);
+ if (!qr_inode)
+ goto unlock;
+
+ ret = __qr_inode_ctx_set(this, inode, qr_inode);
+ if (ret) {
+ __qr_inode_prune(this, &priv->table, qr_inode, 0);
+ GF_FREE(qr_inode);
+ qr_inode = NULL;
+ }
+ }
unlock:
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
- return qr_inode;
+ return qr_inode;
}
-
uint32_t
-qr_get_priority (qr_conf_t *conf, const char *path)
+qr_get_priority(qr_conf_t *conf, const char *path)
{
- uint32_t priority = 0;
- struct qr_priority *curr = NULL;
+ uint32_t priority = 0;
+ struct qr_priority *curr = NULL;
- list_for_each_entry (curr, &conf->priority_list, list) {
- if (fnmatch (curr->pattern, path, FNM_NOESCAPE) == 0)
- priority = curr->priority;
- }
+ list_for_each_entry(curr, &conf->priority_list, list)
+ {
+ if (fnmatch(curr->pattern, path, FNM_NOESCAPE) == 0)
+ priority = curr->priority;
+ }
- return priority;
+ return priority;
}
-
void
-__qr_inode_register (qr_inode_table_t *table, qr_inode_t *qr_inode)
+__qr_inode_register(xlator_t *this, qr_inode_table_t *table,
+ qr_inode_t *qr_inode)
{
- if (!qr_inode->data)
- return;
+ qr_private_t *priv = NULL;
- if (list_empty (&qr_inode->lru))
- /* first time addition of this qr_inode into table */
- table->cache_used += qr_inode->size;
- else
- list_del_init (&qr_inode->lru);
+ if (!qr_inode->data)
+ return;
- list_add_tail (&qr_inode->lru, &table->lru[qr_inode->priority]);
-}
+ priv = this->private;
+ if (!priv)
+ return;
+ if (list_empty(&qr_inode->lru))
+ /* first time addition of this qr_inode into table */
+ table->cache_used += qr_inode->size;
+ else
+ list_del_init(&qr_inode->lru);
+
+ list_add_tail(&qr_inode->lru, &table->lru[qr_inode->priority]);
+
+ GF_ATOMIC_INC(priv->qr_counter.files_cached);
+
+ return;
+}
void
-qr_inode_set_priority (xlator_t *this, inode_t *inode, const char *path)
+qr_inode_set_priority(xlator_t *this, inode_t *inode, const char *path)
{
- uint32_t priority = 0;
- qr_inode_table_t *table = NULL;
- qr_inode_t *qr_inode = NULL;
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
-
- qr_inode = qr_inode_ctx_get (this, inode);
- if (!qr_inode)
- return;
-
- priv = this->private;
- table = &priv->table;
- conf = &priv->conf;
-
- if (path)
- priority = qr_get_priority (conf, path);
- else
- /* retain existing priority, just bump LRU */
- priority = qr_inode->priority;
-
- LOCK (&table->lock);
- {
- qr_inode->priority = priority;
-
- __qr_inode_register (table, qr_inode);
- }
- UNLOCK (&table->lock);
-}
+ uint32_t priority = 0;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ return;
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
+
+ if (path)
+ priority = qr_get_priority(conf, path);
+ else
+ /* retain existing priority, just bump LRU */
+ priority = qr_inode->priority;
+
+ LOCK(&table->lock);
+ {
+ qr_inode->priority = priority;
+
+ __qr_inode_register(this, table, qr_inode);
+ }
+ UNLOCK(&table->lock);
+}
-/* To be called with priv->table.lock held */
void
-__qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode)
+__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table,
+ qr_inode_t *qr_inode)
{
- GF_FREE (qr_inode->data);
- qr_inode->data = NULL;
+ qr_private_t *priv = NULL;
- if (!list_empty (&qr_inode->lru)) {
- table->cache_used -= qr_inode->size;
- qr_inode->size = 0;
+ priv = this->private;
- list_del_init (&qr_inode->lru);
- }
+ GF_FREE(qr_inode->data);
+ qr_inode->data = NULL;
- memset (&qr_inode->buf, 0, sizeof (qr_inode->buf));
-}
+ if (!list_empty(&qr_inode->lru)) {
+ table->cache_used -= qr_inode->size;
+ qr_inode->size = 0;
+
+ list_del_init(&qr_inode->lru);
+ GF_ATOMIC_DEC(priv->qr_counter.files_cached);
+ }
+
+ memset(&qr_inode->buf, 0, sizeof(qr_inode->buf));
+}
+/* To be called with priv->table.lock held */
void
-qr_inode_prune (xlator_t *this, inode_t *inode)
+__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode,
+ uint64_t gen)
{
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- qr_inode_t *qr_inode = NULL;
-
- qr_inode = qr_inode_ctx_get (this, inode);
- if (!qr_inode)
- return;
-
- priv = this->private;
- table = &priv->table;
-
- LOCK (&table->lock);
- {
- __qr_inode_prune (table, qr_inode);
- }
- UNLOCK (&table->lock);
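+ /* drop the cached data and record the invalidation generation; replies issued before this point will not repopulate the cache */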
+ __qr_inode_prune_data(this, table, qr_inode);
+ if (gen)
+ qr_inode->gen = gen;
+ qr_inode->invalidation_time = __qr_get_generation(this, qr_inode);
}
+void
+qr_inode_prune(xlator_t *this, inode_t *inode, uint64_t gen)
+{
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
+
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ return;
+
+ priv = this->private;
+ table = &priv->table;
+
+ LOCK(&table->lock);
+ {
+ __qr_inode_prune(this, table, qr_inode, gen);
+ }
+ UNLOCK(&table->lock);
+}
/* To be called with priv->table.lock held */
void
-__qr_cache_prune (qr_inode_table_t *table, qr_conf_t *conf)
+__qr_cache_prune(xlator_t *this, qr_inode_table_t *table, qr_conf_t *conf)
{
- qr_inode_t *curr = NULL;
- qr_inode_t *next = NULL;
- int index = 0;
- size_t size_pruned = 0;
+ qr_inode_t *curr = NULL;
+ qr_inode_t *next = NULL;
+ int index = 0;
+ size_t size_pruned = 0;
- for (index = 0; index < conf->max_pri; index++) {
- list_for_each_entry_safe (curr, next, &table->lru[index], lru) {
+ for (index = 0; index < conf->max_pri; index++) {
+ list_for_each_entry_safe(curr, next, &table->lru[index], lru)
+ {
+ size_pruned += curr->size;
- size_pruned += curr->size;
+ __qr_inode_prune(this, table, curr, 0);
- __qr_inode_prune (table, curr);
-
- if (table->cache_used < conf->cache_size)
- return;
- }
+ if (table->cache_used < conf->cache_size)
+ return;
}
+ }
- return;
+ return;
}
-
void
-qr_cache_prune (xlator_t *this)
+qr_cache_prune(xlator_t *this)
{
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
- qr_inode_table_t *table = NULL;
-
- priv = this->private;
- table = &priv->table;
- conf = &priv->conf;
-
- LOCK (&table->lock);
- {
- if (table->cache_used > conf->cache_size)
- __qr_cache_prune (table, conf);
- }
- UNLOCK (&table->lock);
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_table_t *table = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
+
+ LOCK(&table->lock);
+ {
+ if (table->cache_used > conf->cache_size)
+ __qr_cache_prune(this, table, conf);
+ }
+ UNLOCK(&table->lock);
}
-
void *
-qr_content_extract (dict_t *xdata)
+qr_content_extract(dict_t *xdata)
{
- data_t *data = NULL;
- void *content = NULL;
+ data_t *data = NULL;
+ void *content = NULL;
+ int ret = 0;
- data = dict_get (xdata, GF_CONTENT_KEY);
- if (!data)
- return NULL;
+ ret = dict_get_with_ref(xdata, GF_CONTENT_KEY, &data);
+ if (ret < 0 || !data)
+ return NULL;
- content = GF_CALLOC (1, data->len, gf_qr_mt_content_t);
- if (!content)
- return NULL;
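+ /* copy the inlined file content out of the dict so the cache owns its own buffer */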
+ content = GF_MALLOC(data->len, gf_qr_mt_content_t);
+ if (!content)
+ goto out;
- memcpy (content, data->data, data->len);
+ memcpy(content, data->data, data->len);
- return content;
+out:
+ data_unref(data);
+ return content;
}
-
void
-qr_content_update (xlator_t *this, qr_inode_t *qr_inode, void *data,
- struct iatt *buf)
+qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
+ struct iatt *buf, uint64_t gen)
{
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ uint32_t rollover = 0;
- priv = this->private;
- table = &priv->table;
+ rollover = gen >> 32;
+ gen = gen & 0xffffffff;
- LOCK (&table->lock);
- {
- __qr_inode_prune (table, qr_inode);
+ priv = this->private;
+ table = &priv->table;
- qr_inode->data = data;
- qr_inode->size = buf->ia_size;
+ LOCK(&table->lock);
+ {
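+ /* ignore stale responses: a reply whose generation is older than what is already cached, or older than a later invalidation, must not overwrite newer state */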
+ if ((rollover != qr_inode->gen_rollover) ||
+ (gen && qr_inode->gen && (qr_inode->gen >= gen)))
+ goto unlock;
- qr_inode->ia_mtime = buf->ia_mtime;
- qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec;
+ if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen))
+ goto unlock;
- qr_inode->buf = *buf;
+ __qr_inode_prune(this, table, qr_inode, gen);
- gettimeofday (&qr_inode->last_refresh, NULL);
+ qr_inode->data = data;
+ data = NULL;
+ qr_inode->size = buf->ia_size;
- __qr_inode_register (table, qr_inode);
- }
- UNLOCK (&table->lock);
+ qr_inode->ia_mtime = buf->ia_mtime;
+ qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec;
+ qr_inode->ia_ctime = buf->ia_ctime;
+ qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec;
- qr_cache_prune (this);
-}
+ qr_inode->buf = *buf;
+ qr_inode->last_refresh = gf_time();
+ __qr_inode_register(this, table, qr_inode);
+ }
+unlock:
+ UNLOCK(&table->lock);
+
+ if (data)
+ GF_FREE(data);
+
+ qr_cache_prune(this);
+}
gf_boolean_t
-qr_size_fits (qr_conf_t *conf, struct iatt *buf)
+qr_size_fits(qr_conf_t *conf, struct iatt *buf)
{
- return (buf->ia_size <= conf->max_file_size);
+ return (buf->ia_size <= conf->max_file_size);
}
+gf_boolean_t
+qr_mtime_equal(qr_inode_t *qr_inode, struct iatt *buf)
+{
+ return (qr_inode->ia_mtime == buf->ia_mtime &&
+ qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec);
+}
gf_boolean_t
-qr_mtime_equal (qr_inode_t *qr_inode, struct iatt *buf)
+qr_ctime_equal(qr_inode_t *qr_inode, struct iatt *buf)
{
- return (qr_inode->ia_mtime == buf->ia_mtime &&
- qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec);
+ return (qr_inode->ia_ctime == buf->ia_ctime &&
+ qr_inode->ia_ctime_nsec == buf->ia_ctime_nsec);
}
+gf_boolean_t
+qr_time_equal(qr_conf_t *conf, qr_inode_t *qr_inode, struct iatt *buf)
+{
+ if (conf->ctime_invalidation)
+ return qr_ctime_equal(qr_inode, buf);
+ else
+ return qr_mtime_equal(qr_inode, buf);
+}
void
-__qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf)
+__qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf,
+ uint64_t gen)
{
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- qr_conf_t *conf = NULL;
-
- priv = this->private;
- table = &priv->table;
- conf = &priv->conf;
-
- if (qr_size_fits (conf, buf) && qr_mtime_equal (qr_inode, buf)) {
- qr_inode->buf = *buf;
-
- gettimeofday (&qr_inode->last_refresh, NULL);
-
- __qr_inode_register (table, qr_inode);
- } else {
- __qr_inode_prune (table, qr_inode);
- }
-
- return;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_conf_t *conf = NULL;
+ uint32_t rollover = 0;
+
+ rollover = gen >> 32;
+ gen = gen & 0xffffffff;
+
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
+
+ /* allow for rollover of the generation counter */
+ if ((rollover != qr_inode->gen_rollover) ||
+ (gen && qr_inode->gen && (qr_inode->gen >= gen)))
+ goto done;
+
+ if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen))
+ goto done;
+
+ qr_inode->gen = gen;
+
+ if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) {
+ qr_inode->buf = *buf;
+ qr_inode->last_refresh = gf_time();
+ __qr_inode_register(this, table, qr_inode);
+ } else {
+ __qr_inode_prune(this, table, qr_inode, gen);
+ }
+
+done:
+ return;
}
-
void
-qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf)
+qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf,
+ uint64_t gen)
{
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
- priv = this->private;
- table = &priv->table;
+ priv = this->private;
+ table = &priv->table;
- LOCK (&table->lock);
- {
- __qr_content_refresh (this, qr_inode, buf);
- }
- UNLOCK (&table->lock);
+ LOCK(&table->lock);
+ {
+ __qr_content_refresh(this, qr_inode, buf, gen);
+ }
+ UNLOCK(&table->lock);
}
-
gf_boolean_t
-__qr_cache_is_fresh (xlator_t *this, qr_inode_t *qr_inode)
+__qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode)
{
- qr_conf_t *conf = NULL;
- qr_private_t *priv = NULL;
- struct timeval now;
- struct timeval diff;
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
- priv = this->private;
- conf = &priv->conf;
+ priv = this->private;
+ conf = &priv->conf;
- gettimeofday (&now, NULL);
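+ /* anything cached before the most recent child-down event is treated as stale */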
+ if (qr_inode->last_refresh < priv->last_child_down)
+ return _gf_false;
- timersub (&now, &qr_inode->last_refresh, &diff);
+ if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout)
+ return _gf_false;
- if (diff.tv_sec >= conf->cache_timeout)
- return _gf_false;
-
- return _gf_true;
+ return _gf_true;
}
-
int
-qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode_ret,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+qr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode_ret, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- void *content = NULL;
- qr_inode_t *qr_inode = NULL;
- inode_t *inode = NULL;
-
- inode = frame->local;
- frame->local = NULL;
-
- if (op_ret == -1) {
- qr_inode_prune (this, inode);
- goto out;
- }
-
- if (dict_get (xdata, "sh-failed")) {
- qr_inode_prune (this, inode);
- goto out;
- }
-
- content = qr_content_extract (xdata);
-
- if (content) {
- /* new content came along, always replace old content */
- qr_inode = qr_inode_ctx_get_or_new (this, inode);
- if (!qr_inode)
- /* no harm done */
- goto out;
-
- qr_content_update (this, qr_inode, content, buf);
- } else {
- /* purge old content if necessary */
- qr_inode = qr_inode_ctx_get (this, inode);
- if (!qr_inode)
- /* usual path for large files */
- goto out;
-
- qr_content_refresh (this, qr_inode, buf);
- }
-out:
- if (inode)
- inode_unref (inode);
+ void *content = NULL;
+ qr_inode_t *qr_inode = NULL;
+ inode_t *inode = NULL;
+ qr_local_t *local = NULL;
+
+ local = frame->local;
+ inode = local->inode;
+
+ if (op_ret == -1) {
+ qr_inode_prune(this, inode, local->incident_gen);
+ goto out;
+ }
+
+ if (dict_get(xdata, GLUSTERFS_BAD_INODE)) {
+ qr_inode_prune(this, inode, local->incident_gen);
+ goto out;
+ }
+
+ if (dict_get(xdata, "sh-failed")) {
+ qr_inode_prune(this, inode, local->incident_gen);
+ goto out;
+ }
+
+ content = qr_content_extract(xdata);
+
+ if (content) {
+ /* new content came along, always replace old content */
+ qr_inode = qr_inode_ctx_get_or_new(this, inode);
+ if (!qr_inode) {
+ /* no harm done */
+ GF_FREE(content);
+ goto out;
+ }
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode_ret,
- buf, xdata, postparent);
- return 0;
-}
+ qr_content_update(this, qr_inode, content, buf, local->incident_gen);
+ } else {
+ /* purge old content if necessary */
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ /* usual path for large files */
+ goto out;
+ qr_content_refresh(this, qr_inode, buf, local->incident_gen);
+ }
+out:
+ QR_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode_ret, buf, xdata,
+ postparent);
+ return 0;
+}
int
-qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+qr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
- qr_inode_t *qr_inode = NULL;
- int ret = -1;
- dict_t *new_xdata = NULL;
-
- priv = this->private;
- conf = &priv->conf;
-
- qr_inode = qr_inode_ctx_get (this, loc->inode);
- if (qr_inode && qr_inode->data)
- /* cached. only validate in qr_lookup_cbk */
- goto wind;
-
- if (!xdata)
- xdata = new_xdata = dict_new ();
-
- if (!xdata)
- goto wind;
-
- ret = 0;
- if (conf->max_file_size)
- ret = dict_set (xdata, GF_CONTENT_KEY,
- data_from_uint64 (conf->max_file_size));
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "cannot set key in request dict (%s)",
- loc->path);
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ dict_t *new_xdata = NULL;
+ qr_local_t *local = NULL;
+
+ priv = this->private;
+ conf = &priv->conf;
+ local = qr_local_get(this, loc->inode);
+ local->inode = inode_ref(loc->inode);
+ frame->local = local;
+
+ qr_inode = qr_inode_ctx_get(this, loc->inode);
+ if (qr_inode && qr_inode->data)
+ /* cached. only validate in qr_lookup_cbk */
+ goto wind;
+
+ if (!xdata)
+ xdata = new_xdata = dict_new();
+
+ if (!xdata)
+ goto wind;
+
+ ret = 0;
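+ /* ask the lower layers to return file content inline (up to max-file-size) along with the lookup reply */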
+ if (conf->max_file_size)
+ ret = dict_set(xdata, GF_CONTENT_KEY,
+ data_from_uint64(conf->max_file_size));
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_DICT_SET_FAILED,
+ "cannot set key in request dict (%s)", loc->path);
wind:
- frame->local = inode_ref (loc->inode);
+ STACK_WIND(frame, qr_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
- STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ if (new_xdata)
+ dict_unref(new_xdata);
- if (new_xdata)
- dict_unref (new_xdata);
-
- return 0;
+ return 0;
}
-
int
-qr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+qr_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- qr_inode_t *qr_inode = NULL;
+ gf_dirent_t *entry = NULL;
+ qr_inode_t *qr_inode = NULL;
+ qr_local_t *local = NULL;
- if (op_ret <= 0)
- goto unwind;
+ local = frame->local;
- list_for_each_entry (entry, &entries->list, list) {
- if (!entry->inode)
- continue;
+ if (op_ret <= 0)
+ goto unwind;
- qr_inode = qr_inode_ctx_get (this, entry->inode);
- if (!qr_inode)
- /* no harm */
- continue;
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode)
+ continue;
- qr_content_refresh (this, qr_inode, &entry->d_stat);
- }
+ qr_inode = qr_inode_ctx_get(this, entry->inode);
+ if (!qr_inode)
+ /* no harm */
+ continue;
+
+ qr_content_refresh(this, qr_inode, &entry->d_stat, local->incident_gen);
+ }
unwind:
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
+ QR_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
-
int
-qr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, dict_t *xdata)
+qr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, qr_readdirp_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp,
- fd, size, offset, xdata);
- return 0;
-}
+ qr_local_t *local = NULL;
+ local = qr_local_get(this, NULL);
+ frame->local = local;
+
+ STACK_WIND(frame, qr_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+ return 0;
+}
int
-qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+qr_readv_cached(call_frame_t *frame, qr_inode_t *qr_inode, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- xlator_t *this = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- int op_ret = -1;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct iovec iov = {0, };
- struct iatt buf = {0, };
+ xlator_t *this = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ int op_ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ struct iatt buf = {
+ 0,
+ };
+
+ this = frame->this;
+ priv = this->private;
+ table = &priv->table;
+
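+ /* serve the read directly from the cached content if it is present, in range and still fresh; otherwise report a miss */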
+ LOCK(&table->lock);
+ {
+ if (!qr_inode->data)
+ goto unlock;
+
+ if (offset >= qr_inode->size)
+ goto unlock;
+
+ if (!__qr_cache_is_fresh(this, qr_inode))
+ goto unlock;
+
+ op_ret = min(size, (qr_inode->size - offset));
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, op_ret);
+ if (!iobuf) {
+ op_ret = -1;
+ goto unlock;
+ }
- this = frame->this;
- priv = this->private;
- table = &priv->table;
+ iobref = iobref_new();
+ if (!iobref) {
+ op_ret = -1;
+ goto unlock;
+ }
- LOCK (&table->lock);
- {
- op_ret = -1;
+ iobref_add(iobref, iobuf);
- if (!qr_inode->data)
- goto unlock;
+ memcpy(iobuf->ptr, qr_inode->data + offset, op_ret);
- if (offset >= qr_inode->size)
- goto unlock;
+ buf = qr_inode->buf;
- if (!__qr_cache_is_fresh (this, qr_inode))
- goto unlock;
+ /* bump LRU */
+ __qr_inode_register(frame->this, table, qr_inode);
+ }
+unlock:
+ UNLOCK(&table->lock);
- op_ret = min (size, (qr_inode->size - offset));
+ if (op_ret >= 0) {
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = op_ret;
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, op_ret);
- if (!iobuf) {
- op_ret = -1;
- goto unlock;
- }
+ GF_ATOMIC_INC(priv->qr_counter.cache_hit);
+ STACK_UNWIND_STRICT(readv, frame, op_ret, 0, &iov, 1, &buf, iobref,
+ xdata);
+ } else {
+ GF_ATOMIC_INC(priv->qr_counter.cache_miss);
+ }
- iobref = iobref_new ();
- if (!iobref) {
- op_ret = -1;
- iobuf_unref (iobuf);
- goto unlock;
- }
+ if (iobuf)
+ iobuf_unref(iobuf);
- iobref_add (iobref, iobuf);
+ if (iobref)
+ iobref_unref(iobref);
- memcpy (iobuf->ptr, qr_inode->data + offset, op_ret);
+ return op_ret;
+}
- buf = qr_inode->buf;
+int
+qr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ qr_inode_t *qr_inode = NULL;
- /* bump LRU */
- __qr_inode_register (table, qr_inode);
- }
-unlock:
- UNLOCK (&table->lock);
+ qr_inode = qr_inode_ctx_get(this, fd->inode);
+ if (!qr_inode)
+ goto wind;
- if (op_ret > 0) {
- iov.iov_base = iobuf->ptr;
- iov.iov_len = op_ret;
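+ /* a negative return means the read could not be served from cache; wind it to the child xlator */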
+ if (qr_readv_cached(frame, qr_inode, size, offset, flags, xdata) < 0)
+ goto wind;
+
+ return 0;
+wind:
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
- STACK_UNWIND_STRICT (readv, frame, op_ret, 0, &iov, 1,
- &buf, iobref, xdata);
- }
+int32_t
+qr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ qr_local_t *local = NULL;
- if (iobuf)
- iobuf_unref (iobuf);
+ local = frame->local;
- if (iobref)
- iobref_unref (iobref);
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
- return op_ret;
+ QR_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-
int
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+qr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- qr_inode_t *qr_inode = NULL;
+ qr_local_t *local = NULL;
- qr_inode = qr_inode_ctx_get (this, fd->inode);
- if (!qr_inode)
- goto wind;
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
- if (qr_readv_cached (frame, qr_inode, size, offset, flags, xdata) <= 0)
- goto wind;
+ frame->local = local;
- return 0;
-wind:
- STACK_WIND (frame, default_readv_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+ STACK_WIND(frame, qr_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, iov, count, offset, flags,
+ iobref, xdata);
+ return 0;
}
+int32_t
+qr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
+
+ local = frame->local;
+ qr_inode_prune(this, local->inode, local->incident_gen);
+
+ QR_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
int
-qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
- int count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
+qr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- qr_inode_prune (this, fd->inode);
+ qr_local_t *local = NULL;
+
+ local = qr_local_get(this, loc->inode);
+ local->inode = inode_ref(loc->inode);
+ frame->local = local;
- STACK_WIND (frame, default_writev_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, iov, count, offset, flags, iobref, xdata);
- return 0;
+ STACK_WIND(frame, qr_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
+int32_t
+qr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
+
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
+
+ QR_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
int
-qr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+qr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- qr_inode_prune (this, loc->inode);
+ qr_local_t *local = NULL;
+
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
- STACK_WIND (frame, default_truncate_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
- loc, offset, xdata);
- return 0;
+ STACK_WIND(frame, qr_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
+int32_t
+qr_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
-int
-qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
+
+ QR_STACK_UNWIND(fallocate, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int
+qr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int keep_size,
+ off_t offset, size_t len, dict_t *xdata)
{
- qr_inode_prune (this, fd->inode);
+ qr_local_t *local = NULL;
- STACK_WIND (frame, default_ftruncate_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
- fd, offset, xdata);
- return 0;
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
+
+ STACK_WIND(frame, qr_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+ xdata);
+ return 0;
}
+int32_t
+qr_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
+
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
-int
-qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- fd_t *fd, dict_t *xdata)
+ QR_STACK_UNWIND(discard, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int
+qr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- qr_inode_set_priority (this, fd->inode, loc->path);
+ qr_local_t *local = NULL;
- STACK_WIND (frame, default_open_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
+
+ STACK_WIND(frame, qr_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
}
-int
-qr_forget (xlator_t *this, inode_t *inode)
+int32_t
+qr_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- qr_inode_t *qr_inode = NULL;
+ qr_local_t *local = NULL;
- qr_inode = qr_inode_ctx_get (this, inode);
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
- if (!qr_inode)
- return 0;
+ QR_STACK_UNWIND(zerofill, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
- qr_inode_prune (this, inode);
+static int
+qr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
- GF_FREE (qr_inode);
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
- return 0;
+ STACK_WIND(frame, qr_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
}
+int
+qr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
+{
+ qr_inode_set_priority(this, fd->inode, loc->path);
-int32_t
-qr_inodectx_dump (xlator_t *this, inode_t *inode)
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int
+qr_forget(xlator_t *this, inode_t *inode)
{
- qr_inode_t *qr_inode = NULL;
- int32_t ret = -1;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- char buf[256] = {0, };
+ qr_inode_t *qr_inode = NULL;
- qr_inode = qr_inode_ctx_get (this, inode);
- if (!qr_inode)
- goto out;
+ qr_inode = qr_inode_ctx_get(this, inode);
- gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read",
- "inodectx");
- gf_proc_dump_add_section (key_prefix);
+ if (!qr_inode)
+ return 0;
- gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->data ? "yes" : "no");
+ qr_inode_prune(this, inode, qr_get_generation(this, inode));
- if (qr_inode->last_refresh.tv_sec) {
- gf_time_fmt (buf, sizeof buf, qr_inode->last_refresh.tv_sec,
- gf_timefmt_FT);
- snprintf (buf + strlen (buf), sizeof buf - strlen (buf),
- ".%"GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec);
+ GF_FREE(qr_inode);
- gf_proc_dump_write ("last-cache-validation-time", "%s", buf);
- }
+ return 0;
+}
- ret = 0;
+int32_t
+qr_inodectx_dump(xlator_t *this, inode_t *inode)
+{
+ qr_inode_t *qr_inode = NULL;
+ int32_t ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char buf[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ goto out;
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read",
+ "inodectx");
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("entire-file-cached", "%s",
+ qr_inode->data ? "yes" : "no");
+
+ if (qr_inode->last_refresh) {
+ gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT);
+ gf_proc_dump_write("last-cache-validation-time", "%s", buf);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-qr_priv_dump (xlator_t *this)
+qr_priv_dump(xlator_t *this)
{
- qr_conf_t *conf = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- uint32_t file_count = 0;
- uint32_t i = 0;
- qr_inode_t *curr = NULL;
- uint64_t total_size = 0;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
-
- if (!this) {
- return -1;
- }
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ uint32_t file_count = 0;
+ uint32_t i = 0;
+ qr_inode_t *curr = NULL;
+ uint64_t total_size = 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
- priv = this->private;
- conf = &priv->conf;
+ if (!this) {
+ return -1;
+ }
- if (!conf)
- return -1;
+ priv = this->private;
+ conf = &priv->conf;
+ if (!conf)
+ return -1;
- table = &priv->table;
+ table = &priv->table;
- gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read",
- "priv");
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read", "priv");
- gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
- gf_proc_dump_write ("max_file_size", "%d", conf->max_file_size);
- gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout);
+ gf_proc_dump_write("max_file_size", "%" PRIu64, conf->max_file_size);
+ gf_proc_dump_write("cache_timeout", "%d", conf->cache_timeout);
- if (!table) {
- goto out;
- } else {
- for (i = 0; i < conf->max_pri; i++) {
- list_for_each_entry (curr, &table->lru[i], lru) {
- file_count++;
- total_size += curr->size;
- }
- }
+ if (!table) {
+ goto out;
+ } else {
+ for (i = 0; i < conf->max_pri; i++) {
+ list_for_each_entry(curr, &table->lru[i], lru)
+ {
+ file_count++;
+ total_size += curr->size;
+ }
}
+ }
- gf_proc_dump_write ("total_files_cached", "%d", file_count);
- gf_proc_dump_write ("total_cache_used", "%d", total_size);
+ gf_proc_dump_write("total_files_cached", "%d", file_count);
+ gf_proc_dump_write("total_cache_used", "%" PRIu64, total_size);
+ gf_proc_dump_write("cache-hit", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(priv->qr_counter.cache_hit));
+ gf_proc_dump_write("cache-miss", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(priv->qr_counter.cache_miss));
+ gf_proc_dump_write("cache-invalidations", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(priv->qr_counter.file_data_invals));
out:
- return 0;
+ return 0;
}
+static int32_t
+qr_dump_metrics(xlator_t *this, int fd)
+{
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+
+ dprintf(fd, "%s.total_files_cached %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.files_cached));
+ dprintf(fd, "%s.total_cache_used %" PRId64 "\n", this->name,
+ table->cache_used);
+ dprintf(fd, "%s.cache-hit %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.cache_hit));
+ dprintf(fd, "%s.cache-miss %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.cache_miss));
+ dprintf(fd, "%s.cache-invalidations %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.file_data_invals));
+
+ return 0;
+}
int32_t
-mem_acct_init (xlator_t *this)
+qr_mem_acct_init(xlator_t *this)
{
- int ret = -1;
-
- if (!this)
- return ret;
+ int ret = -1;
- ret = xlator_mem_acct_init (this, gf_qr_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_qr_mt_end + 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, QUICK_READ_MSG_NO_MEMORY,
+ "Memory accounting init failed");
return ret;
-}
+ }
+ return ret;
+}
static gf_boolean_t
-check_cache_size_ok (xlator_t *this, int64_t cache_size)
+check_cache_size_ok(xlator_t *this, int64_t cache_size)
{
- int ret = _gf_true;
- uint64_t total_mem = 0;
- uint64_t max_cache_size = 0;
- volume_option_t *opt = NULL;
-
- GF_ASSERT (this);
- opt = xlator_volume_option_get (this, "cache-size");
- if (!opt) {
- ret = _gf_false;
- gf_log (this->name, GF_LOG_ERROR,
- "could not get cache-size option");
- goto out;
- }
-
- total_mem = get_mem_size ();
- if (-1 == total_mem)
- max_cache_size = opt->max;
- else
- max_cache_size = total_mem;
-
- gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64,
- max_cache_size);
- if (cache_size > max_cache_size) {
- ret = _gf_false;
- gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64
- " is greater than the max size of %"PRIu64,
- cache_size, max_cache_size);
- goto out;
- }
+ int ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT(this);
+ opt = xlator_volume_option_get(this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ QUICK_READ_MSG_INVALID_ARGUMENT,
+ "could not get cache-size option");
+ goto out;
+ }
+
+ total_mem = get_mem_size();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_msg(this->name, GF_LOG_ERROR, 0, QUICK_READ_MSG_INVALID_ARGUMENT,
+ "Cache size %" PRIu64
+ " is greater than the max size of %" PRIu64,
+ cache_size, max_cache_size);
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
int
-reconfigure (xlator_t *this, dict_t *options)
+qr_reconfigure(xlator_t *this, dict_t *options)
{
- int32_t ret = -1;
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
- uint64_t cache_size_new = 0;
+ int32_t ret = -1;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ uint64_t cache_size_new = 0;
- GF_VALIDATE_OR_GOTO ("quick-read", this, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
- GF_VALIDATE_OR_GOTO (this->name, options, out);
+ GF_VALIDATE_OR_GOTO("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, options, out);
- priv = this->private;
+ priv = this->private;
- conf = &priv->conf;
- if (!conf) {
- goto out;
- }
+ conf = &priv->conf;
+ if (!conf) {
+ goto out;
+ }
- GF_OPTION_RECONF ("cache-timeout", conf->cache_timeout, options, int32,
- out);
+ GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out);
- GF_OPTION_RECONF ("cache-size", cache_size_new, options, size, out);
- if (!check_cache_size_ok (this, cache_size_new)) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Not reconfiguring cache-size");
- goto out;
- }
- conf->cache_size = cache_size_new;
+ GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation,
+ options, bool, out);
+
+ GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options,
+ bool, out);
+
+ GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64, out);
+ if (!check_cache_size_ok(this, cache_size_new)) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, QUICK_READ_MSG_INVALID_CONFIG,
+ "Not reconfiguring cache-size");
+ goto out;
+ }
+ conf->cache_size = cache_size_new;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int32_t
-qr_get_priority_list (const char *opt_str, struct list_head *first)
+qr_get_priority_list(const char *opt_str, struct list_head *first)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *priority_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct qr_priority *curr = NULL, *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO ("quick-read", opt_str, out);
- GF_VALIDATE_OR_GOTO ("quick-read", first, out);
-
- string = gf_strdup (opt_str);
- if (string == NULL) {
- max_pri = -1;
- goto out;
- }
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- priority_str = strtok_r (string, ",", &tmp_str);
- while (priority_str) {
- curr = GF_CALLOC (1, sizeof (*curr), gf_qr_mt_qr_priority_t);
- if (curr == NULL) {
- max_pri = -1;
- goto out;
- }
-
- list_add_tail (&curr->list, first);
-
- dup_str = gf_strdup (priority_str);
- if (dup_str == NULL) {
- max_pri = -1;
- goto out;
- }
-
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
- max_pri = -1;
- goto out;
- }
-
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
- max_pri = -1;
- goto out;
- }
-
- gf_log ("quick-read", GF_LOG_TRACE,
- "quick-read priority : pattern %s : priority %s",
- pattern,
- priority);
-
- curr->pattern = gf_strdup (pattern);
- if (curr->pattern == NULL) {
- max_pri = -1;
- goto out;
- }
-
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
- max_pri = -1;
- goto out;
- } else {
- max_pri = max (max_pri, curr->priority);
- }
-
- GF_FREE (dup_str);
- dup_str = NULL;
-
- priority_str = strtok_r (NULL, ",", &tmp_str);
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *priority_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("quick-read", opt_str, out);
+ GF_VALIDATE_OR_GOTO("quick-read", first, out);
+
+ string = gf_strdup(opt_str);
+ if (string == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ priority_str = strtok_r(string, ",", &tmp_str);
+ while (priority_str) {
+ curr = GF_CALLOC(1, sizeof(*curr), gf_qr_mt_qr_priority_t);
+ if (curr == NULL) {
+ max_pri = -1;
+ goto out;
}
-out:
- GF_FREE (string);
- GF_FREE (dup_str);
+ list_add_tail(&curr->list, first);
- if (max_pri == -1) {
- list_for_each_entry_safe (curr, tmp, first, list) {
- list_del_init (&curr->list);
- GF_FREE (curr->pattern);
- GF_FREE (curr);
- }
+ dup_str = gf_strdup(priority_str);
+ if (dup_str == NULL) {
+ max_pri = -1;
+ goto out;
}
- return max_pri;
-}
-
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ if (!pattern) {
+ max_pri = -1;
+ goto out;
+ }
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = -1, i = 0;
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
- return -1;
+ priority = strtok_r(NULL, ":", &tmp_str1);
+ if (!priority) {
+ max_pri = -1;
+ goto out;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ gf_msg_trace("quick-read", 0,
+ "quick-read priority : pattern %s : priority %s", pattern,
+ priority);
+
+ curr->pattern = gf_strdup(pattern);
+ if (curr->pattern == NULL) {
+ max_pri = -1;
+ goto out;
}
- priv = GF_CALLOC (1, sizeof (*priv), gf_qr_mt_qr_private_t);
- if (priv == NULL) {
- ret = -1;
- goto out;
+ curr->priority = strtol(priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
+ max_pri = -1;
+ goto out;
+ } else {
+ max_pri = max(max_pri, curr->priority);
}
- LOCK_INIT (&priv->table.lock);
- conf = &priv->conf;
+ GF_FREE(dup_str);
+ dup_str = NULL;
- GF_OPTION_INIT ("max-file-size", conf->max_file_size, size, out);
+ priority_str = strtok_r(NULL, ",", &tmp_str);
+ }
+out:
+ GF_FREE(string);
- GF_OPTION_INIT ("cache-timeout", conf->cache_timeout, int32, out);
+ GF_FREE(dup_str);
- GF_OPTION_INIT ("cache-size", conf->cache_size, size, out);
- if (!check_cache_size_ok (this, conf->cache_size)) {
- ret = -1;
- goto out;
+ if (max_pri == -1) {
+ list_for_each_entry_safe(curr, tmp, first, list)
+ {
+ list_del_init(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
}
+ }
- INIT_LIST_HEAD (&conf->priority_list);
- conf->max_pri = 1;
- if (dict_get (this->options, "priority")) {
- char *option_list = data_to_str (dict_get (this->options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- conf->max_pri = qr_get_priority_list (option_list,
- &conf->priority_list);
-
- if (conf->max_pri == -1) {
- goto out;
- }
- conf->max_pri ++;
- }
+ return max_pri;
+}
- priv->table.lru = GF_CALLOC (conf->max_pri, sizeof (*priv->table.lru),
- gf_common_mt_list_head);
- if (priv->table.lru == NULL) {
- ret = -1;
- goto out;
+int32_t
+qr_init(xlator_t *this)
+{
+ int32_t ret = -1, i = 0;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: volume (%s) not configured with exactly one "
+ "child",
+ this->name);
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_qr_mt_qr_private_t);
+ if (priv == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ LOCK_INIT(&priv->table.lock);
+ conf = &priv->conf;
+
+ GF_OPTION_INIT("max-file-size", conf->max_file_size, size_uint64, out);
+
+ GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out);
+
+ GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool,
+ out);
+
+ GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out);
+ if (!check_cache_size_ok(this, conf->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_OPTION_INIT("ctime-invalidation", conf->ctime_invalidation, bool, out);
+
+ INIT_LIST_HEAD(&conf->priority_list);
+ conf->max_pri = 1;
+ if (dict_get(this->options, "priority")) {
+ char *option_list = data_to_str(dict_get(this->options, "priority"));
+ gf_msg_trace(this->name, 0, "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ conf->max_pri = qr_get_priority_list(option_list, &conf->priority_list);
+
+ if (conf->max_pri == -1) {
+ goto out;
}
+ conf->max_pri++;
+ }
- for (i = 0; i < conf->max_pri; i++) {
- INIT_LIST_HEAD (&priv->table.lru[i]);
- }
+ priv->table.lru = GF_CALLOC(conf->max_pri, sizeof(*priv->table.lru),
+ gf_common_mt_list_head);
+ if (priv->table.lru == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < conf->max_pri; i++) {
+ INIT_LIST_HEAD(&priv->table.lru[i]);
+ }
- ret = 0;
+ ret = 0;
- this->private = priv;
+ priv->last_child_down = gf_time();
+ GF_ATOMIC_INIT(priv->generation, 0);
+ this->private = priv;
out:
- if ((ret == -1) && priv) {
- GF_FREE (priv);
- }
+ if ((ret == -1) && priv) {
+ GF_FREE(priv);
+ }
- return ret;
+ return ret;
}
-
void
-qr_inode_table_destroy (qr_private_t *priv)
+qr_inode_table_destroy(qr_private_t *priv)
{
- int i = 0;
- qr_conf_t *conf = NULL;
-
- conf = &priv->conf;
-
- for (i = 0; i < conf->max_pri; i++) {
- GF_ASSERT (list_empty (&priv->table.lru[i]));
+ int i = 0;
+ qr_conf_t *conf = NULL;
+
+ conf = &priv->conf;
+
+ for (i = 0; i < conf->max_pri; i++) {
+ /* There is a known leak of inodes, hence until
+ * that is fixed, log the assert as warning.
+ GF_ASSERT (list_empty (&priv->table.lru[i]));*/
+ if (!list_empty(&priv->table.lru[i])) {
+ gf_msg("quick-read", GF_LOG_INFO, 0, QUICK_READ_MSG_LRU_NOT_EMPTY,
+ "quick read inode table lru not empty");
}
+ }
- LOCK_DESTROY (&priv->table.lock);
+ LOCK_DESTROY(&priv->table.lock);
- return;
+ return;
}
+void
+qr_conf_destroy(qr_conf_t *conf)
+{
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ list_for_each_entry_safe(curr, tmp, &conf->priority_list, list)
+ {
+ list_del(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
+ }
+
+ return;
+}
void
-qr_conf_destroy (qr_conf_t *conf)
+qr_update_child_down_time(xlator_t *this, time_t now)
{
- struct qr_priority *curr = NULL, *tmp = NULL;
+ qr_private_t *priv = NULL;
+
+ priv = this->private;
- list_for_each_entry_safe (curr, tmp, &conf->priority_list, list) {
- list_del (&curr->list);
- GF_FREE (curr->pattern);
- GF_FREE (curr);
+ LOCK(&priv->lock);
+ {
+ priv->last_child_down = now;
+ }
+ UNLOCK(&priv->lock);
+}
+
+static int
+qr_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ inode_table_t *itable = NULL;
+ qr_private_t *priv = NULL;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ priv = this->private;
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ if (up_ci && (up_ci->flags & UP_WRITE_FLAGS)) {
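+ /* a write-class upcall means the file data was modified elsewhere; count the invalidation and purge the cached content */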
+ GF_ATOMIC_INC(priv->qr_counter.file_data_invals);
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
}
+ qr_inode_prune(this, inode, qr_get_generation(this, inode));
+ }
- return;
+out:
+ if (inode)
+ inode_unref(inode);
+
+ return ret;
}
+int
+qr_notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ priv = this->private;
+ conf = &priv->conf;
+
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ qr_update_child_down_time(this, gf_time());
+ break;
+ case GF_EVENT_UPCALL:
+ if (conf->qr_invalidation)
+ ret = qr_invalidate(this, data);
+ break;
+ default:
+ break;
+ }
+
+ if (default_notify(this, event, data) != 0)
+ ret = -1;
+
+ return ret;
+}
void
-fini (xlator_t *this)
+qr_fini(xlator_t *this)
{
- qr_private_t *priv = NULL;
+ qr_private_t *priv = NULL;
- if (this == NULL) {
- goto out;
- }
+ if (this == NULL) {
+ goto out;
+ }
- priv = this->private;
- if (priv == NULL) {
- goto out;
- }
+ priv = this->private;
+ if (priv == NULL) {
+ goto out;
+ }
- qr_inode_table_destroy (priv);
- qr_conf_destroy (&priv->conf);
+ qr_inode_table_destroy(priv);
+ qr_conf_destroy(&priv->conf);
- this->private = NULL;
+ this->private = NULL;
- GF_FREE (priv);
+ GF_FREE(priv);
out:
- return;
+ return;
}
-struct xlator_fops fops = {
- .lookup = qr_lookup,
- .readdirp = qr_readdirp,
- .open = qr_open,
- .readv = qr_readv,
- .writev = qr_writev,
- .truncate = qr_truncate,
- .ftruncate = qr_ftruncate
-};
-
-struct xlator_cbks cbks = {
- .forget = qr_forget,
+struct xlator_fops qr_fops = {.lookup = qr_lookup,
+ .readdirp = qr_readdirp,
+ .open = qr_open,
+ .readv = qr_readv,
+ .writev = qr_writev,
+ .truncate = qr_truncate,
+ .ftruncate = qr_ftruncate,
+ .fallocate = qr_fallocate,
+ .discard = qr_discard,
+ .zerofill = qr_zerofill};
+
+struct xlator_cbks qr_cbks = {
+ .forget = qr_forget,
};
-struct xlator_dumpops dumpops = {
- .priv = qr_priv_dump,
- .inodectx = qr_inodectx_dump,
+struct xlator_dumpops qr_dumpops = {
+ .priv = qr_priv_dump,
+ .inodectx = qr_inodectx_dump,
};
-struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 32 * GF_UNIT_GB,
- .default_value = "128MB",
- .description = "Size of the read cache."
- },
- { .key = {"cache-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 60,
- .default_value = "1",
- },
- { .key = {"max-file-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 1 * GF_UNIT_KB * 1000,
- .default_value = "64KB",
- },
- { .key = {NULL} }
+struct volume_options qr_options[] = {
+ {
+ .key = {"quick-read"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable quick-read",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"priority"}, .type = GF_OPTION_TYPE_ANY},
+ {.key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = INFINITY,
+ .default_value = "128MB",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "Size of small file read cache."},
+ {
+ .key = {"cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+ .key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 1 * GF_UNIT_KB * 1000,
+ .default_value = "64KB",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+ .key = {"quick-read-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+ },
+ {
+ .key = {"ctime-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_5_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "Quick-read by default uses mtime to identify changes "
+ "to file data. However there are applications like "
+ "rsync which explicitly set mtime making it unreliable "
+ "for the purpose of identifying change in file content "
+ ". Since ctime also changes when content of a file "
+ " changes and it cannot be set explicitly, it becomes "
+ " suitable for identifying staleness of cached data. "
+ "This option makes quick-read to prefer ctime over "
+ "mtime to validate its cache. However, using ctime "
+ "can result in false positives as ctime changes with "
+ "just attribute changes like permission without "
+ "changes to file data. So, use this only when mtime "
+ "is not reliable",
+ },
+ {.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = qr_init,
+ .fini = qr_fini,
+ .notify = qr_notify,
+ .reconfigure = qr_reconfigure,
+ .mem_acct_init = qr_mem_acct_init,
+ .dump_metrics = qr_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &qr_dumpops,
+ .fops = &qr_fops,
+ .cbks = &qr_cbks,
+ .options = qr_options,
+ .identifier = "quick-read",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h
index 6f0a0541731..20fcc70b3a7 100644
--- a/xlators/performance/quick-read/src/quick-read.h
+++ b/xlators/performance/quick-read/src/quick-read.h
@@ -11,21 +11,16 @@
#ifndef __QUICK_READ_H
#define __QUICK_READ_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "list.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "common-utils.h"
-#include "call-stub.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
#include <libgen.h>
#include <sys/time.h>
#include <sys/types.h>
@@ -34,48 +29,63 @@
#include <fnmatch.h>
#include "quick-read-mem-types.h"
-
struct qr_inode {
- void *data;
- size_t size;
- int priority;
- uint32_t ia_mtime;
- uint32_t ia_mtime_nsec;
- struct iatt buf;
- struct timeval last_refresh;
- struct list_head lru;
+ void *data;
+ size_t size;
+ int priority;
+ uint32_t ia_mtime;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime;
+ uint32_t ia_ctime_nsec;
+ uint32_t gen_rollover;
+ struct iatt buf;
+ time_t last_refresh;
+ struct list_head lru;
+ uint64_t gen;
+ uint64_t invalidation_time;
};
typedef struct qr_inode qr_inode_t;
-
struct qr_priority {
- char *pattern;
- int32_t priority;
- struct list_head list;
+ char *pattern;
+ int32_t priority;
+ struct list_head list;
};
typedef struct qr_priority qr_priority_t;
struct qr_conf {
- uint64_t max_file_size;
- int32_t cache_timeout;
- uint64_t cache_size;
- int max_pri;
- struct list_head priority_list;
+ uint64_t max_file_size;
+ int32_t cache_timeout;
+ uint64_t cache_size;
+ int max_pri;
+ gf_boolean_t qr_invalidation;
+ gf_boolean_t ctime_invalidation;
+ struct list_head priority_list;
};
typedef struct qr_conf qr_conf_t;
struct qr_inode_table {
- uint64_t cache_used;
- struct list_head *lru;
- gf_lock_t lock;
+ uint64_t cache_used;
+ struct list_head *lru;
+ gf_lock_t lock;
};
typedef struct qr_inode_table qr_inode_table_t;
+struct qr_statistics {
+ gf_atomic_t cache_hit;
+ gf_atomic_t cache_miss;
+ gf_atomic_t file_data_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t files_cached;
+};
+
struct qr_private {
- qr_conf_t conf;
- qr_inode_table_t table;
+ qr_conf_t conf;
+ qr_inode_table_t table;
+ time_t last_child_down;
+ gf_lock_t lock;
+ struct qr_statistics qr_counter;
+ gf_atomic_int32_t generation;
};
typedef struct qr_private qr_private_t;
-
#endif /* #ifndef __QUICK_READ_H */
diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am
index be80ae7ac68..99efca3660c 100644
--- a/xlators/performance/read-ahead/src/Makefile.am
+++ b/xlators/performance/read-ahead/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = read-ahead.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-read_ahead_la_LDFLAGS = -module -avoid-version
+read_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
read_ahead_la_SOURCES = read-ahead.c page.c
read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = read-ahead.h read-ahead-mem-types.h
+noinst_HEADERS = read-ahead.h read-ahead-mem-types.h read-ahead-messages.h
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c
index e79e7ae7880..8a58ad8bb7a 100644
--- a/xlators/performance/read-ahead/src/page.c
+++ b/xlators/performance/read-ahead/src/page.c
@@ -8,446 +8,448 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "read-ahead.h"
#include <assert.h>
+#include "read-ahead-messages.h"
ra_page_t *
-ra_page_get (ra_file_t *file, off_t offset)
+ra_page_get(ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
- GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", file, out);
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ page = file->pages.next;
+ rounded_offset = gf_floor(offset, file->page_size);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- if (page == &file->pages || page->offset != rounded_offset)
- page = NULL;
+ if (page == &file->pages || page->offset != rounded_offset)
+ page = NULL;
out:
- return page;
+ return page;
}
-
ra_page_t *
-ra_page_create (ra_file_t *file, off_t offset)
+ra_page_create(ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
- ra_page_t *newpage = NULL;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ra_page_t *newpage = NULL;
- GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", file, out);
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ page = file->pages.next;
+ rounded_offset = gf_floor(offset, file->page_size);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- if (page == &file->pages || page->offset != rounded_offset) {
- newpage = GF_CALLOC (1, sizeof (*newpage), gf_ra_mt_ra_page_t);
- if (!newpage) {
- goto out;
- }
+ if (page == &file->pages || page->offset != rounded_offset) {
+ newpage = GF_CALLOC(1, sizeof(*newpage), gf_ra_mt_ra_page_t);
+ if (!newpage) {
+ goto out;
+ }
- newpage->offset = rounded_offset;
- newpage->prev = page->prev;
- newpage->next = page;
- newpage->file = file;
- page->prev->next = newpage;
- page->prev = newpage;
+ newpage->offset = rounded_offset;
+ newpage->prev = page->prev;
+ newpage->next = page;
+ newpage->file = file;
+ page->prev->next = newpage;
+ page->prev = newpage;
- page = newpage;
- }
+ page = newpage;
+ }
out:
- return page;
+ return page;
}
-
void
-ra_wait_on_page (ra_page_t *page, call_frame_t *frame)
+ra_wait_on_page(ra_page_t *page, call_frame_t *frame)
{
- ra_waitq_t *waitq = NULL;
- ra_local_t *local = NULL;
+ ra_waitq_t *waitq = NULL;
+ ra_local_t *local = NULL;
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, page, out);
- local = frame->local;
+ local = frame->local;
- waitq = GF_CALLOC (1, sizeof (*waitq), gf_ra_mt_ra_waitq_t);
- if (!waitq) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
+ waitq = GF_CALLOC(1, sizeof(*waitq), gf_ra_mt_ra_waitq_t);
+ if (!waitq) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
- waitq->data = frame;
- waitq->next = page->waitq;
- page->waitq = waitq;
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ page->waitq = waitq;
- ra_local_lock (local);
- {
- local->wait_count++;
- }
- ra_local_unlock (local);
+ ra_local_lock(local);
+ {
+ local->wait_count++;
+ }
+ ra_local_unlock(local);
out:
- return;
+ return;
}
-
void
-ra_waitq_return (ra_waitq_t *waitq)
+ra_waitq_return(ra_waitq_t *waitq)
{
- ra_waitq_t *trav = NULL;
- ra_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ra_waitq_t *trav = NULL;
+ ra_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ra_frame_return (frame);
- GF_FREE (trav);
- }
+ frame = trav->data;
+ ra_frame_return(frame);
+ GF_FREE(trav);
+ }
- return;
+ return;
}
-
int
-ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
+ra_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- ra_local_t *local = NULL;
- off_t pending_offset = 0;
- ra_file_t *file = NULL;
- ra_page_t *page = NULL;
- ra_waitq_t *waitq = NULL;
- fd_t *fd = NULL;
- uint64_t tmp_file = 0;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- fd = local->fd;
-
- fd_ctx_get (fd, this, &tmp_file);
-
- file = (ra_file_t *)(long)tmp_file;
- pending_offset = local->pending_offset;
-
- if (file == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "read-ahead context not set in fd (%p)", fd);
- op_ret = -1;
- op_errno = EBADF;
- goto out;
+ ra_local_t *local = NULL;
+ off_t pending_offset = 0;
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ fd_t *fd = NULL;
+ uint64_t tmp_file = 0;
+ gf_boolean_t stale = _gf_false;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ fd = local->fd;
+
+ fd_ctx_get(fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ pending_offset = local->pending_offset;
+
+ if (file == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, EBADF,
+ READ_AHEAD_MSG_FD_CONTEXT_NOT_SET,
+ "read-ahead context not set in fd (%p)", fd);
+ op_ret = -1;
+ op_errno = EBADF;
+ goto out;
+ }
+
+ ra_file_lock(file);
+ {
+ if (op_ret >= 0)
+ file->stbuf = *stbuf;
+
+ page = ra_page_get(file, pending_offset);
+
+ if (!page) {
+ gf_msg_trace(this->name, 0,
+ "wasted copy: "
+ "%" PRId64 "[+%" PRId64 "] file=%p",
+ pending_offset, file->page_size, file);
+ goto unlock;
}
- ra_file_lock (file);
- {
- if (op_ret >= 0)
- file->stbuf = *stbuf;
-
- page = ra_page_get (file, pending_offset);
-
- if (!page) {
- gf_log (this->name, GF_LOG_TRACE,
- "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
- pending_offset, file->page_size, file);
- goto unlock;
- }
-
- /*
- * "Dirty" means that the request was a pure read-ahead; it's
- * set for requests we issue ourselves, and cleared when user
- * requests are issued or put on the waitq. "Poisoned" means
- * that we got a write while a read was still in flight, and we
- * couldn't stop it so we marked it instead. If it's both
- * dirty and poisoned by the time we get here, we cancel its
- * effect so that a subsequent user read doesn't get data that
- * we know is stale (because we made it stale ourselves). We
- * can't use ESTALE because that has special significance.
- * ECANCELED has no such special meaning, and is close to what
- * we're trying to indicate.
- */
- if (page->dirty && page->poisoned) {
- op_ret = -1;
- op_errno = ECANCELED;
- }
-
- if (op_ret < 0) {
- waitq = ra_page_error (page, op_ret, op_errno);
- goto unlock;
- }
-
- if (page->vector) {
- iobref_unref (page->iobref);
- GF_FREE (page->vector);
- }
-
- page->vector = iov_dup (vector, count);
- if (page->vector == NULL) {
- waitq = ra_page_error (page, -1, ENOMEM);
- goto unlock;
- }
-
- page->count = count;
- page->iobref = iobref_ref (iobref);
- page->ready = 1;
-
- page->size = iov_length (vector, count);
-
- waitq = ra_page_wakeup (page);
+ if (page->stale) {
+ page->stale = 0;
+ page->ready = 0;
+ stale = 1;
+ goto unlock;
}
-unlock:
- ra_file_unlock (file);
-
- ra_waitq_return (waitq);
-
- fd_unref (local->fd);
- mem_put (frame->local);
- frame->local = NULL;
-
-out:
- STACK_DESTROY (frame->root);
- return 0;
-}
+ /*
+ * "Dirty" means that the request was a pure read-ahead; it's
+ * set for requests we issue ourselves, and cleared when user
+ * requests are issued or put on the waitq. "Poisoned" means
+ * that we got a write while a read was still in flight, and we
+ * couldn't stop it so we marked it instead. If it's both
+ * dirty and poisoned by the time we get here, we cancel its
+ * effect so that a subsequent user read doesn't get data that
+ * we know is stale (because we made it stale ourselves). We
+ * can't use ESTALE because that has special significance.
+ * ECANCELED has no such special meaning, and is close to what
+ * we're trying to indicate.
+ */
+ if (page->dirty && page->poisoned) {
+ op_ret = -1;
+ op_errno = ECANCELED;
+ }
+ if (op_ret < 0) {
+ waitq = ra_page_error(page, op_ret, op_errno);
+ goto unlock;
+ }
-void
-ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
-{
- call_frame_t *fault_frame = NULL;
- ra_local_t *fault_local = NULL;
- ra_page_t *page = NULL;
- ra_waitq_t *waitq = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
-
- fault_frame = copy_frame (frame);
- if (fault_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
+ if (page->vector) {
+ iobref_unref(page->iobref);
+ GF_FREE(page->vector);
}
- fault_local = mem_get0 (THIS->local_pool);
- if (fault_local == NULL) {
- STACK_DESTROY (fault_frame->root);
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
+ page->vector = iov_dup(vector, count);
+ if (page->vector == NULL) {
+ waitq = ra_page_error(page, -1, ENOMEM);
+ goto unlock;
}
- fault_frame->local = fault_local;
- fault_local->pending_offset = offset;
- fault_local->pending_size = file->page_size;
+ page->count = count;
+ page->iobref = iobref_ref(iobref);
+ page->ready = 1;
- fault_local->fd = fd_ref (file->fd);
+ page->size = iov_length(vector, count);
- STACK_WIND (fault_frame, ra_fault_cbk,
- FIRST_CHILD (fault_frame->this),
- FIRST_CHILD (fault_frame->this)->fops->readv,
- file->fd, file->page_size, offset, 0, NULL);
+ waitq = ra_page_wakeup(page);
+ }
+unlock:
+ ra_file_unlock(file);
- return;
+ if (stale) {
+ STACK_WIND(frame, ra_fault_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->readv, local->fd,
+ local->pending_size, local->pending_offset, 0, NULL);
-err:
- ra_file_lock (file);
- {
- page = ra_page_get (file, offset);
- if (page)
- waitq = ra_page_error (page, op_ret,
- op_errno);
- }
- ra_file_unlock (file);
+ return 0;
+ }
- if (waitq != NULL) {
- ra_waitq_return (waitq);
- }
+ ra_waitq_return(waitq);
+
+ fd_unref(local->fd);
+
+ mem_put(frame->local);
+ frame->local = NULL;
out:
- return;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-
void
-ra_frame_fill (ra_page_t *page, call_frame_t *frame)
+ra_page_fault(ra_file_t *file, call_frame_t *frame, off_t offset)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ra_fill_t *new = NULL;
-
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+ call_frame_t *fault_frame = NULL;
+ ra_local_t *fault_local = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, file, out);
+
+ fault_frame = copy_frame(frame);
+ if (fault_frame == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ fault_local = mem_get0(THIS->local_pool);
+ if (fault_local == NULL) {
+ STACK_DESTROY(fault_frame->root);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ fault_frame->local = fault_local;
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = file->page_size;
+
+ fault_local->fd = fd_ref(file->fd);
+
+ STACK_WIND(fault_frame, ra_fault_cbk, FIRST_CHILD(fault_frame->this),
+ FIRST_CHILD(fault_frame->this)->fops->readv, file->fd,
+ file->page_size, offset, 0, NULL);
+
+ return;
- local = frame->local;
- fill = &local->fill;
-
- if (local->op_ret != -1 && page->size) {
- if (local->offset > page->offset)
- src_offset = local->offset - page->offset;
- else
- dst_offset = page->offset - local->offset;
-
- copy_size = min (page->size - src_offset,
- local->size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- fill = fill->next;
- while (fill != &local->fill) {
- if (fill->offset > page->offset) {
- break;
- }
- fill = fill->next;
- }
-
- new = GF_CALLOC (1, sizeof (*new), gf_ra_mt_ra_fill_t);
- if (new == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
-
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- NULL);
- new->vector = GF_CALLOC (new->count, sizeof (struct iovec),
- gf_ra_mt_iovec);
- if (new->vector == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- GF_FREE (new);
- goto out;
- }
-
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- new->vector);
-
- new->next = fill;
- new->prev = new->next->prev;
- new->next->prev = new;
- new->prev->next = new;
-
- local->op_ret += copy_size;
- }
+err:
+ ra_file_lock(file);
+ {
+ page = ra_page_get(file, offset);
+ if (page)
+ waitq = ra_page_error(page, op_ret, op_errno);
+ }
+ ra_file_unlock(file);
+
+ if (waitq != NULL) {
+ ra_waitq_return(waitq);
+ }
out:
- return;
+ return;
}
-
void
-ra_frame_unwind (call_frame_t *frame)
+ra_frame_fill(ra_page_t *page, call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- int32_t count = 0;
- struct iovec *vector = NULL;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- ra_fill_t *next = NULL;
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- uint64_t tmp_file = 0;
-
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
-
- local = frame->local;
- fill = local->fill.next;
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ra_fill_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, page, out);
+
+ local = frame->local;
+ fill = &local->fill;
+
+ if (local->op_ret != -1 && page->size) {
+ if (local->offset > page->offset)
+ src_offset = local->offset - page->offset;
+ else
+ dst_offset = page->offset - local->offset;
+
+ copy_size = min(page->size - src_offset, local->size - dst_offset);
+
+ if (copy_size < 0) {
+            /* the page holds fewer bytes than requested and the required
+               offset lies beyond the data present in the page */
+ copy_size = src_offset = 0;
}
- frame->local = NULL;
-
+ fill = fill->next;
while (fill != &local->fill) {
- count += fill->count;
- fill = fill->next;
+ if (fill->offset > page->offset) {
+ break;
+ }
+ fill = fill->next;
}
- vector = GF_CALLOC (count, sizeof (*vector), gf_ra_mt_iovec);
- if (vector == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- iobref_unref (iobref);
- iobref = NULL;
+ new = GF_CALLOC(1, sizeof(*new), gf_ra_mt_ra_fill_t);
+ if (new == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref(page->iobref);
+ new->count = iov_subset(page->vector, page->count, src_offset,
+ copy_size, &new->vector, 0);
+ if (new->count < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(new->iobref);
+ GF_FREE(new);
+ goto out;
}
- fill = local->fill.next;
+ new->next = fill;
+ new->prev = new->next->prev;
+ new->next->prev = new;
+ new->prev->next = new;
- while (fill != &local->fill) {
- next = fill->next;
+ local->op_ret += copy_size;
+ }
- if ((vector != NULL) && (iobref != NULL)) {
- memcpy (((char *)vector) + copied, fill->vector,
- fill->count * sizeof (*vector));
+out:
+ return;
+}
- copied += (fill->count * sizeof (*vector));
- iobref_merge (iobref, fill->iobref);
- }
+void
+ra_frame_unwind(call_frame_t *frame)
+{
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ ra_fill_t *next = NULL;
+ fd_t *fd = NULL;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+
+ local = frame->local;
+ fill = local->fill.next;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+
+ frame->local = NULL;
+
+ while (fill != &local->fill) {
+ count += fill->count;
+ fill = fill->next;
+ }
+
+ vector = GF_CALLOC(count, sizeof(*vector), gf_ra_mt_iovec);
+ if (vector == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(iobref);
+ iobref = NULL;
+ }
+
+ fill = local->fill.next;
+
+ while (fill != &local->fill) {
+ next = fill->next;
+
+ if ((vector != NULL) && (iobref != NULL)) {
+ memcpy(((char *)vector) + copied, fill->vector,
+ fill->count * sizeof(*vector));
+
+ copied += (fill->count * sizeof(*vector));
+ if (iobref_merge(iobref, fill->iobref)) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(iobref);
+ iobref = NULL;
+ }
+ }
- fill->next->prev = fill->prev;
- fill->prev->next = fill->prev;
+ fill->next->prev = fill->prev;
+ fill->prev->next = fill->prev;
- iobref_unref (fill->iobref);
- GF_FREE (fill->vector);
- GF_FREE (fill);
+ iobref_unref(fill->iobref);
+ GF_FREE(fill->vector);
+ GF_FREE(fill);
- fill = next;
- }
+ fill = next;
+ }
- fd = local->fd;
- fd_ctx_get (fd, frame->this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ fd = local->fd;
+ fd_ctx_get(fd, frame->this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
- vector, count, &file->stbuf, iobref, NULL);
+ STACK_UNWIND_STRICT(readv, frame, local->op_ret, local->op_errno, vector,
+ count, &file->stbuf, iobref, NULL);
- iobref_unref (iobref);
- pthread_mutex_destroy (&local->local_lock);
- mem_put (local);
- GF_FREE (vector);
+ iobref_unref(iobref);
+ pthread_mutex_destroy(&local->local_lock);
+ mem_put(local);
+ GF_FREE(vector);
out:
- return;
+ return;
}
/*
@@ -456,27 +458,27 @@ out:
*
*/
void
-ra_frame_return (call_frame_t *frame)
+ra_frame_return(call_frame_t *frame)
{
- ra_local_t *local = NULL;
- int32_t wait_count = 0;
+ ra_local_t *local = NULL;
+ int32_t wait_count = 0;
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
- local = frame->local;
- GF_ASSERT (local->wait_count > 0);
+ local = frame->local;
+ GF_ASSERT(local->wait_count > 0);
- ra_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ra_local_unlock (local);
+ ra_local_lock(local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ra_local_unlock(local);
- if (!wait_count)
- ra_frame_unwind (frame);
+ if (!wait_count)
+ ra_frame_unwind(frame);
out:
- return;
+ return;
}
/*
@@ -485,26 +487,26 @@ out:
*
*/
ra_waitq_t *
-ra_page_wakeup (ra_page_t *page)
+ra_page_wakeup(ra_page_t *page)
{
- ra_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
+ ra_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
- GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ra_frame_fill (page, frame);
- }
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ra_frame_fill(page, frame);
+ }
- if (page->stale) {
- ra_page_purge (page);
- }
+ if (page->stale) {
+ ra_page_purge(page);
+ }
out:
- return waitq;
+ return waitq;
}
/*
@@ -513,22 +515,22 @@ out:
*
*/
void
-ra_page_purge (ra_page_t *page)
+ra_page_purge(ra_page_t *page)
{
- GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", page, out);
- page->prev->next = page->next;
- page->next->prev = page->prev;
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
- if (page->iobref) {
- iobref_unref (page->iobref);
- }
+ if (page->iobref) {
+ iobref_unref(page->iobref);
+ }
- GF_FREE (page->vector);
- GF_FREE (page);
+ GF_FREE(page->vector);
+ GF_FREE(page);
out:
- return;
+ return;
}
/*
@@ -539,32 +541,32 @@ out:
*
*/
ra_waitq_t *
-ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
+ra_page_error(ra_page_t *page, int32_t op_ret, int32_t op_errno)
{
- ra_waitq_t *waitq = NULL;
- ra_waitq_t *trav = NULL;
- call_frame_t *frame = NULL;
- ra_local_t *local = NULL;
+ ra_waitq_t *waitq = NULL;
+ ra_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ ra_local_t *local = NULL;
- GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
- local = frame->local;
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
+ local = frame->local;
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
+ }
- ra_page_purge (page);
+ ra_page_purge(page);
out:
- return waitq;
+ return waitq;
}
/*
@@ -573,31 +575,31 @@ out:
*
*/
void
-ra_file_destroy (ra_file_t *file)
+ra_file_destroy(ra_file_t *file)
{
- ra_conf_t *conf = NULL;
- ra_page_t *trav = NULL;
+ ra_conf_t *conf = NULL;
+ ra_page_t *trav = NULL;
- GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", file, out);
- conf = file->conf;
+ conf = file->conf;
- ra_conf_lock (conf);
- {
- file->prev->next = file->next;
- file->next->prev = file->prev;
- }
- ra_conf_unlock (conf);
+ ra_conf_lock(conf);
+ {
+ file->prev->next = file->next;
+ file->next->prev = file->prev;
+ }
+ ra_conf_unlock(conf);
+ trav = file->pages.next;
+ while (trav != &file->pages) {
+ ra_page_error(trav, -1, EINVAL);
trav = file->pages.next;
- while (trav != &file->pages) {
- ra_page_error (trav, -1, EINVAL);
- trav = file->pages.next;
- }
+ }
- pthread_mutex_destroy (&file->file_lock);
- GF_FREE (file);
+ pthread_mutex_destroy(&file->file_lock);
+ GF_FREE(file);
out:
- return;
+ return;
}
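
For orientation, ra_page_get() and ra_page_create() above keep a file's pages on a circular, offset-sorted doubly linked list whose head (file->pages) doubles as the sentinel, and every lookup key is the request offset rounded down to a page boundary. The standalone model below shows only that lookup/insert pattern; the names model_page, page_lookup and page_insert are hypothetical, and locking, waitqs, iobrefs and memory accounting are deliberately left out.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>

    /* Hypothetical page node: circular doubly linked list sorted by offset,
     * with 'head' acting as the sentinel (like file->pages). */
    struct model_page {
        off_t offset;
        struct model_page *prev, *next;
    };

    static off_t
    round_down(off_t offset, off_t page_size)
    {
        return (offset / page_size) * page_size; /* same idea as gf_floor() */
    }

    /* Return the page covering 'offset', or NULL if it is not cached. */
    static struct model_page *
    page_lookup(struct model_page *head, off_t offset, off_t page_size)
    {
        off_t key = round_down(offset, page_size);
        struct model_page *p = head->next;

        while (p != head && p->offset < key)
            p = p->next;

        return (p != head && p->offset == key) ? p : NULL;
    }

    /* Return the page covering 'offset', creating and splicing it in sorted
     * position when missing (NULL only on allocation failure). */
    static struct model_page *
    page_insert(struct model_page *head, off_t offset, off_t page_size)
    {
        off_t key = round_down(offset, page_size);
        struct model_page *p = head->next, *newp;

        while (p != head && p->offset < key)
            p = p->next;
        if (p != head && p->offset == key)
            return p; /* already present */

        newp = calloc(1, sizeof(*newp));
        if (!newp)
            return NULL;
        newp->offset = key;
        newp->prev = p->prev; /* splice immediately before 'p' */
        newp->next = p;
        p->prev->next = newp;
        p->prev = newp;
        return newp;
    }

    int
    main(void)
    {
        struct model_page head = {.offset = 0, .prev = &head, .next = &head};

        /* 64 KiB pages: cache pages at 0 and 131072, then probe an offset
         * inside the second page (nodes are leaked for brevity). */
        page_insert(&head, 131072, 65536);
        page_insert(&head, 0, 65536);
        printf("offset 131100 cached: %s\n",
               page_lookup(&head, 131100, 65536) ? "yes" : "no");
        return 0;
    }
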
diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
index 219e2928919..f07cfc5bba5 100644
--- a/xlators/performance/read-ahead/src/read-ahead-mem-types.h
+++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
@@ -8,19 +8,18 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __RA_MEM_TYPES_H__
#define __RA_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_ra_mem_types_ {
- gf_ra_mt_ra_file_t = gf_common_mt_end + 1,
- gf_ra_mt_ra_conf_t,
- gf_ra_mt_ra_page_t,
- gf_ra_mt_ra_waitq_t,
- gf_ra_mt_ra_fill_t,
- gf_ra_mt_iovec,
- gf_ra_mt_end
+ gf_ra_mt_ra_file_t = gf_common_mt_end + 1,
+ gf_ra_mt_ra_conf_t,
+ gf_ra_mt_ra_page_t,
+ gf_ra_mt_ra_waitq_t,
+ gf_ra_mt_ra_fill_t,
+ gf_ra_mt_iovec,
+ gf_ra_mt_end
};
#endif
diff --git a/xlators/performance/read-ahead/src/read-ahead-messages.h b/xlators/performance/read-ahead/src/read-ahead-messages.h
new file mode 100644
index 00000000000..0302b7a7122
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead-messages.h
@@ -0,0 +1,31 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _READ_AHEAD_MESSAGES_H_
+#define _READ_AHEAD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(READ_AHEAD, READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ READ_AHEAD_MSG_VOL_MISCONFIGURED, READ_AHEAD_MSG_NO_MEMORY,
+ READ_AHEAD_MSG_FD_CONTEXT_NOT_SET,
+ READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND,
+ READ_AHEAD_MSG_XLATOR_CONF_NULL);
+
+#endif /* _READ_AHEAD_MESSAGES_H_ */
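
As the comment above spells out, a new message ID is only ever appended to the end of the GLFS_MSGID() list, and existing identifiers are never removed or reordered. A hypothetical addition would look like the fragment below; READ_AHEAD_MSG_EXAMPLE is invented for this illustration and does not exist in the tree, and the gf_msg() call simply reuses the logging pattern already visible in read-ahead.c.

    /* read-ahead-messages.h: append the new identifier at the end */
    GLFS_MSGID(READ_AHEAD, READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
               READ_AHEAD_MSG_VOL_MISCONFIGURED, READ_AHEAD_MSG_NO_MEMORY,
               READ_AHEAD_MSG_FD_CONTEXT_NOT_SET,
               READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND,
               READ_AHEAD_MSG_XLATOR_CONF_NULL,
               READ_AHEAD_MSG_EXAMPLE); /* hypothetical new ID */

    /* a caller, e.g. somewhere in read-ahead.c */
    gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_EXAMPLE,
           "example condition on fd (%p)", fd);
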
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
index 241fa477fda..5246e1317d2 100644
--- a/xlators/performance/read-ahead/src/read-ahead.c
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -15,197 +15,187 @@
- ensure efficient memory management in case of random seek
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "read-ahead.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include <assert.h>
#include <sys/time.h>
+#include "read-ahead-messages.h"
static void
-read_ahead (call_frame_t *frame, ra_file_t *file);
-
+read_ahead(call_frame_t *frame, ra_file_t *file);
int
-ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+ra_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
-
- conf = this->private;
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
- if (!file) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- /* If O_DIRECT open, we disable caching on it */
-
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
-
- file->offset = (unsigned long long) 0;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
-
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
-
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
-
- if (!file->disabled) {
- file->page_count = 1;
- }
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
- if (ret == -1) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "cannot set read-ahead context information in fd (%p)",
- fd);
- ra_file_destroy (file);
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+
+ conf = this->private;
+
+ if (op_ret == -1) {
+ goto unwind;
+ }
+
+ file = GF_CALLOC(1, sizeof(*file), gf_ra_mt_ra_file_t);
+ if (!file) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* If O_DIRECT open, we disable caching on it */
+
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
+
+ file->offset = (unsigned long long)0;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long)0;
+ file->pages.file = file;
+
+ ra_conf_lock(conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock(conf);
+
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init(&file->file_lock, NULL);
+
+ if (!file->disabled) {
+ file->page_count = 1;
+ }
+
+ ret = fd_ctx_set(fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_msg(frame->this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_NO_MEMORY,
+ "cannot set read-ahead context"
+ "information in fd (%p)",
+ fd);
+ ra_file_destroy(file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
-
int
-ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ra_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
-
- conf = this->private;
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
- if (!file) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- /* If O_DIRECT open, we disable caching on it */
-
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
-
- file->offset = (unsigned long long) 0;
- //file->size = fd->inode->buf.ia_size;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
-
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
-
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot set read ahead context information in fd (%p)",
- fd);
- ra_file_destroy (file);
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+
+ conf = this->private;
+
+ if (op_ret == -1) {
+ goto unwind;
+ }
+
+ file = GF_CALLOC(1, sizeof(*file), gf_ra_mt_ra_file_t);
+ if (!file) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* If O_DIRECT open, we disable caching on it */
+
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
+
+ file->offset = (unsigned long long)0;
+ // file->size = fd->inode->buf.ia_size;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long)0;
+ file->pages.file = file;
+
+ ra_conf_lock(conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock(conf);
+
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init(&file->file_lock, NULL);
+
+ ret = fd_ctx_set(fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_NO_MEMORY,
+ "cannot set read ahead context"
+ "information in fd (%p)",
+ fd);
+ ra_file_destroy(file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
-
int
-ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+ra_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
- STACK_WIND (frame, ra_open_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open,
- loc, flags, fd, xdata);
+ STACK_WIND(frame, ra_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
+ return 0;
}
-
int
-ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+ra_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
- STACK_WIND (frame, ra_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
+ STACK_WIND(frame, ra_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
- return 0;
+ return 0;
}
/* free cache pages between offset and offset+size,
@@ -213,733 +203,710 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
static void
-flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size,
- int for_write)
+flush_region(call_frame_t *frame, ra_file_t *file, off_t offset, off_t size,
+ int for_write)
{
- ra_page_t *trav = NULL;
- ra_page_t *next = NULL;
-
- ra_file_lock (file);
- {
- trav = file->pages.next;
- while (trav != &file->pages
- && trav->offset < (offset + size)) {
-
- next = trav->next;
- if (trav->offset >= offset) {
- if (!trav->waitq) {
- ra_page_purge (trav);
- }
- else {
- trav->stale = 1;
-
- if (for_write) {
- trav->poisoned = 1;
- }
- }
- }
- trav = next;
+ ra_page_t *trav = NULL;
+ ra_page_t *next = NULL;
+
+ ra_file_lock(file);
+ {
+ trav = file->pages.next;
+ while (trav != &file->pages && trav->offset < (offset + size)) {
+ next = trav->next;
+ if (trav->offset >= offset) {
+ if (!trav->waitq) {
+ ra_page_purge(trav);
+ } else {
+ trav->stale = 1;
+
+ if (for_write) {
+ trav->poisoned = 1;
+ }
}
+ }
+ trav = next;
}
- ra_file_unlock (file);
+ }
+ ra_file_unlock(file);
}
-
int
-ra_release (xlator_t *this, fd_t *fd)
+ra_release(xlator_t *this, fd_t *fd)
{
- uint64_t tmp_file = 0;
- int ret = 0;
+ uint64_t tmp_file = 0;
+ int ret = 0;
- GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
- ret = fd_ctx_del (fd, this, &tmp_file);
+ ret = fd_ctx_del(fd, this, &tmp_file);
- if (!ret) {
- ra_file_destroy ((ra_file_t *)(long)tmp_file);
- }
+ if (!ret) {
+ ra_file_destroy((ra_file_t *)(long)tmp_file);
+ }
out:
- return 0;
+ return 0;
}
-
void
-read_ahead (call_frame_t *frame, ra_file_t *file)
+read_ahead(call_frame_t *frame, ra_file_t *file)
{
- off_t ra_offset = 0;
- size_t ra_size = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- off_t cap = 0;
- char fault = 0;
-
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
-
- if (!file->page_count) {
- goto out;
+ off_t ra_offset = 0;
+ size_t ra_size = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ off_t cap = 0;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, file, out);
+
+ if (!file->page_count) {
+ goto out;
+ }
+
+ ra_size = file->page_size * file->page_count;
+ ra_offset = gf_floor(file->offset, file->page_size);
+ cap = file->size ? file->size : file->offset + ra_size;
+
+ while (ra_offset < min(file->offset + ra_size, cap)) {
+ ra_file_lock(file);
+ {
+ trav = ra_page_get(file, ra_offset);
}
+ ra_file_unlock(file);
- ra_size = file->page_size * file->page_count;
- ra_offset = floor (file->offset, file->page_size);
- cap = file->size ? file->size : file->offset + ra_size;
+ if (!trav)
+ break;
- while (ra_offset < min (file->offset + ra_size, cap)) {
+ ra_offset += file->page_size;
+ }
- ra_file_lock (file);
- {
- trav = ra_page_get (file, ra_offset);
- }
- ra_file_unlock (file);
+ if (trav) {
+ /* comfortable enough */
+ goto out;
+ }
- if (!trav)
- break;
+ trav_offset = ra_offset;
- ra_offset += file->page_size;
- }
+ cap = file->size ? file->size : ra_offset + ra_size;
- if (trav) {
- /* comfortable enough */
- goto out;
+ while (trav_offset < min(ra_offset + ra_size, cap)) {
+ fault = 0;
+ ra_file_lock(file);
+ {
+ trav = ra_page_get(file, trav_offset);
+ if (!trav) {
+ fault = 1;
+ trav = ra_page_create(file, trav_offset);
+ if (trav)
+ trav->dirty = 1;
+ }
}
+ ra_file_unlock(file);
- trav_offset = ra_offset;
-
- cap = file->size ? file->size : ra_offset + ra_size;
-
- while (trav_offset < min(ra_offset + ra_size, cap)) {
- fault = 0;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- fault = 1;
- trav = ra_page_create (file, trav_offset);
- if (trav)
- trav->dirty = 1;
- }
- }
- ra_file_unlock (file);
-
- if (!trav) {
- /* OUT OF MEMORY */
- break;
- }
+ if (!trav) {
+ /* OUT OF MEMORY */
+ break;
+ }
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "RA at offset=%"PRId64, trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
- trav_offset += file->page_size;
+ if (fault) {
+ gf_msg_trace(frame->this->name, 0, "RA at offset=%" PRId64,
+ trav_offset);
+ ra_page_fault(file, frame, trav_offset);
}
+ trav_offset += file->page_size;
+ }
out:
- return;
+ return;
}
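
read_ahead() above prefetches a window of file->page_count pages starting at the current offset rounded down to a page boundary, and caps the window at the known file size (or at offset plus the window size when the size is unknown). The standalone sketch below works through only that window-bounds arithmetic; it leaves out the cached-page lookups, locking and the actual page faults, and every name in it is hypothetical.

    #include <stdio.h>
    #include <sys/types.h>

    /* Local equivalents of the gf_floor()/min() arithmetic used above. */
    static off_t floor_to(off_t v, off_t unit) { return (v / unit) * unit; }
    static off_t min_of(off_t a, off_t b) { return a < b ? a : b; }

    /* Print the page-aligned offsets a read-ahead window would cover for a
     * request ending at 'offset', given 'page_count' pages of 'page_size'
     * bytes and an optionally known file size (0 = unknown). */
    static void
    show_window(off_t offset, off_t page_size, int page_count, off_t file_size)
    {
        off_t ra_size = page_size * page_count;
        off_t ra_offset = floor_to(offset, page_size);
        off_t cap = file_size ? file_size : offset + ra_size;

        for (; ra_offset < min_of(offset + ra_size, cap); ra_offset += page_size)
            printf("prefetch page at %lld\n", (long long)ra_offset);
    }

    int
    main(void)
    {
        /* next expected offset 200000, 128 KiB pages, 4-page window,
           file known to be 600000 bytes long */
        show_window(200000, 131072, 4, 600000);
        return 0;
    }

With the values in main() this prints the four page offsets 131072, 262144, 393216 and 524288, i.e. the window stops at the last page that still starts before the file-size cap.
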
-
int
-ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
+ra_need_atime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- GF_ASSERT (frame);
- STACK_DESTROY (frame->root);
- return 0;
+ GF_ASSERT(frame);
+ STACK_DESTROY(frame->root);
+ return 0;
}
-
static void
-dispatch_requests (call_frame_t *frame, ra_file_t *file)
+dispatch_requests(call_frame_t *frame, ra_file_t *file)
{
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- call_frame_t *ra_frame = NULL;
- char need_atime_update = 1;
- char fault = 0;
-
- GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
-
- local = frame->local;
- conf = file->conf;
-
- rounded_offset = floor (local->offset, file->page_size);
- rounded_end = roof (local->offset + local->size, file->page_size);
-
- trav_offset = rounded_offset;
-
- while (trav_offset < rounded_end) {
- fault = 0;
-
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- trav = ra_page_create (file, trav_offset);
- if (!trav) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
- }
- fault = 1;
- need_atime_update = 0;
- }
- trav->dirty = 0;
-
- if (trav->ready) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "HIT at offset=%"PRId64".",
- trav_offset);
- ra_frame_fill (trav, frame);
- } else {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "IN-TRANSIT at offset=%"PRId64".",
- trav_offset);
- ra_wait_on_page (trav, frame);
- need_atime_update = 0;
- }
- }
- unlock:
- ra_file_unlock (file);
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ call_frame_t *ra_frame = NULL;
+ char need_atime_update = 1;
+ char fault = 0;
- if (local->op_ret == -1) {
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, file, out);
+
+ local = frame->local;
+ conf = file->conf;
+
+ rounded_offset = gf_floor(local->offset, file->page_size);
+ rounded_end = gf_roof(local->offset + local->size, file->page_size);
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "MISS at offset=%"PRId64".",
- trav_offset);
- ra_page_fault (file, frame, trav_offset);
+ trav_offset = rounded_offset;
+
+ while (trav_offset < rounded_end) {
+ fault = 0;
+
+ ra_file_lock(file);
+ {
+ trav = ra_page_get(file, trav_offset);
+ if (!trav) {
+ trav = ra_page_create(file, trav_offset);
+ if (!trav) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
}
+ fault = 1;
+ need_atime_update = 0;
+ }
+ trav->dirty = 0;
+
+ if (trav->ready) {
+ gf_msg_trace(frame->this->name, 0, "HIT at offset=%" PRId64 ".",
+ trav_offset);
+ ra_frame_fill(trav, frame);
+ } else {
+ gf_msg_trace(frame->this->name, 0,
+ "IN-TRANSIT at "
+ "offset=%" PRId64 ".",
+ trav_offset);
+ ra_wait_on_page(trav, frame);
+ need_atime_update = 0;
+ }
+ }
+ unlock:
+ ra_file_unlock(file);
- trav_offset += file->page_size;
+ if (local->op_ret == -1) {
+ goto out;
}
- if (need_atime_update && conf->force_atime_update) {
- /* TODO: use untimens() since readv() can confuse underlying
- io-cache and others */
- ra_frame = copy_frame (frame);
- if (ra_frame == NULL) {
- goto out;
- }
+ if (fault) {
+ gf_msg_trace(frame->this->name, 0, "MISS at offset=%" PRId64 ".",
+ trav_offset);
+ ra_page_fault(file, frame, trav_offset);
+ }
+
+ trav_offset += file->page_size;
+ }
- STACK_WIND (ra_frame, ra_need_atime_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, 1, 1, 0, NULL);
+ if (need_atime_update && conf->force_atime_update) {
+ /* TODO: use untimens() since readv() can confuse underlying
+ io-cache and others */
+ ra_frame = copy_frame(frame);
+ if (ra_frame == NULL) {
+ goto out;
}
+ STACK_WIND(ra_frame, ra_need_atime_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->readv, file->fd, 1, 1, 0,
+ NULL);
+ }
+
out:
- return ;
+ return;
}
-
int
-ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
+ra_readv_disabled_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- GF_ASSERT (frame);
+ GF_ASSERT(frame);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
- return 0;
+ return 0;
}
-
int
-ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+ra_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- ra_file_t *file = NULL;
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- int op_errno = EINVAL;
- char expected_offset = 1;
- uint64_t tmp_file = 0;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
-
- conf = this->private;
-
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
- offset, size);
-
- fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file || file->disabled) {
- goto disabled;
+ ra_file_t *file = NULL;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ int op_errno = EINVAL;
+ char expected_offset = 1;
+ uint64_t tmp_file = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+ conf = this->private;
+
+ gf_msg_trace(this->name, 0,
+ "NEW REQ at offset=%" PRId64 " for size=%" GF_PRI_SIZET "",
+ offset, size);
+
+ fd_ctx_get(fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file || file->disabled) {
+ goto disabled;
+ }
+
+ if (file->offset != offset) {
+ gf_msg_trace(this->name, 0,
+ "unexpected offset (%" PRId64 " != %" PRId64
+ ") "
+ "resetting",
+ file->offset, offset);
+
+ expected_offset = file->expected = file->page_count = 0;
+ } else {
+ gf_msg_trace(this->name, 0,
+ "expected offset (%" PRId64 ") when page_count=%d", offset,
+ file->page_count);
+
+ if (file->expected < (file->page_size * conf->page_count)) {
+ file->expected += size;
+ file->page_count = min((file->expected / file->page_size),
+ conf->page_count);
}
+ }
- if (file->offset != offset) {
- gf_log (this->name, GF_LOG_TRACE,
- "unexpected offset (%"PRId64" != %"PRId64") resetting",
- file->offset, offset);
-
- expected_offset = file->expected = file->page_count = 0;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "expected offset (%"PRId64") when page_count=%d",
- offset, file->page_count);
-
- if (file->expected < (file->page_size * conf->page_count)) {
- file->expected += size;
- file->page_count = min ((file->expected
- / file->page_size),
- conf->page_count);
- }
- }
+ if (!expected_offset) {
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
- if (!expected_offset) {
- flush_region (frame, file, 0, file->pages.prev->offset + 1, 0);
- }
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ local->fd = fd;
+ local->offset = offset;
+ local->size = size;
+ local->wait_count = 1;
- local->fd = fd;
- local->offset = offset;
- local->size = size;
- local->wait_count = 1;
+ local->fill.next = &local->fill;
+ local->fill.prev = &local->fill;
- local->fill.next = &local->fill;
- local->fill.prev = &local->fill;
+ pthread_mutex_init(&local->local_lock, NULL);
- pthread_mutex_init (&local->local_lock, NULL);
+ frame->local = local;
- frame->local = local;
+ dispatch_requests(frame, file);
- dispatch_requests (frame, file);
+ flush_region(frame, file, 0, gf_floor(offset, file->page_size), 0);
- flush_region (frame, file, 0, floor (offset, file->page_size), 0);
+ read_ahead(frame, file);
- read_ahead (frame, file);
+ file->offset = offset + size;
- ra_frame_return (frame);
+ ra_frame_return(frame);
- file->offset = offset + size;
-
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
- NULL);
+ STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
disabled:
- STACK_WIND (frame, ra_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+ STACK_WIND(frame, ra_readv_disabled_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
}
-
int
-ra_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+ra_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- GF_ASSERT (frame);
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
+ GF_ASSERT(frame);
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
-
int
-ra_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+ra_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- GF_ASSERT (frame);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ GF_ASSERT(frame);
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-
int
-ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ra_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- ra_file_t *file = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+ int32_t op_errno = EINVAL;
- fd_ctx_get (fd, this, &tmp_file);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- file = (ra_file_t *)(long)tmp_file;
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
- }
-
- STACK_WIND (frame, ra_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd, xdata);
- return 0;
+ STACK_WIND(frame, ra_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-
int
-ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
- dict_t *xdata)
+ra_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+ int32_t op_errno = EINVAL;
- fd_ctx_get (fd, this, &tmp_file);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- file = (ra_file_t *)(long)tmp_file;
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
- }
-
- STACK_WIND (frame, ra_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync, fd, datasync, xdata);
- return 0;
+ STACK_WIND(frame, ra_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int
-ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+ra_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
+ ra_file_t *file = NULL;
- GF_ASSERT (frame);
+ GF_ASSERT(frame);
- file = frame->local;
+ file = frame->local;
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
- }
+ if (file) {
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int
-ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
+ra_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
-
- fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+
+ if (iter_fd == fd)
frame->local = file;
- /* reset the read-ahead counters too */
- file->expected = file->page_count = 0;
+
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
+
+ /* reset the read-ahead counters too */
+ file->expected = file->page_count = 0;
}
+ }
+ UNLOCK(&inode->lock);
- STACK_WIND (frame, ra_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
+ STACK_WIND(frame, ra_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int
-ra_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+ra_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- GF_ASSERT (frame);
+ GF_ASSERT(frame);
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int
-ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+ra_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- GF_ASSERT (frame);
+ GF_ASSERT(frame);
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-
int
-ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+ra_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc, unwind);
- inode = loc->inode;
+ inode = loc->inode;
- LOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
{
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- /*
- * Truncation invalidates reads just like writing does.
- * TBD: this seems to flush more than it should. The
- * only time we should flush at all is when we're
- * shortening (not lengthening) the file, and then only
- * from new EOF to old EOF. The same problem exists in
- * ra_ftruncate.
- */
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1, 1);
- }
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_ftruncate.
+ */
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
}
- UNLOCK (&inode->lock);
+ }
+ UNLOCK(&inode->lock);
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- loc, offset, xdata);
- return 0;
+ STACK_WIND(frame, ra_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
void
-ra_page_dump (struct ra_page *page)
+ra_page_dump(struct ra_page *page)
{
- int i = 0;
- call_frame_t *frame = NULL;
- char key[GF_DUMP_MAX_BUF_LEN] = {0, };
- ra_waitq_t *trav = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ ra_waitq_t *trav = NULL;
- if (page == NULL) {
- goto out;
- }
+ if (page == NULL) {
+ goto out;
+ }
- gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+ gf_proc_dump_write("offset", "%" PRId64, page->offset);
- gf_proc_dump_write ("size", "%"PRId64, page->size);
+ gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);
- gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");
- gf_proc_dump_write ("poisoned", "%s", page->poisoned ? "yes" : "no");
+ gf_proc_dump_write("poisoned", "%s", page->poisoned ? "yes" : "no");
- gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+ gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");
- for (trav = page->waitq; trav; trav = trav->next) {
- frame = trav->data;
- sprintf (key, "waiting-frame[%d]", i++);
- gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
- }
+ for (trav = page->waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ sprintf(key, "waiting-frame[%d]", i++);
+ gf_proc_dump_write(key, "%" PRId64, frame->root->unique);
+ }
out:
- return;
+ return;
}
int32_t
-ra_fdctx_dump (xlator_t *this, fd_t *fd)
+ra_fdctx_dump(xlator_t *this, fd_t *fd)
{
- ra_file_t *file = NULL;
- ra_page_t *page = NULL;
- int32_t ret = 0, i = 0;
- uint64_t tmp_file = 0;
- char *path = NULL;
- char key[GF_DUMP_MAX_BUF_LEN] = {0, };
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
-
- fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (file == NULL) {
- ret = 0;
- goto out;
- }
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ int32_t ret = 0, i = 0;
+ uint64_t tmp_file = 0;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ fd_ctx_get(fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (file == NULL) {
+ ret = 0;
+ goto out;
+ }
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.read-ahead",
- "file");
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.read-ahead", "file");
- gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
- ret = __inode_path (fd->inode, NULL, &path);
- if (path != NULL) {
- gf_proc_dump_write ("path", "%s", path);
- GF_FREE (path);
- }
+ ret = __inode_path(fd->inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
- gf_proc_dump_write ("fd", "%p", fd);
+ gf_proc_dump_write("fd", "%p", fd);
- gf_proc_dump_write ("disabled", "%s", file->disabled ? "yes" : "no");
+ gf_proc_dump_write("disabled", "%s", file->disabled ? "yes" : "no");
- if (file->disabled) {
- ret = 0;
- goto out;
- }
+ if (file->disabled) {
+ ret = 0;
+ goto out;
+ }
- gf_proc_dump_write ("page-size", "%"PRId64, file->page_size);
+ gf_proc_dump_write("page-size", "%" PRId64, file->page_size);
- gf_proc_dump_write ("page-count", "%u", file->page_count);
+ gf_proc_dump_write("page-count", "%u", file->page_count);
- gf_proc_dump_write ("next-expected-offset-for-sequential-reads",
- "%"PRId64, file->offset);
+ gf_proc_dump_write("next-expected-offset-for-sequential-reads", "%" PRId64,
+ file->offset);
- for (page = file->pages.next; page != &file->pages;
- page = page->next) {
- sprintf (key, "page[%d]", i);
- gf_proc_dump_write (key, "%p", page[i++]);
- ra_page_dump (page);
- }
+ for (page = file->pages.next; page != &file->pages; page = page->next) {
+ gf_proc_dump_write("page", "%d: %p", i++, (void *)page);
+ ra_page_dump(page);
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ra_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+ ra_conf_t *conf = NULL;
+
+ conf = this->private;
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- inode = fd->inode;
+ inode = fd->inode;
- LOCK (&inode->lock);
+ if (conf->force_atime_update) {
+ LOCK(&inode->lock);
{
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1, 0);
- }
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
}
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
+ }
- STACK_WIND (frame, ra_attr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd, xdata);
- return 0;
+ STACK_WIND(frame, ra_attr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
-
int
-ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+ra_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- inode = fd->inode;
+ inode = fd->inode;
- LOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
{
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (!file)
- continue;
- /*
- * Truncation invalidates reads just like writing does.
- * TBD: this seems to flush more than it should. The
- * only time we should flush at all is when we're
- * shortening (not lengthening) the file, and then only
- * from new EOF to old EOF. The same problem exists in
- * ra_truncate.
- */
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1, 1);
- }
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_truncate.
+ */
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
}
- UNLOCK (&inode->lock);
+ }
+ UNLOCK(&inode->lock);
- STACK_WIND (frame, ra_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate, fd, offset, xdata);
- return 0;
+ STACK_WIND(frame, ra_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
@@ -947,261 +914,359 @@ ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- GF_ASSERT (frame);
+ GF_ASSERT(frame);
- STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
static int
ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
+ size_t len, dict_t *xdata)
{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- uint64_t tmp_file = 0;
- int32_t op_errno = EINVAL;
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- inode = fd->inode;
+ inode = fd->inode;
- LOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
{
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (!file)
- continue;
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
- flush_region(frame, file, offset, len, 1);
- }
+ flush_region(frame, file, offset, len, 1);
}
- UNLOCK (&inode->lock);
+ }
+ UNLOCK(&inode->lock);
- STACK_WIND (frame, ra_discard_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata);
- return 0;
+ STACK_WIND(frame, ra_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
-ra_priv_dump (xlator_t *this)
+ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- int ret = -1;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- gf_boolean_t add_section = _gf_false;
+ GF_ASSERT(frame);
- if (!this) {
- goto out;
- }
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING, "conf null in xlator");
- goto out;
- }
+static int
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- gf_proc_dump_build_key (key_prefix, "xlator.performance.read-ahead",
- "priv");
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- gf_proc_dump_add_section (key_prefix);
- add_section = _gf_true;
+ inode = fd->inode;
- ret = pthread_mutex_trylock (&conf->conf_lock);
- if (ret)
- goto out;
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
{
- gf_proc_dump_write ("page_size", "%d", conf->page_size);
- gf_proc_dump_write ("page_count", "%d", conf->page_count);
- gf_proc_dump_write ("force_atime_update", "%d",
- conf->force_atime_update);
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
}
- pthread_mutex_unlock (&conf->conf_lock);
+ }
+ UNLOCK(&inode->lock);
- ret = 0;
-out:
- if (ret && conf) {
- if (add_section == _gf_false)
- gf_proc_dump_add_section (key_prefix);
+ STACK_WIND(frame, ra_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
- gf_proc_dump_write ("Unable to dump priv",
- "(Lock acquisition failed) %s", this->name);
- }
- return ret;
+unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+int
+ra_priv_dump(xlator_t *this)
+{
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ if (!this) {
+ goto out;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_XLATOR_CONF_NULL,
+ "conf null in xlator");
+ goto out;
+ }
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.read-ahead", "priv");
+
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ ret = pthread_mutex_trylock(&conf->conf_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write("page_size", "%" PRIu64, conf->page_size);
+ gf_proc_dump_write("page_count", "%d", conf->page_count);
+ gf_proc_dump_write("force_atime_update", "%d",
+ conf->force_atime_update);
+ }
+ pthread_mutex_unlock(&conf->conf_lock);
+
+ ret = 0;
+out:
+ if (ret && conf) {
+ gf_proc_dump_write("Unable to dump priv",
+ "(Lock acquisition failed) %s", this->name);
+ }
+ return ret;
+}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this) {
- goto out;
- }
+ if (!this) {
+ goto out;
+ }
- ret = xlator_mem_acct_init (this, gf_ra_mt_end + 1);
+ ret = xlator_mem_acct_init(this, gf_ra_mt_end + 1);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- }
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READ_AHEAD_MSG_NO_MEMORY,
+ "Memory accounting init"
+ "failed");
+ }
out:
- return ret;
+ return ret;
}
int
-reconfigure (xlator_t *this, dict_t *options)
+reconfigure(xlator_t *this, dict_t *options)
{
- ra_conf_t *conf = NULL;
- int ret = -1;
+ ra_conf_t *conf = NULL;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
- GF_VALIDATE_OR_GOTO ("read-ahead", this->private, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", this->private, out);
- conf = this->private;
+ conf = this->private;
- GF_OPTION_RECONF ("page-count", conf->page_count, options, uint32, out);
+ GF_OPTION_RECONF("page-count", conf->page_count, options, uint32, out);
- GF_OPTION_RECONF ("page-size", conf->page_size, options, size, out);
+ GF_OPTION_RECONF("page-size", conf->page_size, options, size_uint64, out);
- ret = 0;
- out:
- return ret;
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
}
int
-init (xlator_t *this)
+init(xlator_t *this)
{
- ra_conf_t *conf = NULL;
- int32_t ret = -1;
+ ra_conf_t *conf = NULL;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: read-ahead not configured with exactly one"
- " child");
- goto out;
- }
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: read-ahead not configured with exactly one"
+ " child");
+ goto out;
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
- conf = (void *) GF_CALLOC (1, sizeof (*conf), gf_ra_mt_ra_conf_t);
- if (conf == NULL) {
- goto out;
- }
+ conf = (void *)GF_CALLOC(1, sizeof(*conf), gf_ra_mt_ra_conf_t);
+ if (conf == NULL) {
+ goto out;
+ }
- conf->page_size = this->ctx->page_size;
+ conf->page_size = this->ctx->page_size;
- GF_OPTION_INIT ("page-size", conf->page_size, size, out);
+ GF_OPTION_INIT("page-size", conf->page_size, size_uint64, out);
- GF_OPTION_INIT ("page-count", conf->page_count, uint32, out);
+ GF_OPTION_INIT("page-count", conf->page_count, uint32, out);
- GF_OPTION_INIT ("force-atime-update", conf->force_atime_update, bool, out);
+ GF_OPTION_INIT("force-atime-update", conf->force_atime_update, bool, out);
- conf->files.next = &conf->files;
- conf->files.prev = &conf->files;
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
- pthread_mutex_init (&conf->conf_lock, NULL);
+ conf->files.next = &conf->files;
+ conf->files.prev = &conf->files;
- this->local_pool = mem_pool_new (ra_local_t, 64);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
+ pthread_mutex_init(&conf->conf_lock, NULL);
- this->private = conf;
- ret = 0;
+ this->local_pool = mem_pool_new(ra_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READ_AHEAD_MSG_NO_MEMORY,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = conf;
+ ret = 0;
out:
- if (ret == -1) {
- GF_FREE (conf);
- }
+ if (ret == -1) {
+ GF_FREE(conf);
+ }
- return ret;
+ return ret;
}
-
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- ra_conf_t *conf = NULL;
+ ra_conf_t *conf = NULL;
- GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
- conf = this->private;
- if (conf == NULL) {
- goto out;
- }
+ conf = this->private;
+ if (conf == NULL) {
+ goto out;
+ }
- this->private = NULL;
+ this->private = NULL;
- GF_ASSERT ((conf->files.next == &conf->files)
- && (conf->files.prev == &conf->files));
+    /* The file structures allocated in open and create are not yet
+     * destroyed here; until that is fixed, the assert below is demoted
+     * to a warning.
+ GF_ASSERT ((conf->files.next == &conf->files)
+ && (conf->files.prev == &conf->files));
+ */
+ if (!((conf->files.next == &conf->files) &&
+ (conf->files.prev == &conf->files))) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND,
+ "undestroyed read ahead file structures found");
+ }
- pthread_mutex_destroy (&conf->conf_lock);
- GF_FREE (conf);
+ pthread_mutex_destroy(&conf->conf_lock);
+ GF_FREE(conf);
out:
- return;
+ return;
}
struct xlator_fops fops = {
- .open = ra_open,
- .create = ra_create,
- .readv = ra_readv,
- .writev = ra_writev,
- .flush = ra_flush,
- .fsync = ra_fsync,
- .truncate = ra_truncate,
- .ftruncate = ra_ftruncate,
- .fstat = ra_fstat,
- .discard = ra_discard,
+ .open = ra_open,
+ .create = ra_create,
+ .readv = ra_readv,
+ .writev = ra_writev,
+ .flush = ra_flush,
+ .fsync = ra_fsync,
+ .truncate = ra_truncate,
+ .ftruncate = ra_ftruncate,
+ .fstat = ra_fstat,
+ .discard = ra_discard,
+ .zerofill = ra_zerofill,
};
struct xlator_cbks cbks = {
- .release = ra_release,
+ .release = ra_release,
};
struct xlator_dumpops dumpops = {
- .priv = ra_priv_dump,
- .fdctx = ra_fdctx_dump,
+ .priv = ra_priv_dump,
+ .fdctx = ra_fdctx_dump,
};
struct volume_options options[] = {
- { .key = {"force-atime-update"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "false"
- },
- { .key = {"page-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 16,
- .default_value = "4",
- .description = "Number of pages that will be pre-fetched"
- },
- { .key = {"page-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 4096,
- .max = 1048576 * 64,
- .default_value = "131072",
- .description = "Page size with which read-ahead performs server I/O"
- },
- { .key = {NULL} },
+ {
+ .key = {"read-ahead"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable read-ahead",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"force-atime-update"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {1},
+ .tags = {"read-ahead"},
+ .default_value = "false"},
+ {.key = {"page-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 16,
+ .default_value = "4",
+ .op_version = {1},
+ .tags = {"read-ahead"},
+ .description = "Number of pages that will be pre-fetched"},
+ {.key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 1048576 * 64,
+ .default_value = "131072",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"read-ahead"},
+ .description = "Page size with which read-ahead performs server I/O"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"read-ahead"},
+ .description = "Enable/Disable read ahead translator"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "read-ahead",
+ .category = GF_MAINTAINED,
};
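
The option table above is what init() and reconfigure() consume, whether the keys arrive through a generated volfile or through glusterd's option framework. As a minimal hand-written illustration (the volume and subvolume names below are invented for the example and are not part of this patch), a client-side volfile fragment loading the translator with non-default tunables could look like:

volume test-read-ahead
    type performance/read-ahead
    option page-count 8
    option page-size 262144
    option force-atime-update false
    subvolumes test-client-0
end-volume

Values outside the min/max bounds declared in the table (1 to 16 pages, 4096 bytes to 64 MiB per page) fail option validation when the graph is loaded.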
diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h
index d1d768c34fb..e9432fb47cc 100644
--- a/xlators/performance/read-ahead/src/read-ahead.h
+++ b/xlators/performance/read-ahead/src/read-ahead.h
@@ -11,17 +11,11 @@
#ifndef __READ_AHEAD_H
#define __READ_AHEAD_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
#include "read-ahead-mem-types.h"
struct ra_conf;
@@ -30,84 +24,77 @@ struct ra_page;
struct ra_file;
struct ra_waitq;
-
struct ra_waitq {
- struct ra_waitq *next;
- void *data;
+ struct ra_waitq *next;
+ void *data;
};
-
struct ra_fill {
- struct ra_fill *next;
- struct ra_fill *prev;
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
- struct iobref *iobref;
+ struct ra_fill *next;
+ struct ra_fill *prev;
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
};
-
struct ra_local {
- mode_t mode;
- struct ra_fill fill;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- off_t pending_offset;
- size_t pending_size;
- fd_t *fd;
- int32_t wait_count;
- pthread_mutex_t local_lock;
+ mode_t mode;
+ struct ra_fill fill;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ off_t pending_offset;
+ size_t pending_size;
+ fd_t *fd;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
};
-
struct ra_page {
- struct ra_page *next;
- struct ra_page *prev;
- struct ra_file *file;
- char dirty; /* Internal request, not from user. */
- char poisoned; /* Pending read invalidated by write. */
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ra_waitq *waitq;
- struct iobref *iobref;
- char stale;
+ struct ra_page *next;
+ struct ra_page *prev;
+ struct ra_file *file;
+ char dirty; /* Internal request, not from user. */
+ char poisoned; /* Pending read invalidated by write. */
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ra_waitq *waitq;
+ struct iobref *iobref;
+ char stale;
};
-
struct ra_file {
- struct ra_file *next;
- struct ra_file *prev;
- struct ra_conf *conf;
- fd_t *fd;
- int disabled;
- size_t expected;
- struct ra_page pages;
- off_t offset;
- size_t size;
- int32_t refcount;
- pthread_mutex_t file_lock;
- struct iatt stbuf;
- uint64_t page_size;
- uint32_t page_count;
+ struct ra_file *next;
+ struct ra_file *prev;
+ struct ra_conf *conf;
+ fd_t *fd;
+ int disabled;
+ size_t expected;
+ struct ra_page pages;
+ off_t offset;
+ size_t size;
+ int32_t refcount;
+ pthread_mutex_t file_lock;
+ struct iatt stbuf;
+ uint64_t page_size;
+ uint32_t page_count;
};
-
struct ra_conf {
- uint64_t page_size;
- uint32_t page_count;
- void *cache_block;
- struct ra_file files;
- gf_boolean_t force_atime_update;
- pthread_mutex_t conf_lock;
+ uint64_t page_size;
+ uint32_t page_count;
+ void *cache_block;
+ struct ra_file files;
+ gf_boolean_t force_atime_update;
+ pthread_mutex_t conf_lock;
};
-
typedef struct ra_conf ra_conf_t;
typedef struct ra_local ra_local_t;
typedef struct ra_page ra_page_t;
@@ -116,77 +103,69 @@ typedef struct ra_waitq ra_waitq_t;
typedef struct ra_fill ra_fill_t;
ra_page_t *
-ra_page_get (ra_file_t *file,
- off_t offset);
+ra_page_get(ra_file_t *file, off_t offset);
ra_page_t *
-ra_page_create (ra_file_t *file,
- off_t offset);
+ra_page_create(ra_file_t *file, off_t offset);
void
-ra_page_fault (ra_file_t *file,
- call_frame_t *frame,
- off_t offset);
+ra_page_fault(ra_file_t *file, call_frame_t *frame, off_t offset);
void
-ra_wait_on_page (ra_page_t *page,
- call_frame_t *frame);
+ra_wait_on_page(ra_page_t *page, call_frame_t *frame);
ra_waitq_t *
-ra_page_wakeup (ra_page_t *page);
+ra_page_wakeup(ra_page_t *page);
void
-ra_page_flush (ra_page_t *page);
+ra_page_flush(ra_page_t *page);
ra_waitq_t *
-ra_page_error (ra_page_t *page,
- int32_t op_ret,
- int32_t op_errno);
+ra_page_error(ra_page_t *page, int32_t op_ret, int32_t op_errno);
void
-ra_page_purge (ra_page_t *page);
+ra_page_purge(ra_page_t *page);
void
-ra_frame_return (call_frame_t *frame);
+ra_frame_return(call_frame_t *frame);
void
-ra_frame_fill (ra_page_t *page,
- call_frame_t *frame);
+ra_frame_fill(ra_page_t *page, call_frame_t *frame);
void
-ra_file_destroy (ra_file_t *file);
+ra_file_destroy(ra_file_t *file);
static inline void
-ra_file_lock (ra_file_t *file)
+ra_file_lock(ra_file_t *file)
{
- pthread_mutex_lock (&file->file_lock);
+ pthread_mutex_lock(&file->file_lock);
}
static inline void
-ra_file_unlock (ra_file_t *file)
+ra_file_unlock(ra_file_t *file)
{
- pthread_mutex_unlock (&file->file_lock);
+ pthread_mutex_unlock(&file->file_lock);
}
static inline void
-ra_conf_lock (ra_conf_t *conf)
+ra_conf_lock(ra_conf_t *conf)
{
- pthread_mutex_lock (&conf->conf_lock);
+ pthread_mutex_lock(&conf->conf_lock);
}
static inline void
-ra_conf_unlock (ra_conf_t *conf)
+ra_conf_unlock(ra_conf_t *conf)
{
- pthread_mutex_unlock (&conf->conf_lock);
+ pthread_mutex_unlock(&conf->conf_lock);
}
static inline void
-ra_local_lock (ra_local_t *local)
+ra_local_lock(ra_local_t *local)
{
- pthread_mutex_lock (&local->local_lock);
+ pthread_mutex_lock(&local->local_lock);
}
static inline void
-ra_local_unlock (ra_local_t *local)
+ra_local_unlock(ra_local_t *local)
{
- pthread_mutex_unlock (&local->local_lock);
+ pthread_mutex_unlock(&local->local_lock);
}
#endif /* __READ_AHEAD_H */
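
The ra_conf.files and ra_file.pages members above are sentinel heads of intrusive, circular doubly-linked lists: init() points conf->files.next and .prev back at &conf->files, ra_fdctx_dump() walks file->pages until the cursor returns to the head, and fini() treats next == prev == &head as the empty list. A stand-alone sketch of the same pattern, with made-up names (node_t and the helpers are illustrative, not from this patch):

#include <stdio.h>

typedef struct node {
    struct node *next;
    struct node *prev;
    int payload;
} node_t;

/* empty list: the sentinel head points at itself, as in init() */
static void list_init(node_t *head)
{
    head->next = head;
    head->prev = head;
}

/* the emptiness test fini() performs on conf->files */
static int list_empty(node_t *head)
{
    return head->next == head && head->prev == head;
}

/* splice a node in just before the head, i.e. at the tail */
static void list_add_tail(node_t *head, node_t *n)
{
    n->prev = head->prev;
    n->next = head;
    head->prev->next = n;
    head->prev = n;
}

int main(void)
{
    node_t files;
    node_t a = {.payload = 1}, b = {.payload = 2};

    list_init(&files);
    list_add_tail(&files, &a);
    list_add_tail(&files, &b);

    /* walk the way ra_fdctx_dump() walks file->pages */
    for (node_t *n = files.next; n != &files; n = n->next)
        printf("payload=%d\n", n->payload);

    printf("empty=%d\n", list_empty(&files));
    return 0;
}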
diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/performance/readdir-ahead/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am
new file mode 100644
index 00000000000..3d6b6ae951f
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/Makefile.am
@@ -0,0 +1,18 @@
+xlator_LTLIBRARIES = readdir-ahead.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+readdir_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+readdir_ahead_la_SOURCES = readdir-ahead.c
+readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h \
+ readdir-ahead-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
new file mode 100644
index 00000000000..498ffae7f64
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RDA_MEM_TYPES_H__
+#define __RDA_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_rda_mem_types_ {
+ gf_rda_mt_rda_local = gf_common_mt_end + 1,
+ gf_rda_mt_rda_fd_ctx,
+ gf_rda_mt_rda_priv,
+ gf_rda_mt_inode_ctx_t,
+ gf_rda_mt_end
+};
+
+#endif
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
new file mode 100644
index 00000000000..28ec14dd845
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
@@ -0,0 +1,30 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _READDIR_AHEAD_MESSAGES_H_
+#define _READDIR_AHEAD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(READDIR_AHEAD, READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ READDIR_AHEAD_MSG_VOL_MISCONFIGURED, READDIR_AHEAD_MSG_NO_MEMORY,
+ READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+ READDIR_AHEAD_MSG_OUT_OF_SEQUENCE, READDIR_AHEAD_MSG_DICT_OP_FAILED);
+
+#endif /* _READDIR_AHEAD_MESSAGES_H_ */
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
new file mode 100644
index 00000000000..4ba7ee7077a
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
@@ -0,0 +1,1382 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * performance/readdir-ahead preloads a local buffer with directory entries
+ * on opendir. The optimization involves using maximum sized gluster rpc
+ * requests (128k) to minimize overhead of smaller client requests.
+ *
+ * For example, fuse currently supports a maximum readdir buffer of 4k
+ * (regardless of the filesystem client's buffer size). readdir-ahead should
+ * effectively convert these smaller requests into fewer, larger sized requests
+ * for simple, sequential workloads (i.e., ls).
+ *
+ * The translator is currently designed to handle the simple, sequential case
+ * only. If a non-sequential directory read occurs, readdir-ahead disables
+ * preloads on the directory.
+ */
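
A toy, self-contained model of the buffering effect described in the comment above; the 128k and 4k figures are the ones mentioned there, and nothing in the sketch is GlusterFS code:

#include <stdio.h>
#include <string.h>

#define BACKEND_CHUNK (128 * 1024) /* one large backend readdirp */
#define CLIENT_CHUNK (4 * 1024)    /* one small front-end readdir */

int main(void)
{
    static char preload[BACKEND_CHUNK];
    size_t filled = sizeof(preload); /* pretend one big request filled it */
    size_t served = 0;
    int client_calls = 0;

    memset(preload, 'd', sizeof(preload));

    /* sequential small reads drain the preload with no further backend I/O */
    while (served < filled) {
        size_t n = filled - served;
        if (n > CLIENT_CHUNK)
            n = CLIENT_CHUNK;
        served += n;
        client_calls++;
    }

    printf("%d client requests served from 1 backend request\n", client_calls);
    return 0;
}

With these sizes one preload absorbs roughly 32 small requests, which is the amplification readdir-ahead aims for on sequential listings such as ls.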
+
+#include <math.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include "readdir-ahead.h"
+#include "readdir-ahead-mem-types.h"
+#include <glusterfs/defaults.h>
+#include "readdir-ahead-messages.h"
+static int
+rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+
+static void
+rda_local_wipe(struct rda_local *local)
+{
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->xattrs)
+ dict_unref(local->xattrs);
+ if (local->inode)
+ inode_unref(local->inode);
+}
+
+/*
+ * Get (or create) the fd context for storing prepopulated directory
+ * entries.
+ */
+static struct rda_fd_ctx *
+get_rda_fd_ctx(fd_t *fd, xlator_t *this)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ LOCK(&fd->lock);
+
+ if (__fd_ctx_get(fd, this, &val) < 0) {
+ ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), gf_rda_mt_rda_fd_ctx);
+ if (!ctx)
+ goto out;
+
+ LOCK_INIT(&ctx->lock);
+ INIT_LIST_HEAD(&ctx->entries.list);
+ ctx->state = RDA_FD_NEW;
+ /* ctx offset values initialized to 0 */
+ ctx->xattrs = NULL;
+
+ if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx) < 0) {
+ GF_FREE(ctx);
+ ctx = NULL;
+ goto out;
+ }
+ } else {
+ ctx = (struct rda_fd_ctx *)(uintptr_t)val;
+ }
+out:
+ UNLOCK(&fd->lock);
+ return ctx;
+}
+
+static rda_inode_ctx_t *
+__rda_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ rda_inode_ctx_t *ctx_p = NULL;
+
+ ret = __inode_ctx_get1(inode, this, &ctx_uint);
+ if (ret == 0)
+ return (rda_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_rda_mt_inode_ctx_t);
+ if (!ctx_p)
+ return NULL;
+
+ GF_ATOMIC_INIT(ctx_p->generation, 0);
+
+ ctx_uint = (uint64_t)(uintptr_t)ctx_p;
+ ret = __inode_ctx_set1(inode, this, &ctx_uint);
+ if (ret < 0) {
+ GF_FREE(ctx_p);
+ return NULL;
+ }
+
+ return ctx_p;
+}
+
+static int
+__rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+ struct iatt tmp_stat = {
+ 0,
+ };
+
+ ctx_p = __rda_inode_ctx_get(inode, this);
+ if (!ctx_p)
+ return -1;
+
+ if ((!stbuf_in) || (stbuf_in->ia_ctime == 0)) {
+ /* A fop modified a file but valid stbuf is not provided.
+ * Can't update iatt to reflect results of fop and hence
+ * invalidate the iatt stored in dentry.
+ *
+ * An example of this case can be response of write request
+ * that is cached in write-behind.
+ */
+ if (stbuf_in)
+ tmp_stat = *stbuf_in;
+ else
+ tmp_stat = ctx_p->statbuf;
+ memset(&ctx_p->statbuf, 0, sizeof(ctx_p->statbuf));
+ gf_uuid_copy(ctx_p->statbuf.ia_gfid, tmp_stat.ia_gfid);
+ ctx_p->statbuf.ia_type = tmp_stat.ia_type;
+ GF_ATOMIC_INC(ctx_p->generation);
+ } else {
+ if (ctx_p->statbuf.ia_ctime) {
+ if (stbuf_in->ia_ctime < ctx_p->statbuf.ia_ctime) {
+ goto out;
+ }
+
+ if ((stbuf_in->ia_ctime == ctx_p->statbuf.ia_ctime) &&
+ (stbuf_in->ia_ctime_nsec < ctx_p->statbuf.ia_ctime_nsec)) {
+ goto out;
+ }
+ } else {
+ if ((generation != -1) &&
+ (generation != GF_ATOMIC_GET(ctx_p->generation)))
+ goto out;
+ }
+
+ ctx_p->statbuf = *stbuf_in;
+ }
+
+out:
+ if (stbuf_out)
+ *stbuf_out = ctx_p->statbuf;
+
+ return 0;
+}
+
+static int
+rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __rda_inode_ctx_update_iatts(inode, this, stbuf_in, stbuf_out,
+ generation);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+/*
+ * Reset the tracking state of the context.
+ */
+static void
+rda_reset_ctx(xlator_t *this, struct rda_fd_ctx *ctx)
+{
+ struct rda_priv *priv = NULL;
+
+ priv = this->private;
+
+ ctx->state = RDA_FD_NEW;
+ ctx->cur_offset = 0;
+ ctx->next_offset = 0;
+ ctx->op_errno = 0;
+
+ gf_dirent_free(&ctx->entries);
+ GF_ATOMIC_SUB(priv->rda_cache_size, ctx->cur_size);
+ ctx->cur_size = 0;
+
+ if (ctx->xattrs) {
+ dict_unref(ctx->xattrs);
+ ctx->xattrs = NULL;
+ }
+}
+
+static void
+rda_mark_inode_dirty(xlator_t *this, inode_t *inode)
+{
+ inode_t *parent = NULL;
+ fd_t *fd = NULL;
+ uint64_t val = 0;
+ int32_t ret = 0;
+ struct rda_fd_ctx *fd_ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ parent = inode_parent(inode, NULL, NULL);
+ if (parent) {
+ LOCK(&parent->lock);
+ {
+ list_for_each_entry(fd, &parent->fd_list, inode_list)
+ {
+ val = 0;
+ fd_ctx_get(fd, this, &val);
+ if (val == 0)
+ continue;
+
+ fd_ctx = (void *)(uintptr_t)val;
+ uuid_utoa_r(inode->gfid, gfid);
+ if (!GF_ATOMIC_GET(fd_ctx->prefetching))
+ continue;
+
+ LOCK(&fd_ctx->lock);
+ {
+ if (GF_ATOMIC_GET(fd_ctx->prefetching)) {
+ if (fd_ctx->writes_during_prefetch == NULL)
+ fd_ctx->writes_during_prefetch = dict_new();
+
+ ret = dict_set_int8(fd_ctx->writes_during_prefetch,
+ gfid, 1);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "marking to invalidate stats of %s from an "
+ "in progress "
+ "prefetching has failed, might result in "
+ "stale stat to "
+ "application",
+ gfid);
+ }
+ }
+ }
+ UNLOCK(&fd_ctx->lock);
+ }
+ }
+ UNLOCK(&parent->lock);
+ inode_unref(parent);
+ }
+
+ return;
+}
+
+/*
+ * Check whether we can handle a request. Offset verification is done by the
+ * caller, so we only check whether the preload buffer has completion status
+ * (including an error) or has some data to return.
+ */
+static gf_boolean_t
+rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
+{
+ if ((ctx->state & RDA_FD_EOD) || (ctx->state & RDA_FD_ERROR) ||
+ (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) ||
+ (request_size && ctx->cur_size >= request_size))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+rda_inode_ctx_get_iatt(inode_t *inode, xlator_t *this, struct iatt *attr)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+
+ if (!inode || !this || !attr)
+ goto out;
+
+ LOCK(&inode->lock);
+ {
+ ctx_p = __rda_inode_ctx_get(inode, this);
+ if (ctx_p) {
+ *attr = ctx_p->statbuf;
+ }
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return;
+}
+
+/*
+ * Serve a request from the fd dentry list based on the size of the request
+ * buffer. ctx must be locked.
+ */
+static int32_t
+__rda_fill_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
+ struct rda_fd_ctx *ctx)
+{
+ gf_dirent_t *dirent, *tmp;
+ size_t dirent_size, size = 0;
+ int32_t count = 0;
+ struct rda_priv *priv = NULL;
+ struct iatt tmp_stat = {
+ 0,
+ };
+
+ priv = this->private;
+
+ list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list)
+ {
+ dirent_size = gf_dirent_size(dirent->d_name);
+ if (size + dirent_size > request_size)
+ break;
+
+ memset(&tmp_stat, 0, sizeof(tmp_stat));
+
+ if (dirent->inode && (!((strcmp(dirent->d_name, ".") == 0) ||
+ (strcmp(dirent->d_name, "..") == 0)))) {
+ rda_inode_ctx_get_iatt(dirent->inode, this, &tmp_stat);
+ dirent->d_stat = tmp_stat;
+ }
+
+ size += dirent_size;
+ list_del_init(&dirent->list);
+ ctx->cur_size -= dirent_size;
+
+ GF_ATOMIC_SUB(priv->rda_cache_size, dirent_size);
+
+ list_add_tail(&dirent->list, &entries->list);
+ ctx->cur_offset = dirent->d_off;
+ count++;
+ }
+
+ if (ctx->cur_size <= priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+
+ return count;
+}
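
The RDA_FD_PLUGGED update just above, together with the check in rda_can_serve_readdirp() and the clearing in rda_fill_fd_cbk() further down, forms a low/high watermark hysteresis: once the preload drains below the low watermark the fd is plugged and small requests wait, and it is unplugged only after the buffer refills past the high watermark (or hits end-of-directory). A stand-alone sketch under assumed names (buf_state, LOW_WMARK and HIGH_WMARK are invented for the illustration):

#include <stdbool.h>
#include <stdio.h>

static const size_t LOW_WMARK = 4096;        /* rda-low-wmark analogue */
static const size_t HIGH_WMARK = 128 * 1024; /* rda-high-wmark analogue */

struct buf_state {
    size_t buffered; /* preloaded bytes, like ctx->cur_size */
    bool plugged;    /* like RDA_FD_PLUGGED */
    bool eod;        /* end of directory reached */
};

/* mirrors rda_can_serve_readdirp(): serve when unplugged with data,
 * at end-of-directory, or when the buffer already covers the request */
static bool can_serve(const struct buf_state *s, size_t request)
{
    return s->eod || (!s->plugged && s->buffered > 0) ||
           (request && s->buffered >= request);
}

static void after_serve(struct buf_state *s, size_t served)
{
    s->buffered -= served;
    if (s->buffered <= LOW_WMARK)
        s->plugged = true; /* wait for the preload to catch up */
}

static void after_preload(struct buf_state *s, size_t added)
{
    s->buffered += added;
    if (s->buffered >= HIGH_WMARK)
        s->plugged = false; /* enough headroom, resume serving */
}

int main(void)
{
    struct buf_state s = {.buffered = 0, .plugged = true, .eod = false};

    after_preload(&s, 128 * 1024);
    printf("serve 4k? %d\n", can_serve(&s, 4096));
    after_serve(&s, 4096);
    printf("buffered=%zu plugged=%d\n", s.buffered, s.plugged);
    return 0;
}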
+
+static int32_t
+__rda_serve_readdirp(xlator_t *this, struct rda_fd_ctx *ctx, size_t size,
+ gf_dirent_t *entries, int *op_errno)
+{
+ int32_t ret = 0;
+
+ ret = __rda_fill_readdirp(this, entries, size, ctx);
+
+ if (!ret && (ctx->state & RDA_FD_ERROR)) {
+ ret = -1;
+ ctx->state &= ~RDA_FD_ERROR;
+
+ /*
+ * the preload has stopped running in the event of an error, so
+ * pass all future requests along
+ */
+ ctx->state |= RDA_FD_BYPASS;
+ }
+ /*
+ * Use the op_errno sent by lower layers as xlators above will check
+ * the op_errno for identifying whether readdir is completed or not.
+ */
+ *op_errno = ctx->op_errno;
+
+ return ret;
+}
+
+static int32_t
+rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ struct rda_fd_ctx *ctx = NULL;
+ int fill = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ int op_errno = 0;
+ gf_boolean_t serve = _gf_false;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ if (ctx->state & RDA_FD_BYPASS)
+ goto bypass;
+
+ INIT_LIST_HEAD(&entries.list);
+ LOCK(&ctx->lock);
+
+ /* recheck now that we have the lock */
+ if (ctx->state & RDA_FD_BYPASS) {
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If a new read comes in at offset 0 and the buffer has been
+ * completed, reset the context and kickstart the filler again.
+ */
+ if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
+ rda_reset_ctx(this, ctx);
+ /*
+ * Unref and discard the 'list of xattrs to be fetched'
+ * stored during opendir call. This is done above - inside
+ * rda_reset_ctx().
+ * Now, ref the xdata passed by md-cache in actual readdirp()
+ * call and use that for all subsequent internal readdirp()
+ * requests issued by this xlator.
+ */
+ ctx->xattrs = dict_ref(xdata);
+ fill = 1;
+ }
+
+ /*
+ * If a readdir occurs at an unexpected offset or we already have a
+ * request pending, admit defeat and just get out of the way.
+ */
+ if (off != ctx->cur_offset || ctx->stub) {
+ ctx->state |= RDA_FD_BYPASS;
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If we haven't bypassed the preload, this means we can either serve
+ * the request out of the preload or the request that enables us to do
+ * so is in flight...
+ */
+ if (rda_can_serve_readdirp(ctx, size)) {
+ ret = __rda_serve_readdirp(this, ctx, size, &entries, &op_errno);
+ serve = _gf_true;
+
+ if (op_errno == ENOENT &&
+ !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)))
+ op_errno = 0;
+ } else {
+ ctx->stub = fop_readdirp_stub(frame, NULL, fd, size, off, xdata);
+ if (!ctx->stub) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 1;
+ if (!ctx->xattrs)
+ ctx->xattrs = dict_ref(xdata);
+ ctx->state |= RDA_FD_RUNNING;
+ }
+ }
+
+ UNLOCK(&ctx->lock);
+
+ if (serve) {
+ STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
+ gf_dirent_free(&entries);
+ }
+
+ if (fill)
+ rda_fill_fd(frame, this, fd);
+
+ return 0;
+
+bypass:
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *dirent = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t serve_entries;
+ struct rda_local *local = frame->local;
+ struct rda_fd_ctx *ctx = local->ctx;
+ struct rda_priv *priv = this->private;
+ int fill = 1;
+ size_t dirent_size = 0;
+ int ret = 0;
+ gf_boolean_t serve = _gf_false;
+ call_stub_t *stub = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ uint64_t generation = 0;
+ call_frame_t *fill_frame = NULL;
+
+ INIT_LIST_HEAD(&serve_entries.list);
+ LOCK(&ctx->lock);
+
+ /* Verify that the preload buffer is still pending on this data. */
+ if (ctx->next_offset != local->offset) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, READDIR_AHEAD_MSG_OUT_OF_SEQUENCE,
+ "Out of sequence directory preload.");
+ ctx->state |= (RDA_FD_BYPASS | RDA_FD_ERROR);
+ ctx->op_errno = EUCLEAN;
+
+ goto out;
+ }
+
+ if (entries) {
+ list_for_each_entry_safe(dirent, tmp, &entries->list, list)
+ {
+ list_del_init(&dirent->list);
+
+ /* must preserve entry order */
+ list_add_tail(&dirent->list, &ctx->entries.list);
+ if (dirent->inode) {
+                /* We don't know the inode's generation number at the
+                 * time this readdirp request was initiated. If a write
+                 * raced with the prefetch, pass generation 0 so that
+                 * dirent->d_stat does not overwrite an iatt that has
+                 * since been invalidated; otherwise pass -1 to update
+                 * unconditionally.
+                 */
+
+ generation = -1;
+ if (ctx->writes_during_prefetch) {
+ memset(gfid, 0, sizeof(gfid));
+ uuid_utoa_r(dirent->inode->gfid, gfid);
+ if (dict_get(ctx->writes_during_prefetch, gfid))
+ generation = 0;
+ }
+
+ if (!((strcmp(dirent->d_name, ".") == 0) ||
+ (strcmp(dirent->d_name, "..") == 0))) {
+ rda_inode_ctx_update_iatts(dirent->inode, this,
+ &dirent->d_stat, &dirent->d_stat,
+ generation);
+ }
+ }
+
+ dirent_size = gf_dirent_size(dirent->d_name);
+
+ ctx->cur_size += dirent_size;
+
+ GF_ATOMIC_ADD(priv->rda_cache_size, dirent_size);
+
+ ctx->next_offset = dirent->d_off;
+ }
+ }
+
+ if (ctx->writes_during_prefetch) {
+ dict_unref(ctx->writes_during_prefetch);
+ ctx->writes_during_prefetch = NULL;
+ }
+
+ GF_ATOMIC_DEC(ctx->prefetching);
+
+ if (ctx->cur_size >= priv->rda_high_wmark)
+ ctx->state &= ~RDA_FD_PLUGGED;
+
+ if (!op_ret || op_errno == ENOENT) {
+ /* we've hit eod */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_EOD;
+ ctx->op_errno = op_errno;
+ } else if (op_ret == -1) {
+ /* kill the preload and pend the error */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_ERROR;
+ ctx->op_errno = op_errno;
+ }
+
+ /*
+ * NOTE: The strict bypass logic in readdirp() means a pending request
+ * is always based on ctx->cur_offset.
+ */
+ if (ctx->stub && rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
+ ret = __rda_serve_readdirp(this, ctx, ctx->stub->args.size,
+ &serve_entries, &op_errno);
+ serve = _gf_true;
+ stub = ctx->stub;
+ ctx->stub = NULL;
+ }
+
+out:
+ /*
+ * If we have been marked for bypass and have no pending stub, clear the
+ * run state so we stop preloading the context with entries.
+ */
+ if (!ctx->stub &&
+ ((ctx->state & RDA_FD_BYPASS) ||
+ GF_ATOMIC_GET(priv->rda_cache_size) > priv->rda_cache_limit))
+ ctx->state &= ~RDA_FD_RUNNING;
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 0;
+ if (ctx->xattrs) {
+ /*
+ * fill = 0 and hence rda_fill_fd() won't be invoked.
+ * unref for ref taken in rda_fill_fd()
+ */
+ dict_unref(ctx->xattrs);
+ ctx->xattrs = NULL;
+ }
+
+ fill_frame = ctx->fill_frame;
+ ctx->fill_frame = NULL;
+ }
+
+ if (op_errno == ENOENT &&
+ !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)))
+ op_errno = 0;
+
+ UNLOCK(&ctx->lock);
+ if (fill_frame) {
+ rda_local_wipe(fill_frame->local);
+ STACK_DESTROY(fill_frame->root);
+ }
+
+ if (serve) {
+ STACK_UNWIND_STRICT(readdirp, stub->frame, ret, op_errno,
+ &serve_entries, xdata);
+ gf_dirent_free(&serve_entries);
+ call_stub_destroy(stub);
+ }
+
+ if (fill)
+ rda_fill_fd(frame, this, local->fd);
+
+ return 0;
+}
+
+/*
+ * Start prepopulating the fd context with directory entries.
+ */
+static int
+rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ call_frame_t *nframe = NULL;
+ struct rda_local *local = NULL;
+ struct rda_local *orig_local = frame->local;
+ struct rda_fd_ctx *ctx;
+ off_t offset;
+ struct rda_priv *priv = this->private;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ LOCK(&ctx->lock);
+
+ if (ctx->state & RDA_FD_NEW) {
+ ctx->state &= ~RDA_FD_NEW;
+ ctx->state |= RDA_FD_RUNNING;
+ if (priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+ }
+
+ offset = ctx->next_offset;
+
+ if (!ctx->fill_frame) {
+ nframe = copy_frame(frame);
+ if (!nframe) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local->ctx = ctx;
+ local->fd = fd_ref(fd);
+ nframe->local = local;
+
+ ctx->fill_frame = nframe;
+
+ if (!ctx->xattrs && orig_local && orig_local->xattrs) {
+ /* when this function is invoked by rda_opendir_cbk */
+ ctx->xattrs = dict_ref(orig_local->xattrs);
+ }
+ } else {
+ nframe = ctx->fill_frame;
+ local = nframe->local;
+ }
+
+ local->offset = offset;
+ GF_ATOMIC_INC(ctx->prefetching);
+
+ UNLOCK(&ctx->lock);
+
+ STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size,
+ offset, ctx->xattrs);
+
+ return 0;
+
+err:
+ if (nframe) {
+ rda_local_wipe(nframe->local);
+ FRAME_DESTROY(nframe);
+ }
+
+ return -1;
+}
+
+static int32_t
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ if (!op_ret)
+ rda_fill_fd(frame, this, fd);
+
+ RDA_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ struct rda_local *local = NULL;
+
+ if (xdata) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /*
+ * Retrieve list of keys set by md-cache xlator and store it
+ * in local to be consumed in rda_opendir_cbk
+ */
+ local->xattrs = dict_copy_with_ref(xdata, NULL);
+ frame->local = local;
+ }
+
+ STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+
+ rda_mark_inode_dirty(this, local->inode);
+
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(writev, frame, this, fd->inode, xdata, fd,
+ vector, count, off, flags, iobref);
+ return 0;
+}
+
+static int32_t
+rda_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fallocate, frame, this, fd->inode, xdata, fd,
+ keep_size, offset, len);
+ return 0;
+}
+
+static int32_t
+rda_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(zerofill, frame, this, fd->inode, xdata, fd,
+ offset, len);
+ return 0;
+}
+
+static int32_t
+rda_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(discard, frame, this, fd->inode, xdata, fd,
+ offset, len);
+ return 0;
+}
+
+static int32_t
+rda_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(ftruncate, frame, this, fd->inode, xdata, fd,
+ offset);
+ return 0;
+}
+
+static int32_t
+rda_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+    RDA_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(truncate, frame, this, loc->inode, xdata, loc,
+ offset);
+ return 0;
+}
+
+static int32_t
+rda_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(setxattr, frame, this, loc->inode, xdata, loc,
+ dict, flags);
+ return 0;
+}
+
+static int32_t
+rda_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fsetxattr, frame, this, fd->inode, xdata, fd,
+ dict, flags);
+ return 0;
+}
+
+static int32_t
+rda_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(setattr, frame, this, loc->inode, xdata, loc,
+ stbuf, valid);
+ return 0;
+}
+
+static int32_t
+rda_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fsetattr, frame, this, fd->inode, xdata, fd,
+ stbuf, valid);
+ return 0;
+}
+
+static int32_t
+rda_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(removexattr, frame, this, loc->inode, xdata,
+ loc, name);
+ return 0;
+}
+
+static int32_t
+rda_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fremovexattr, frame, this, fd->inode, xdata, fd,
+ name);
+ return 0;
+}
+
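+/*
+ * releasedir: tear down the per-fd preload context, destroy any in-flight
+ * fill frame, and log an error if a stub was still pending when the
+ * directory was released.
+ */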
+static int32_t
+rda_releasedir(xlator_t *this, fd_t *fd)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ if (fd_ctx_del(fd, this, &val) < 0)
+ return -1;
+
+ ctx = (struct rda_fd_ctx *)(uintptr_t)val;
+ if (!ctx)
+ return 0;
+
+ rda_reset_ctx(this, ctx);
+
+ if (ctx->fill_frame)
+ STACK_DESTROY(ctx->fill_frame->root);
+
+ if (ctx->stub)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+ "released a directory with a pending stub");
+
+ GF_FREE(ctx);
+ return 0;
+}
+
+static int
+rda_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_uint = 0;
+ rda_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del1(inode, this, &ctx_uint);
+ if (!ctx_uint)
+ return 0;
+
+ ctx = (rda_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ GF_FREE(ctx);
+
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
+
+ if (ret != 0)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READDIR_AHEAD_MSG_NO_MEMORY,
+ "Memory accounting init"
+ "failed");
+
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ struct rda_priv *priv = this->private;
+
+ GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64,
+ err);
+ GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("parallel-readdir", priv->parallel_readdir, options, bool,
+ err);
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, err);
+
+ return 0;
+err:
+ return -1;
+}
+
+int
+init(xlator_t *this)
+{
+ struct rda_priv *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: readdir-ahead not configured with exactly one"
+ " child");
+ goto err;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ READDIR_AHEAD_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
+ if (!priv)
+ goto err;
+ this->private = priv;
+
+ GF_ATOMIC_INIT(priv->rda_cache_size, 0);
+
+ this->local_pool = mem_pool_new(struct rda_local, 32);
+ if (!this->local_pool)
+ goto err;
+
+ GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64, err);
+ GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
+ GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err);
+ GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64, err);
+ GF_OPTION_INIT("parallel-readdir", priv->parallel_readdir, bool, err);
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, err);
+
+ return 0;
+
+err:
+ if (this->local_pool)
+ mem_pool_destroy(this->local_pool);
+ if (priv)
+ GF_FREE(priv);
+
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, out);
+
+ GF_FREE(this->private);
+
+out:
+ return;
+}
+
+struct xlator_fops fops = {
+ .opendir = rda_opendir,
+ .readdirp = rda_readdirp,
+ /* inode write */
+ /* TODO: invalidate a dentry's stats if its pointing to a directory
+ * when entry operations happen in that directory
+ */
+ .writev = rda_writev,
+ .truncate = rda_truncate,
+ .ftruncate = rda_ftruncate,
+ .fallocate = rda_fallocate,
+ .discard = rda_discard,
+ .zerofill = rda_zerofill,
+ /* metadata write */
+ .setxattr = rda_setxattr,
+ .fsetxattr = rda_fsetxattr,
+ .setattr = rda_setattr,
+ .fsetattr = rda_fsetattr,
+ .removexattr = rda_removexattr,
+ .fremovexattr = rda_fremovexattr,
+};
+
+struct xlator_cbks cbks = {
+ .releasedir = rda_releasedir,
+ .forget = rda_forget,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"readdir-ahead"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable readdir-ahead",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"rda-request-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 131072,
+ .default_value = "131072",
+ .description = "size of buffer in readdirp calls initiated by "
+ "readdir-ahead ",
+ },
+ {
+ .key = {"rda-low-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 10 * GF_UNIT_MB,
+ .default_value = "4096",
+ .description = "the value under which readdir-ahead plugs",
+ },
+ {
+ .key = {"rda-high-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 100 * GF_UNIT_MB,
+ .default_value = "128KB",
+ .description = "the value over which readdir-ahead unplugs",
+ },
+ {
+ .key = {"rda-cache-limit"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = INFINITY,
+ .default_value = "10MB",
+ .description = "maximum size of cache consumed by readdir-ahead "
+ "xlator. This value is global and total memory "
+ "consumption by readdir-ahead is capped by this "
+ "value, irrespective of the number/size of "
+ "directories cached",
+ },
+ {.key = {"parallel-readdir"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .default_value = "off",
+ .description = "If this option is enabled, the readdir operation "
+ "is performed in parallel on all the bricks, thus "
+ "improving the performance of readdir. Note that "
+ "the performance improvement is higher in large "
+ "clusters"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"readdir-ahead"},
+ .description = "Enable/Disable readdir ahead translator"},
+ {.key = {NULL}},
+};
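+
+/*
+ * Illustrative only (not part of this change set): these options are
+ * normally driven through the gluster CLI, e.g.
+ *
+ *   gluster volume set <volname> performance.readdir-ahead on
+ *   gluster volume set <volname> performance.rda-cache-limit 20MB
+ *
+ * The "performance." option names are assumed here; the glusterd option
+ * table remains the authoritative mapping.
+ */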
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "readdir-ahead",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
new file mode 100644
index 00000000000..619c41059ff
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READDIR_AHEAD_H
+#define __READDIR_AHEAD_H
+
+/* state flags */
+#define RDA_FD_NEW (1 << 0)
+#define RDA_FD_RUNNING (1 << 1)
+#define RDA_FD_EOD (1 << 2)
+#define RDA_FD_ERROR (1 << 3)
+#define RDA_FD_BYPASS (1 << 4)
+#define RDA_FD_PLUGGED (1 << 5)
+
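+/*
+ * Common boilerplate for fops that modify an inode: allocate a local, ref
+ * the inode, snapshot the inode ctx generation under the inode lock, stash
+ * the optional xdata, and wind the fop to the first child with the
+ * matching rda_<name>_cbk callback.
+ */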
+#define RDA_COMMON_MODIFICATION_FOP(name, frame, this, __inode, __xdata, \
+ args...) \
+ do { \
+ struct rda_local *__local = NULL; \
+ rda_inode_ctx_t *ctx_p = NULL; \
+ \
+ __local = mem_get0(this->local_pool); \
+ __local->inode = inode_ref(__inode); \
+ LOCK(&__inode->lock); \
+ { \
+ ctx_p = __rda_inode_ctx_get(__inode, this); \
+ } \
+ UNLOCK(&__inode->lock); \
+ __local->generation = GF_ATOMIC_GET(ctx_p->generation); \
+ \
+ frame->local = __local; \
+ if (__xdata) \
+ __local->xattrs = dict_ref(__xdata); \
+ \
+ STACK_WIND(frame, rda_##name##_cbk, FIRST_CHILD(this), \
+ FIRST_CHILD(this)->fops->name, args, __xdata); \
+ } while (0)
+
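+/*
+ * Unwind wrapper: detach frame->local before STACK_UNWIND_STRICT and
+ * release it (rda_local_wipe + mem_put) afterwards, so individual
+ * callbacks do not have to free the local themselves.
+ */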
+#define RDA_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ struct rda_local *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local) { \
+ rda_local_wipe(__local); \
+ mem_put(__local); \
+ } \
+ } while (0)
+
+struct rda_fd_ctx {
+ off_t cur_offset; /* current head of the ctx */
+ size_t cur_size; /* current size of the preload */
+ off_t next_offset; /* tail of the ctx */
+ uint32_t state;
+ gf_lock_t lock;
+ gf_dirent_t entries;
+ call_frame_t *fill_frame;
+ call_stub_t *stub;
+ int op_errno;
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ dict_t *writes_during_prefetch;
+ gf_atomic_t prefetching;
+};
+
+struct rda_local {
+ struct rda_fd_ctx *ctx;
+ fd_t *fd;
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ inode_t *inode;
+ off_t offset;
+ uint64_t generation;
+ int32_t skip_dir;
+};
+
+struct rda_priv {
+ uint64_t rda_req_size;
+ uint64_t rda_low_wmark;
+ uint64_t rda_high_wmark;
+ uint64_t rda_cache_limit;
+ gf_atomic_t rda_cache_size;
+ gf_boolean_t parallel_readdir;
+};
+
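+/*
+ * Per-inode context: a cached iatt plus a generation counter. The counter
+ * is sampled before a modifying fop is wound (RDA_COMMON_MODIFICATION_FOP)
+ * and handed back in its callback so stale stat updates can be detected.
+ */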
+typedef struct rda_inode_ctx {
+ struct iatt statbuf;
+ gf_atomic_t generation;
+} rda_inode_ctx_t;
+
+#endif /* __READDIR_AHEAD_H */
diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am
deleted file mode 100644
index 4091c329325..00000000000
--- a/xlators/performance/symlink-cache/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-xlator_LTLIBRARIES = symlink-cache.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance
-
-symlink_cache_la_LDFLAGS = -module -avoid-version
-
-symlink_cache_la_SOURCES = symlink-cache.c
-symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c
deleted file mode 100644
index 3b5fbc252ec..00000000000
--- a/xlators/performance/symlink-cache/src/symlink-cache.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "list.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "common-utils.h"
-
-struct symlink_cache {
- time_t ctime;
- char *readlink;
-};
-
-
-static int
-symlink_inode_ctx_get (inode_t *inode, xlator_t *this, void **ctx)
-{
- int ret = 0;
- uint64_t tmp_ctx = 0;
- ret = inode_ctx_get (inode, this, &tmp_ctx);
- if (-1 == ret)
- gf_log (this->name, GF_LOG_ERROR, "dict get failed");
- else
- *ctx = (void *)(long)tmp_ctx;
-
- return 0;
-}
-
-
-static int
-symlink_inode_ctx_set (inode_t *inode, xlator_t *this, void *ctx)
-{
- int ret = 0;
- ret = inode_ctx_put (inode, this, (uint64_t)(long) ctx);
- if (-1 == ret)
- gf_log (this->name, GF_LOG_ERROR, "dict set failed");
-
- return 0;
-}
-
-
-int
-sc_cache_update (xlator_t *this, inode_t *inode, const char *link)
-{
- struct symlink_cache *sc = NULL;
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
- if (!sc)
- return 0;
-
- if (!sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "updating cache: %s", link);
-
- sc->readlink = strdup (link);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "not updating existing cache: %s with %s",
- sc->readlink, link);
- }
-
- return 0;
-}
-
-
-int
-sc_cache_set (xlator_t *this, inode_t *inode, struct iatt *buf,
- const char *link)
-{
- struct symlink_cache *sc = NULL;
- int ret = -1;
- int need_set = 0;
-
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
- if (!sc) {
- need_set = 1;
- sc = CALLOC (1, sizeof (*sc));
- if (!sc) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto err;
- }
- }
-
- if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "replacing old cache: %s with new cache: %s",
- sc->readlink, link);
- FREE (sc->readlink);
- sc->readlink = NULL;
- }
-
- if (link) {
- sc->readlink = strdup (link);
- if (!sc->readlink) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto err;
- }
- }
-
- sc->ctime = buf->ia_ctime;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting symlink cache: %s", link);
-
- if (need_set) {
- ret = symlink_inode_ctx_set (inode, this, sc);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set inode context (%s)",
- strerror (-ret));
- goto err;
- }
- }
-
- return 0;
-err:
-
- if (sc) {
- FREE (sc->readlink);
- sc->readlink = NULL;
- FREE (sc);
- }
-
- return -1;
-}
-
-
-int
-sc_cache_flush (xlator_t *this, inode_t *inode)
-{
- struct symlink_cache *sc = NULL;
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
- if (!sc)
- return 0;
-
- if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flushing cache: %s", sc->readlink);
-
- FREE (sc->readlink);
- sc->readlink = NULL;
- }
-
- FREE (sc);
-
- return 0;
-}
-
-
-int
-sc_cache_validate (xlator_t *this, inode_t *inode, struct iatt *buf)
-{
- struct symlink_cache *sc = NULL;
- uint64_t tmp_sc = 0;
-
- if (!IA_ISLNK (buf->ia_type)) {
- sc_cache_flush (this, inode);
- return 0;
- }
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
-
- if (!sc) {
- sc_cache_set (this, inode, buf, NULL);
- inode_ctx_get (inode, this, &tmp_sc);
-
- if (!sc) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- return 0;
- }
- sc = (struct symlink_cache *)(long)tmp_sc;
- }
-
- if (sc->ctime == buf->ia_ctime)
- return 0;
-
- /* STALE */
- if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flushing cache: %s", sc->readlink);
-
- FREE (sc->readlink);
- sc->readlink = NULL;
- }
-
- sc->ctime = buf->ia_ctime;
-
- return 0;
-}
-
-
-
-int
-sc_cache_get (xlator_t *this, inode_t *inode, char **link)
-{
- struct symlink_cache *sc = NULL;
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
-
- if (!sc)
- return 0;
-
- if (link && sc->readlink)
- *link = strdup (sc->readlink);
- return 0;
-}
-
-
-int
-sc_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- const char *link, struct iatt *sbuf, dict_t *xdata)
-{
- if (op_ret > 0)
- sc_cache_update (this, frame->local, link);
-
- inode_unref (frame->local);
- frame->local = NULL;
-
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, link, sbuf,
- xdata);
- return 0;
-}
-
-
-int
-sc_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata)
-{
- char *link = NULL;
- struct iatt buf = {0, };
-
- sc_cache_get (this, loc->inode, &link);
-
- if (link) {
- /* cache hit */
- gf_log (this->name, GF_LOG_DEBUG,
- "cache hit %s -> %s",
- loc->path, link);
-
- /*
- libglusterfsclient, nfs or any other translators
- using buf in readlink_cbk should be aware that @buf
- is 0 filled
- */
- STACK_UNWIND_STRICT (readlink, frame, strlen (link), 0, link,
- &buf, NULL);
- FREE (link);
- return 0;
- }
-
- frame->local = inode_ref (loc->inode);
-
- STACK_WIND (frame, sc_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc, size, xdata);
-
- return 0;
-}
-
-
-int
-sc_symlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- if (op_ret == 0) {
- if (frame->local) {
- sc_cache_set (this, inode, buf, frame->local);
- }
- }
-
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int
-sc_symlink (call_frame_t *frame, xlator_t *this,
- const char *dst, loc_t *src, mode_t umask, dict_t *xdata)
-{
- frame->local = strdup (dst);
-
- STACK_WIND (frame, sc_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- dst, src, umask, xdata);
-
- return 0;
-}
-
-
-int
-sc_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
-{
- if (op_ret == 0)
- sc_cache_validate (this, inode, buf);
- else
- sc_cache_flush (this, inode);
-
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xdata, postparent);
- return 0;
-}
-
-
-int
-sc_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
-{
- STACK_WIND (frame, sc_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xdata);
-
- return 0;
-}
-
-
-int
-sc_forget (xlator_t *this,
- inode_t *inode)
-{
- sc_cache_flush (this, inode);
-
- return 0;
-}
-
-
-int32_t
-init (xlator_t *this)
-{
- if (!this->children || this->children->next)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- return 0;
-}
-
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = sc_lookup,
- .symlink = sc_symlink,
- .readlink = sc_readlink,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = sc_forget,
-};
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am
index 6c829d8ee36..a6a16fcc080 100644
--- a/xlators/performance/write-behind/src/Makefile.am
+++ b/xlators/performance/write-behind/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = write-behind.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-write_behind_la_LDFLAGS = -module -avoid-version
+write_behind_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
write_behind_la_SOURCES = write-behind.c
write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = write-behind-mem-types.h
+noinst_HEADERS = write-behind-mem-types.h write-behind-messages.h
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h
index f64f429ce22..a0647299150 100644
--- a/xlators/performance/write-behind/src/write-behind-mem-types.h
+++ b/xlators/performance/write-behind/src/write-behind-mem-types.h
@@ -8,19 +8,17 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __WB_MEM_TYPES_H__
#define __WB_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_wb_mem_types_ {
- gf_wb_mt_wb_file_t = gf_common_mt_end + 1,
- gf_wb_mt_wb_request_t,
- gf_wb_mt_iovec,
- gf_wb_mt_wb_conf_t,
- gf_wb_mt_wb_inode_t,
- gf_wb_mt_end
+ gf_wb_mt_wb_file_t = gf_common_mt_end + 1,
+ gf_wb_mt_wb_request_t,
+ gf_wb_mt_iovec,
+ gf_wb_mt_wb_conf_t,
+ gf_wb_mt_wb_inode_t,
+ gf_wb_mt_end
};
#endif
-
diff --git a/xlators/performance/write-behind/src/write-behind-messages.h b/xlators/performance/write-behind/src/write-behind-messages.h
new file mode 100644
index 00000000000..e9ea474879b
--- /dev/null
+++ b/xlators/performance/write-behind/src/write-behind-messages.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _WRITE_BEHIND_MESSAGES_H_
+#define _WRITE_BEHIND_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(WRITE_BEHIND, WRITE_BEHIND_MSG_EXCEEDED_MAX_SIZE,
+ WRITE_BEHIND_MSG_INIT_FAILED, WRITE_BEHIND_MSG_INVALID_ARGUMENT,
+ WRITE_BEHIND_MSG_NO_MEMORY, WRITE_BEHIND_MSG_SIZE_NOT_SET,
+ WRITE_BEHIND_MSG_VOL_MISCONFIGURED,
+ WRITE_BEHIND_MSG_RES_UNAVAILABLE);
+
+#endif /* _WRITE_BEHIND_MESSAGES_H_ */
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index e01a227f30e..00cfca016e6 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -8,222 +8,258 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "list.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "common-utils.h"
-#include "call-stub.h"
-#include "statedump.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/defaults.h>
#include "write-behind-mem-types.h"
+#include "write-behind-messages.h"
-#define MAX_VECTOR_COUNT 8
-#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
-#define WB_WINDOW_SIZE 1048576 /* 1MB */
+#define MAX_VECTOR_COUNT 8
+#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
+#define WB_WINDOW_SIZE 1048576 /* 1MB */
typedef struct list_head list_head_t;
struct wb_conf;
struct wb_inode;
typedef struct wb_inode {
- ssize_t window_conf;
- ssize_t window_current;
- ssize_t transit; /* size of data stack_wound, and yet
- to be fulfilled (wb_fulfill_cbk).
- used for trickling_writes
- */
-
- list_head_t all; /* All requests, from enqueue() till destroy().
- Used only for resetting generation
- number when empty.
- */
- list_head_t todo; /* Work to do (i.e, STACK_WIND to server).
- Once we STACK_WIND, the entry is taken
- off the list. If it is non-sync write,
- then we continue to track it via @liability
- or @temptation depending on the status
- of its writeback.
- */
- list_head_t liability; /* Non-sync writes which are lied
- (STACK_UNWIND'ed to caller) but ack
- from server not yet complete. This
- is the "liability" which we hold, and
- must guarantee that dependent operations
- which arrive later (which overlap, etc.)
- are issued only after their dependencies
- in this list are "fulfilled".
-
- Server acks for entries in this list
- shrinks the window.
-
- The sum total of all req->write_size
- of entries in this list must be kept less
- than the permitted window size.
- */
- list_head_t temptation; /* Operations for which we are tempted
- to 'lie' (write-behind), but temporarily
- holding off (because of insufficient
- window capacity, etc.)
-
- This is the list to look at to grow
- the window (in __wb_pick_unwinds()).
-
- Entries typically get chosen from
- write-behind from this list, and therefore
- get "upgraded" to the "liability" list.
- */
- list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC
- which are currently STACK_WIND'ed towards the server.
- This is for guaranteeing that no two overlapping
- writes are in progress at the same time. Modules
- like eager-lock in AFR depend on this behavior.
- */
- uint64_t gen; /* Liability generation number. Represents
- the current 'state' of liability. Every
- new addition to the liability list bumps
- the generation number.
-
- a newly arrived request is only required
- to perform causal checks against the entries
- in the liability list which were present
- at the time of its addition. the generation
- number at the time of its addition is stored
- in the request and used during checks.
-
- the liability list can grow while the request
- waits in the todo list waiting for its
- dependent operations to complete. however
- it is not of the request's concern to depend
- itself on those new entries which arrived
- after it arrived (i.e, those that have a
- liability generation higher than itself)
- */
- gf_lock_t lock;
- xlator_t *this;
-} wb_inode_t;
+ ssize_t window_conf;
+ ssize_t window_current;
+ ssize_t transit; /* size of data stack_wound, and yet
+ to be fulfilled (wb_fulfill_cbk).
+ used for trickling_writes
+ */
+
+ list_head_t all; /* All requests, from enqueue() till destroy().
+ Used only for resetting generation
+ number when empty.
+ */
+ list_head_t todo; /* Work to do (i.e, STACK_WIND to server).
+ Once we STACK_WIND, the entry is taken
+ off the list. If it is non-sync write,
+ then we continue to track it via @liability
+ or @temptation depending on the status
+ of its writeback.
+ */
+ list_head_t liability; /* Non-sync writes which are lied
+ (STACK_UNWIND'ed to caller) but ack
+ from server not yet complete. This
+ is the "liability" which we hold, and
+ must guarantee that dependent operations
+ which arrive later (which overlap, etc.)
+ are issued only after their dependencies
+ in this list are "fulfilled".
+
+ Server acks for entries in this list
+ shrinks the window.
+
+ The sum total of all req->write_size
+ of entries in this list must be kept less
+ than the permitted window size.
+ */
+ list_head_t temptation; /* Operations for which we are tempted
+ to 'lie' (write-behind), but temporarily
+ holding off (because of insufficient
+ window capacity, etc.)
+
+ This is the list to look at to grow
+ the window (in __wb_pick_unwinds()).
+
+ Entries typically get chosen from
+ write-behind from this list, and therefore
+ get "upgraded" to the "liability" list.
+ */
+ list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC
+ which are currently STACK_WIND'ed towards the server.
+ This is for guaranteeing that no two overlapping
+ writes are in progress at the same time. Modules
+ like eager-lock in AFR depend on this behavior.
+ */
+ list_head_t invalidate_list; /* list of wb_inodes that were marked for
+ * iatt invalidation due to requests in
+ * liability queue fulfilled while there
+ * was a readdirp session on parent
+ * directory. For a directory inode, this
+ * list points to list of children.
+ */
+ uint64_t gen; /* Liability generation number. Represents
+ the current 'state' of liability. Every
+ new addition to the liability list bumps
+ the generation number.
+
+ a newly arrived request is only required
+ to perform causal checks against the entries
+ in the liability list which were present
+ at the time of its addition. the generation
+ number at the time of its addition is stored
+ in the request and used during checks.
+
+ the liability list can grow while the request
+ waits in the todo list waiting for its
+ dependent operations to complete. however
+ it is not of the request's concern to depend
+ itself on those new entries which arrived
+ after it arrived (i.e, those that have a
+ liability generation higher than itself)
+ */
+ size_t size; /* Size of the file to catch write after EOF. */
+ gf_lock_t lock;
+ xlator_t *this;
+ inode_t *inode;
+ int dontsync; /* If positive, don't pick lies for
+ * winding. This is needed to break infinite
+ * recursion during invocation of
+ * wb_process_queue from
+ * wb_fulfill_cbk in case of an
+ * error during fulfill.
+ */
+ gf_atomic_int32_t readdirps;
+ gf_atomic_int8_t invalidate;
+} wb_inode_t;
typedef struct wb_request {
- list_head_t all;
- list_head_t todo;
- list_head_t lie; /* either in @liability or @temptation */
- list_head_t winds;
- list_head_t unwinds;
- list_head_t wip;
-
- call_stub_t *stub;
-
- ssize_t write_size; /* currently held size
- (after collapsing) */
- size_t orig_size; /* size which arrived with the request.
- This is the size by which we grow
- the window when unwinding the frame.
- */
- size_t total_size; /* valid only in @head in wb_fulfill().
- This is the size with which we perform
- STACK_WIND to server and therefore the
- amount by which we shrink the window.
- */
-
- int op_ret;
- int op_errno;
-
- int32_t refcount;
- wb_inode_t *wb_inode;
- glusterfs_fop_t fop;
- gf_lkowner_t lk_owner;
- struct iobref *iobref;
- uint64_t gen; /* inode liability state at the time of
- request arrival */
-
- fd_t *fd;
- struct {
- size_t size; /* 0 size == till infinity */
- off_t off;
- int append:1; /* offset is invalid. only one
- outstanding append at a time */
- int tempted:1; /* true only for non-sync writes */
- int lied:1; /* sin committed */
- int fulfilled:1; /* got server acknowledgement */
- int go:1; /* enough aggregating, good to go */
- } ordering;
+ list_head_t all;
+ list_head_t todo;
+ list_head_t lie; /* either in @liability or @temptation */
+ list_head_t winds;
+ list_head_t unwinds;
+ list_head_t wip;
+
+ call_stub_t *stub;
+
+ ssize_t write_size; /* currently held size
+ (after collapsing) */
+ size_t orig_size; /* size which arrived with the request.
+ This is the size by which we grow
+ the window when unwinding the frame.
+ */
+ size_t total_size; /* valid only in @head in wb_fulfill().
+ This is the size with which we perform
+ STACK_WIND to server and therefore the
+ amount by which we shrink the window.
+ */
+
+ int op_ret;
+ int op_errno;
+
+ int32_t refcount;
+ wb_inode_t *wb_inode;
+ glusterfs_fop_t fop;
+ gf_lkowner_t lk_owner;
+ pid_t client_pid;
+ struct iobref *iobref;
+ uint64_t gen; /* inode liability state at the time of
+ request arrival */
+
+ fd_t *fd;
+ int wind_count; /* number of sync-attempts. Only
+ for debug purposes */
+ struct {
+ size_t size; /* 0 size == till infinity */
+ off_t off;
+ int append : 1; /* offset is invalid. only one
+ outstanding append at a time */
+ int tempted : 1; /* true only for non-sync writes */
+ int lied : 1; /* sin committed */
+ int fulfilled : 1; /* got server acknowledgement */
+ int go : 1; /* enough aggregating, good to go */
+ } ordering;
+
+ /* for debug purposes. A request might outlive the fop it is
+ * representing. So, preserve essential info for logging.
+ */
+ uint64_t unique;
+ uuid_t gfid;
} wb_request_t;
-
typedef struct wb_conf {
- uint64_t aggregate_size;
- uint64_t window_size;
- gf_boolean_t flush_behind;
- gf_boolean_t trickling_writes;
- gf_boolean_t strict_write_ordering;
- gf_boolean_t strict_O_DIRECT;
+ uint64_t aggregate_size;
+ uint64_t page_size;
+ uint64_t window_size;
+ gf_boolean_t flush_behind;
+ gf_boolean_t trickling_writes;
+ gf_boolean_t strict_write_ordering;
+ gf_boolean_t strict_O_DIRECT;
+ gf_boolean_t resync_after_fsync;
} wb_conf_t;
-
-void
-wb_process_queue (wb_inode_t *wb_inode);
-
-
wb_inode_t *
-__wb_inode_ctx_get (xlator_t *this, inode_t *inode)
+__wb_inode_ctx_get(xlator_t *this, inode_t *inode)
{
- uint64_t value = 0;
- wb_inode_t *wb_inode = NULL;
+ uint64_t value = 0;
+ wb_inode_t *wb_inode = NULL;
+ int ret = 0;
- __inode_ctx_get (inode, this, &value);
- wb_inode = (wb_inode_t *)(unsigned long) value;
+ ret = __inode_ctx_get(inode, this, &value);
+ if (ret)
+ return NULL;
- return wb_inode;
-}
+ wb_inode = (wb_inode_t *)(unsigned long)value;
+ return wb_inode;
+}
wb_inode_t *
-wb_inode_ctx_get (xlator_t *this, inode_t *inode)
+wb_inode_ctx_get(xlator_t *this, inode_t *inode)
{
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- LOCK (&inode->lock);
- {
- wb_inode = __wb_inode_ctx_get (this, inode);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get(this, inode);
+ }
+ UNLOCK(&inode->lock);
out:
- return wb_inode;
+ return wb_inode;
}
-
-gf_boolean_t
-wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno)
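+/*
+ * Queue @wb_inode on its parent's invalidate_list (taking an inode ref) and
+ * mark it for iatt invalidation, but only while the parent directory has a
+ * readdirp in progress. Without a write-behind ctx on the parent, the
+ * invalidate flag is simply cleared.
+ */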
+static void
+wb_set_invalidate(wb_inode_t *wb_inode)
{
- gf_boolean_t err = _gf_false;
- uint64_t value = 0;
- int32_t tmp = 0;
+ int readdirps = 0;
+ inode_t *parent_inode = NULL;
+ wb_inode_t *wb_parent_inode = NULL;
- if (fd_ctx_get (fd, this, &value) == 0) {
- if (op_errno) {
- tmp = value;
- *op_errno = tmp;
- }
+ parent_inode = inode_parent(wb_inode->inode, NULL, NULL);
+ if (parent_inode)
+ wb_parent_inode = wb_inode_ctx_get(wb_inode->this, parent_inode);
- err = _gf_true;
+ if (wb_parent_inode) {
+ LOCK(&wb_parent_inode->lock);
+ {
+ readdirps = GF_ATOMIC_GET(wb_parent_inode->readdirps);
+ if (readdirps && list_empty(&wb_inode->invalidate_list)) {
+ inode_ref(wb_inode->inode);
+ GF_ATOMIC_INIT(wb_inode->invalidate, 1);
+ list_add(&wb_inode->invalidate_list,
+ &wb_parent_inode->invalidate_list);
+ }
}
+ UNLOCK(&wb_parent_inode->lock);
+ } else {
+ GF_ATOMIC_INIT(wb_inode->invalidate, 0);
+ }
+
+ if (parent_inode)
+ inode_unref(parent_inode);
- return err;
+ return;
}
+void
+wb_process_queue(wb_inode_t *wb_inode);
/*
Below is a succinct explanation of the code deciding whether two regions
@@ -250,1868 +286,2993 @@ wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno)
*/
gf_boolean_t
-wb_requests_overlap (wb_request_t *req1, wb_request_t *req2)
+wb_requests_overlap(wb_request_t *req1, wb_request_t *req2)
{
- uint64_t r1_start = 0;
- uint64_t r1_end = 0;
- uint64_t r2_start = 0;
- uint64_t r2_end = 0;
- enum _gf_boolean do_overlap = 0;
-
- r1_start = req1->ordering.off;
- if (req1->ordering.size)
- r1_end = r1_start + req1->ordering.size - 1;
- else
- r1_end = ULLONG_MAX;
-
- r2_start = req2->ordering.off;
- if (req2->ordering.size)
- r2_end = r2_start + req2->ordering.size - 1;
- else
- r2_end = ULLONG_MAX;
-
- do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start));
-
- return do_overlap;
+ uint64_t r1_start = 0;
+ uint64_t r1_end = 0;
+ uint64_t r2_start = 0;
+ uint64_t r2_end = 0;
+ gf_boolean_t do_overlap = _gf_false;
+
+ r1_start = req1->ordering.off;
+ if (req1->ordering.size)
+ r1_end = r1_start + req1->ordering.size - 1;
+ else
+ r1_end = ULLONG_MAX;
+
+ r2_start = req2->ordering.off;
+ if (req2->ordering.size)
+ r2_end = r2_start + req2->ordering.size - 1;
+ else
+ r2_end = ULLONG_MAX;
+
+ do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start));
+
+ return do_overlap;
}
-
gf_boolean_t
-wb_requests_conflict (wb_request_t *lie, wb_request_t *req)
+wb_requests_conflict(wb_request_t *lie, wb_request_t *req)
{
- wb_conf_t *conf = NULL;
+ wb_conf_t *conf = NULL;
- conf = req->wb_inode->this->private;
+ conf = req->wb_inode->this->private;
- if (lie == req)
- /* request cannot conflict with itself */
- return _gf_false;
+ if (lie == req)
+ /* request cannot conflict with itself */
+ return _gf_false;
- if (lie->gen >= req->gen)
- /* this liability entry was behind
- us in the todo list */
- return _gf_false;
+ if (lie->gen >= req->gen)
+ /* this liability entry was behind
+ us in the todo list */
+ return _gf_false;
- if (lie->ordering.append)
- /* all modifications wait for the completion
- of outstanding append */
- return _gf_true;
+ if (lie->ordering.append)
+ /* all modifications wait for the completion
+ of outstanding append */
+ return _gf_true;
- if (conf->strict_write_ordering)
- /* We are sure (lie->gen < req->gen) by now. So
- skip overlap check if strict write ordering is
- requested and always return "conflict" against a
- lower generation lie. */
- return _gf_true;
+ if (conf->strict_write_ordering)
+ /* We are sure (lie->gen < req->gen) by now. So
+ skip overlap check if strict write ordering is
+ requested and always return "conflict" against a
+ lower generation lie. */
+ return _gf_true;
- return wb_requests_overlap (lie, req);
+ return wb_requests_overlap(lie, req);
}
-
-gf_boolean_t
-wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
+wb_request_t *
+wb_liability_has_conflict(wb_inode_t *wb_inode, wb_request_t *req)
{
- wb_request_t *each = NULL;
-
- list_for_each_entry (each, &wb_inode->liability, lie) {
- if (wb_requests_conflict (each, req))
- return _gf_true;
- }
-
- return _gf_false;
+ wb_request_t *each = NULL;
+
+ list_for_each_entry(each, &wb_inode->liability, lie)
+ {
+ if (wb_requests_conflict(each, req) && (!each->ordering.fulfilled))
+ /* A fulfilled request shouldn't block another
+ * request (even a dependent one) from winding.
+ */
+ return each;
+ }
+
+ return NULL;
}
-
-gf_boolean_t
-wb_wip_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
+wb_request_t *
+wb_wip_has_conflict(wb_inode_t *wb_inode, wb_request_t *req)
{
- wb_request_t *each = NULL;
+ wb_request_t *each = NULL;
- if (req->stub->fop != GF_FOP_WRITE)
- /* non-writes fundamentally never conflict with WIP requests */
- return _gf_false;
+ if (req->stub->fop != GF_FOP_WRITE)
+ /* non-writes fundamentally never conflict with WIP requests */
+ return NULL;
- list_for_each_entry (each, &wb_inode->wip, wip) {
- if (each == req)
- /* request never conflicts with itself,
- though this condition should never occur.
- */
- continue;
+ list_for_each_entry(each, &wb_inode->wip, wip)
+ {
+ if (each == req)
+ /* request never conflicts with itself,
+ though this condition should never occur.
+ */
+ continue;
- if (wb_requests_overlap (each, req))
- return _gf_true;
- }
+ if (wb_requests_overlap(each, req))
+ return each;
+ }
- return _gf_false;
+ return NULL;
}
-
static int
-__wb_request_unref (wb_request_t *req)
+__wb_request_unref(wb_request_t *req)
{
- int ret = -1;
- wb_inode_t *wb_inode = NULL;
-
- wb_inode = req->wb_inode;
-
- if (req->refcount <= 0) {
- gf_log ("wb-request", GF_LOG_WARNING,
- "refcount(%d) is <= 0", req->refcount);
- goto out;
+ int ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ wb_inode = req->wb_inode;
+
+ if (req->refcount <= 0) {
+ uuid_utoa_r(req->gfid, gfid);
+
+ gf_msg(
+ "wb-request", GF_LOG_WARNING, 0, WRITE_BEHIND_MSG_RES_UNAVAILABLE,
+ "(unique=%" PRIu64 ", fop=%s, gfid=%s, gen=%" PRIu64
+ "): "
+ "refcount(%d) is <= 0 ",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen, req->refcount);
+ goto out;
+ }
+
+ ret = --req->refcount;
+ if (req->refcount == 0) {
+ uuid_utoa_r(req->gfid, gfid);
+
+ gf_log_callingfn(wb_inode->this->name, GF_LOG_DEBUG,
+ "(unique = %" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): destroying request, "
+ "removing from all queues",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen);
+
+ list_del_init(&req->todo);
+ list_del_init(&req->lie);
+ list_del_init(&req->wip);
+
+ list_del_init(&req->all);
+ if (list_empty(&wb_inode->all)) {
+ wb_inode->gen = 0;
+ /* in case of accounting errors? */
+ wb_inode->window_current = 0;
}
- ret = --req->refcount;
- if (req->refcount == 0) {
- list_del_init (&req->todo);
- list_del_init (&req->lie);
- list_del_init (&req->wip);
-
- list_del_init (&req->all);
- if (list_empty (&wb_inode->all)) {
- wb_inode->gen = 0;
- /* in case of accounting errors? */
- wb_inode->window_current = 0;
- }
-
- list_del_init (&req->winds);
- list_del_init (&req->unwinds);
+ list_del_init(&req->winds);
+ list_del_init(&req->unwinds);
- if (req->stub && req->ordering.tempted) {
- call_stub_destroy (req->stub);
- req->stub = NULL;
- } /* else we would have call_resume()'ed */
+ if (req->stub) {
+ call_stub_destroy(req->stub);
+ req->stub = NULL;
+ }
- if (req->iobref)
- iobref_unref (req->iobref);
+ if (req->iobref)
+ iobref_unref(req->iobref);
- if (req->fd)
- fd_unref (req->fd);
+ if (req->fd)
+ fd_unref(req->fd);
- GF_FREE (req);
- }
+ GF_FREE(req);
+ }
out:
- return ret;
+ return ret;
}
-
static int
-wb_request_unref (wb_request_t *req)
+wb_request_unref(wb_request_t *req)
{
- wb_inode_t *wb_inode = NULL;
- int ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("write-behind", req, out);
+ GF_VALIDATE_OR_GOTO("write-behind", req, out);
- wb_inode = req->wb_inode;
+ wb_inode = req->wb_inode;
- LOCK (&wb_inode->lock);
- {
- ret = __wb_request_unref (req);
- }
- UNLOCK (&wb_inode->lock);
+ LOCK(&wb_inode->lock);
+ {
+ ret = __wb_request_unref(req);
+ }
+ UNLOCK(&wb_inode->lock);
out:
- return ret;
+ return ret;
}
-
static wb_request_t *
-__wb_request_ref (wb_request_t *req)
+__wb_request_ref(wb_request_t *req)
{
- GF_VALIDATE_OR_GOTO ("write-behind", req, out);
+ GF_VALIDATE_OR_GOTO("write-behind", req, out);
- if (req->refcount < 0) {
- gf_log ("wb-request", GF_LOG_WARNING,
- "refcount(%d) is < 0", req->refcount);
- req = NULL;
- goto out;
- }
+ if (req->refcount < 0) {
+ gf_msg("wb-request", GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_RES_UNAVAILABLE, "refcount(%d) is < 0",
+ req->refcount);
+ req = NULL;
+ goto out;
+ }
- req->refcount++;
+ req->refcount++;
out:
- return req;
+ return req;
}
-
wb_request_t *
-wb_request_ref (wb_request_t *req)
+wb_request_ref(wb_request_t *req)
{
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", req, out);
+ GF_VALIDATE_OR_GOTO("write-behind", req, out);
- wb_inode = req->wb_inode;
- LOCK (&wb_inode->lock);
- {
- req = __wb_request_ref (req);
- }
- UNLOCK (&wb_inode->lock);
+ wb_inode = req->wb_inode;
+ LOCK(&wb_inode->lock);
+ {
+ req = __wb_request_ref(req);
+ }
+ UNLOCK(&wb_inode->lock);
out:
- return req;
+ return req;
}
-
gf_boolean_t
-wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
-{
- wb_request_t *req = NULL;
-
- GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
- GF_VALIDATE_OR_GOTO (wb_inode->this->name, stub, out);
-
- req = GF_CALLOC (1, sizeof (*req), gf_wb_mt_wb_request_t);
- if (!req)
- goto out;
-
- INIT_LIST_HEAD (&req->all);
- INIT_LIST_HEAD (&req->todo);
- INIT_LIST_HEAD (&req->lie);
- INIT_LIST_HEAD (&req->winds);
- INIT_LIST_HEAD (&req->unwinds);
- INIT_LIST_HEAD (&req->wip);
-
- req->stub = stub;
- req->wb_inode = wb_inode;
- req->fop = stub->fop;
- req->ordering.tempted = tempted;
-
- if (stub->fop == GF_FOP_WRITE) {
- req->write_size = iov_length (stub->args.vector,
- stub->args.count);
-
- /* req->write_size can change as we collapse
- small writes. But the window needs to grow
- only by how much we acknowledge the app. so
- copy the original size in orig_size for the
- purpose of accounting.
- */
- req->orig_size = req->write_size;
-
- /* Let's be optimistic that we can
- lie about it
- */
- req->op_ret = req->write_size;
- req->op_errno = 0;
-
- if (stub->args.fd->flags & O_APPEND)
- req->ordering.append = 1;
- }
-
- req->lk_owner = stub->frame->root->lk_owner;
-
- switch (stub->fop) {
- case GF_FOP_WRITE:
- req->ordering.off = stub->args.offset;
- req->ordering.size = req->write_size;
-
- req->fd = fd_ref (stub->args.fd);
-
- break;
- case GF_FOP_READ:
- req->ordering.off = stub->args.offset;
- req->ordering.size = stub->args.size;
-
- req->fd = fd_ref (stub->args.fd);
-
- break;
- case GF_FOP_TRUNCATE:
- req->ordering.off = stub->args.offset;
- req->ordering.size = 0; /* till infinity */
- break;
- case GF_FOP_FTRUNCATE:
- req->ordering.off = stub->args.offset;
- req->ordering.size = 0; /* till infinity */
-
- req->fd = fd_ref (stub->args.fd);
-
- break;
- default:
- break;
- }
-
- LOCK (&wb_inode->lock);
- {
- list_add_tail (&req->all, &wb_inode->all);
-
- req->gen = wb_inode->gen;
-
- list_add_tail (&req->todo, &wb_inode->todo);
- __wb_request_ref (req); /* for wind */
+wb_enqueue_common(wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
+{
+ wb_request_t *req = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("write-behind", wb_inode, out);
+ GF_VALIDATE_OR_GOTO(wb_inode->this->name, stub, out);
+
+ req = GF_CALLOC(1, sizeof(*req), gf_wb_mt_wb_request_t);
+ if (!req)
+ goto out;
+
+ INIT_LIST_HEAD(&req->all);
+ INIT_LIST_HEAD(&req->todo);
+ INIT_LIST_HEAD(&req->lie);
+ INIT_LIST_HEAD(&req->winds);
+ INIT_LIST_HEAD(&req->unwinds);
+ INIT_LIST_HEAD(&req->wip);
+
+ req->stub = stub;
+ req->wb_inode = wb_inode;
+ req->fop = stub->fop;
+ req->ordering.tempted = tempted;
+ req->unique = stub->frame->root->unique;
+
+ inode = ((stub->args.fd != NULL) ? stub->args.fd->inode
+ : stub->args.loc.inode);
+
+ if (inode)
+ gf_uuid_copy(req->gfid, inode->gfid);
+
+ if (stub->fop == GF_FOP_WRITE) {
+ req->write_size = iov_length(stub->args.vector, stub->args.count);
+
+ /* req->write_size can change as we collapse
+ small writes. But the window needs to grow
+ only by how much we acknowledge the app. so
+ copy the original size in orig_size for the
+ purpose of accounting.
+ */
+ req->orig_size = req->write_size;
+
+ /* Let's be optimistic that we can
+ lie about it
+ */
+ req->op_ret = req->write_size;
+ req->op_errno = 0;
+
+ if (stub->args.fd && (stub->args.fd->flags & O_APPEND))
+ req->ordering.append = 1;
+ }
+
+ req->lk_owner = stub->frame->root->lk_owner;
+ req->client_pid = stub->frame->root->pid;
+
+ switch (stub->fop) {
+ case GF_FOP_WRITE:
+ LOCK(&wb_inode->lock);
+ {
+ if (wb_inode->size < stub->args.offset) {
+ req->ordering.off = wb_inode->size;
+ req->ordering.size = stub->args.offset + req->write_size -
+ wb_inode->size;
+ } else {
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = req->write_size;
+ }
- if (req->ordering.tempted) {
- list_add_tail (&req->lie, &wb_inode->temptation);
- __wb_request_ref (req); /* for unwind */
- }
+ if (wb_inode->size < stub->args.offset + req->write_size)
+ wb_inode->size = stub->args.offset + req->write_size;
+ }
+ UNLOCK(&wb_inode->lock);
+
+ req->fd = fd_ref(stub->args.fd);
+
+ break;
+ case GF_FOP_READ:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = stub->args.size;
+
+ req->fd = fd_ref(stub->args.fd);
+
+ break;
+ case GF_FOP_TRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->size = req->ordering.off;
+ }
+ UNLOCK(&wb_inode->lock);
+ break;
+ case GF_FOP_FTRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->size = req->ordering.off;
+ }
+ UNLOCK(&wb_inode->lock);
+
+ req->fd = fd_ref(stub->args.fd);
+
+ break;
+ default:
+ if (stub && stub->args.fd)
+ req->fd = fd_ref(stub->args.fd);
+
+ break;
+ }
+
+ LOCK(&wb_inode->lock);
+ {
+ list_add_tail(&req->all, &wb_inode->all);
+
+ req->gen = wb_inode->gen;
+
+ list_add_tail(&req->todo, &wb_inode->todo);
+ __wb_request_ref(req); /* for wind */
+
+ if (req->ordering.tempted) {
+ list_add_tail(&req->lie, &wb_inode->temptation);
+ __wb_request_ref(req); /* for unwind */
}
- UNLOCK (&wb_inode->lock);
+ }
+ UNLOCK(&wb_inode->lock);
out:
- if (!req)
- return _gf_false;
+ if (!req)
+ return _gf_false;
- return _gf_true;
+ return _gf_true;
}
-
gf_boolean_t
-wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub)
+wb_enqueue(wb_inode_t *wb_inode, call_stub_t *stub)
{
- return wb_enqueue_common (wb_inode, stub, 0);
+ return wb_enqueue_common(wb_inode, stub, 0);
}
-
gf_boolean_t
-wb_enqueue_tempted (wb_inode_t *wb_inode, call_stub_t *stub)
+wb_enqueue_tempted(wb_inode_t *wb_inode, call_stub_t *stub)
{
- return wb_enqueue_common (wb_inode, stub, 1);
+ return wb_enqueue_common(wb_inode, stub, 1);
}
-
wb_inode_t *
-__wb_inode_create (xlator_t *this, inode_t *inode)
+__wb_inode_create(xlator_t *this, inode_t *inode)
{
- wb_inode_t *wb_inode = NULL;
- wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- conf = this->private;
+ conf = this->private;
- wb_inode = GF_CALLOC (1, sizeof (*wb_inode), gf_wb_mt_wb_inode_t);
- if (!wb_inode)
- goto out;
+ wb_inode = GF_CALLOC(1, sizeof(*wb_inode), gf_wb_mt_wb_inode_t);
+ if (!wb_inode)
+ goto out;
- INIT_LIST_HEAD (&wb_inode->all);
- INIT_LIST_HEAD (&wb_inode->todo);
- INIT_LIST_HEAD (&wb_inode->liability);
- INIT_LIST_HEAD (&wb_inode->temptation);
- INIT_LIST_HEAD (&wb_inode->wip);
+ INIT_LIST_HEAD(&wb_inode->all);
+ INIT_LIST_HEAD(&wb_inode->todo);
+ INIT_LIST_HEAD(&wb_inode->liability);
+ INIT_LIST_HEAD(&wb_inode->temptation);
+ INIT_LIST_HEAD(&wb_inode->wip);
+ INIT_LIST_HEAD(&wb_inode->invalidate_list);
- wb_inode->this = this;
+ wb_inode->this = this;
- wb_inode->window_conf = conf->window_size;
+ wb_inode->window_conf = conf->window_size;
+ wb_inode->inode = inode;
- LOCK_INIT (&wb_inode->lock);
+ LOCK_INIT(&wb_inode->lock);
+ GF_ATOMIC_INIT(wb_inode->invalidate, 0);
+ GF_ATOMIC_INIT(wb_inode->readdirps, 0);
- __inode_ctx_put (inode, this, (uint64_t)(unsigned long)wb_inode);
+ ret = __inode_ctx_put(inode, this, (uint64_t)(unsigned long)wb_inode);
+ if (ret) {
+ GF_FREE(wb_inode);
+ wb_inode = NULL;
+ }
out:
- return wb_inode;
+ return wb_inode;
}
-
wb_inode_t *
-wb_inode_create (xlator_t *this, inode_t *inode)
+wb_inode_create(xlator_t *this, inode_t *inode)
{
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- LOCK (&inode->lock);
- {
- wb_inode = __wb_inode_ctx_get (this, inode);
- if (!wb_inode)
- wb_inode = __wb_inode_create (this, inode);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get(this, inode);
+ if (!wb_inode)
+ wb_inode = __wb_inode_create(this, inode);
+ }
+ UNLOCK(&inode->lock);
out:
- return wb_inode;
+ return wb_inode;
}
-
void
-wb_inode_destroy (wb_inode_t *wb_inode)
+wb_inode_destroy(wb_inode_t *wb_inode)
{
- GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
+ GF_VALIDATE_OR_GOTO("write-behind", wb_inode, out);
+
+ GF_ASSERT(list_empty(&wb_inode->todo));
+ GF_ASSERT(list_empty(&wb_inode->liability));
+ GF_ASSERT(list_empty(&wb_inode->temptation));
- LOCK_DESTROY (&wb_inode->lock);
- GF_FREE (wb_inode);
+ LOCK_DESTROY(&wb_inode->lock);
+ GF_FREE(wb_inode);
out:
- return;
+ return;
}
-
void
-__wb_fulfill_request (wb_request_t *req)
+__wb_fulfill_request(wb_request_t *req)
{
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ wb_inode = req->wb_inode;
+
+ req->ordering.fulfilled = 1;
+ wb_inode->window_current -= req->total_size;
+ wb_inode->transit -= req->total_size;
+
+ uuid_utoa_r(req->gfid, gfid);
+
+ gf_log_callingfn(wb_inode->this->name, GF_LOG_DEBUG,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): request fulfilled. "
+ "removing the request from liability queue? = %s",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen,
+ req->ordering.lied ? "yes" : "no");
+
+ if (req->ordering.lied) {
+ /* 1. If yes, request is in liability queue and hence can be
+ safely removed from list.
+ 2. If no, request is in temptation queue and hence should be
+ left in the queue so that wb_pick_unwinds picks it up
+ */
+ list_del_init(&req->lie);
+ } else {
+ /* TODO: fail the req->frame with error if
+ necessary
+ */
+ }
+
+ list_del_init(&req->wip);
+ __wb_request_unref(req);
+}
- wb_inode = req->wb_inode;
+/* get a flush/fsync waiting on req */
+wb_request_t *
+__wb_request_waiting_on(wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *trav = NULL;
- req->ordering.fulfilled = 1;
- wb_inode->window_current -= req->total_size;
- wb_inode->transit -= req->total_size;
+ wb_inode = req->wb_inode;
- if (!req->ordering.lied) {
- /* TODO: fail the req->frame with error if
- necessary
- */
- }
+ list_for_each_entry(trav, &wb_inode->todo, todo)
+ {
+ if (((trav->stub->fop == GF_FOP_FLUSH) ||
+ (trav->stub->fop == GF_FOP_FSYNC)) &&
+ (trav->gen >= req->gen))
+ return trav;
+ }
- __wb_request_unref (req);
+ return NULL;
}
-
void
-wb_head_done (wb_request_t *head)
+__wb_add_request_for_retry(wb_request_t *req)
{
- wb_request_t *req = NULL;
- wb_request_t *tmp = NULL;
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
- wb_inode = head->wb_inode;
+ if (!req)
+ goto out;
- LOCK (&wb_inode->lock);
- {
- list_for_each_entry_safe (req, tmp, &head->winds, winds) {
- __wb_fulfill_request (req);
- }
- __wb_fulfill_request (head);
- }
- UNLOCK (&wb_inode->lock);
-}
+ wb_inode = req->wb_inode;
+ /* response was unwound and no waiter waiting on this request, retry
+ till a flush or fsync (subject to conf->resync_after_fsync).
+ */
+ wb_inode->transit -= req->total_size;
-void
-wb_fulfill_err (wb_request_t *head, int op_errno)
-{
- wb_inode_t *wb_inode;
- wb_request_t *req;
+ req->total_size = 0;
- wb_inode = head->wb_inode;
+ list_del_init(&req->winds);
+ list_del_init(&req->todo);
+ list_del_init(&req->wip);
- /* for all future requests yet to arrive */
- fd_ctx_set (head->fd, THIS, op_errno);
+ /* sanitize ordering flags to retry */
+ req->ordering.go = 0;
- LOCK (&wb_inode->lock);
- {
- /* for all requests already arrived */
- list_for_each_entry (req, &wb_inode->all, all) {
- if (req->fd != head->fd)
- continue;
- req->op_ret = -1;
- req->op_errno = op_errno;
- }
- }
- UNLOCK (&wb_inode->lock);
-}
+ /* Add back to todo list to retry */
+ list_add(&req->todo, &wb_inode->todo);
+out:
+ return;
+}
-int
-wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
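+/*
+ * Re-queue the head request and every request batched under it (its winds
+ * list) for a retry; walking the winds list in reverse keeps the original
+ * ordering when the entries are added back to the front of the todo list.
+ */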
+void
+__wb_add_head_for_retry(wb_request_t *head)
{
- wb_inode_t *wb_inode = NULL;
- wb_request_t *head = NULL;
-
- head = frame->local;
- frame->local = NULL;
-
- wb_inode = head->wb_inode;
+ wb_request_t *req = NULL, *tmp = NULL;
- if (op_ret == -1) {
- wb_fulfill_err (head, op_errno);
- } else if (op_ret < head->total_size) {
- /*
- * We've encountered a short write, for whatever reason.
- * Set an EIO error for the next fop. This should be
- * valid for writev or flush (close).
- *
- * TODO: Retry the write so we can potentially capture
- * a real error condition (i.e., ENOSPC).
- */
- wb_fulfill_err (head, EIO);
- }
+ if (!head)
+ goto out;
- wb_head_done (head);
+ list_for_each_entry_safe_reverse(req, tmp, &head->winds, winds)
+ {
+ __wb_add_request_for_retry(req);
+ }
- wb_process_queue (wb_inode);
+ __wb_add_request_for_retry(head);
- STACK_DESTROY (frame->root);
-
- return 0;
+out:
+ return;
}
+void
+wb_add_head_for_retry(wb_request_t *head)
+{
+ if (!head)
+ goto out;
-#define WB_IOV_LOAD(vec, cnt, req, head) do { \
- memcpy (&vec[cnt], req->stub->args.vector, \
- (req->stub->args.count * sizeof(vec[0]))); \
- cnt += req->stub->args.count; \
- head->total_size += req->write_size; \
- } while (0)
+ LOCK(&head->wb_inode->lock);
+ {
+ __wb_add_head_for_retry(head);
+ }
+ UNLOCK(&head->wb_inode->lock);
+out:
+ return;
+}
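+/*
+ * Record a fulfill error on @req: fail the caller directly if its response
+ * is still pending, or stash the error in a waiting flush/fsync. The
+ * request is forgotten when no retry is needed; otherwise it is added back
+ * to the todo list to be attempted again.
+ */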
void
-wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
+__wb_fulfill_request_err(wb_request_t *req, int32_t op_errno)
{
- struct iovec vector[MAX_VECTOR_COUNT];
- int count = 0;
- wb_request_t *req = NULL;
- call_frame_t *frame = NULL;
- gf_boolean_t fderr = _gf_false;
- xlator_t *this = NULL;
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *waiter = NULL;
+ wb_conf_t *conf = NULL;
+
+ wb_inode = req->wb_inode;
+
+ conf = wb_inode->this->private;
+
+ req->op_ret = -1;
+ req->op_errno = op_errno;
+
+ if (req->ordering.lied)
+ waiter = __wb_request_waiting_on(req);
+
+ if (!req->ordering.lied || waiter) {
+ if (!req->ordering.lied) {
+ /* response to app is still pending, send failure in
+ * response.
+ */
+ } else {
+ /* response was sent, store the error in a
+ * waiter (either an fsync or flush).
+ */
+ waiter->op_ret = -1;
+ waiter->op_errno = op_errno;
+ }
- this = THIS;
+ if (!req->ordering.lied || (waiter->stub->fop == GF_FOP_FLUSH) ||
+ ((waiter->stub->fop == GF_FOP_FSYNC) &&
+ !conf->resync_after_fsync)) {
+ /* No retry needed, forget the request */
+ __wb_fulfill_request(req);
+ return;
+ }
+ }
- /* make sure head->total_size is updated before we run into any
- * errors
- */
+ __wb_add_request_for_retry(req);
- WB_IOV_LOAD (vector, count, head, head);
+ return;
+}
- list_for_each_entry (req, &head->winds, winds) {
- WB_IOV_LOAD (vector, count, req, head);
+void
+wb_head_done(wb_request_t *head)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_inode_t *wb_inode = NULL;
- iobref_merge (head->stub->args.iobref,
- req->stub->args.iobref);
- }
+ wb_inode = head->wb_inode;
- if (wb_fd_err (head->fd, this, NULL)) {
- fderr = _gf_true;
- goto err;
+ LOCK(&wb_inode->lock);
+ {
+ list_for_each_entry_safe(req, tmp, &head->winds, winds)
+ {
+ __wb_fulfill_request(req);
}
- frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
- if (!frame)
- goto err;
+ __wb_fulfill_request(head);
+ }
+ UNLOCK(&wb_inode->lock);
+}
- frame->root->lk_owner = head->lk_owner;
- frame->local = head;
+void
+__wb_fulfill_err(wb_request_t *head, int op_errno)
+{
+ wb_request_t *req = NULL, *tmp = NULL;
- LOCK (&wb_inode->lock);
- {
- wb_inode->transit += head->total_size;
- }
- UNLOCK (&wb_inode->lock);
+ if (!head)
+ goto out;
- STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->writev,
- head->fd, vector, count,
- head->stub->args.offset,
- head->stub->args.flags,
- head->stub->args.iobref, NULL);
+ head->wb_inode->dontsync++;
- return;
-err:
- if (!fderr) {
- /* frame creation failure */
- wb_fulfill_err (head, ENOMEM);
- }
+ list_for_each_entry_safe_reverse(req, tmp, &head->winds, winds)
+ {
+ __wb_fulfill_request_err(req, op_errno);
+ }
- wb_head_done (head);
+ __wb_fulfill_request_err(head, op_errno);
- return;
+out:
+ return;
}
+void
+wb_fulfill_err(wb_request_t *head, int op_errno)
+{
+ wb_inode_t *wb_inode = NULL;
-#define NEXT_HEAD(head, req) do { \
- if (head) \
- wb_fulfill_head (wb_inode, head); \
- head = req; \
- expected_offset = req->stub->args.offset + \
- req->write_size; \
- curr_aggregate = 0; \
- vector_count = 0; \
- } while (0)
+ wb_inode = head->wb_inode;
+ LOCK(&wb_inode->lock);
+ {
+ __wb_fulfill_err(head, op_errno);
+ }
+ UNLOCK(&wb_inode->lock);
+}
void
-wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
+__wb_modify_write_request(wb_request_t *req, int synced_size)
{
- wb_request_t *req = NULL;
- wb_request_t *head = NULL;
- wb_request_t *tmp = NULL;
- wb_conf_t *conf = NULL;
- off_t expected_offset = 0;
- size_t curr_aggregate = 0;
- size_t vector_count = 0;
+ struct iovec *vector = NULL;
+ int count = 0;
- conf = wb_inode->this->private;
+ if (!req || synced_size == 0)
+ goto out;
- list_for_each_entry_safe (req, tmp, liabilities, winds) {
- list_del_init (&req->winds);
+ req->write_size -= synced_size;
+ req->stub->args.offset += synced_size;
- if (!head) {
- NEXT_HEAD (head, req);
- continue;
- }
+ vector = req->stub->args.vector;
+ count = req->stub->args.count;
- if (req->fd != head->fd) {
- NEXT_HEAD (head, req);
- continue;
- }
+ req->stub->args.count = iov_skip(vector, count, synced_size);
- if (!is_same_lkowner (&req->lk_owner, &head->lk_owner)) {
- NEXT_HEAD (head, req);
- continue;
- }
-
- if (expected_offset != req->stub->args.offset) {
- NEXT_HEAD (head, req);
- continue;
- }
+out:
+ return;
+}
- if ((curr_aggregate + req->write_size) > conf->aggregate_size) {
- NEXT_HEAD (head, req);
- continue;
- }
+int
+__wb_fulfill_short_write(wb_request_t *req, int size, gf_boolean_t *fulfilled)
+{
+ int accounted_size = 0;
- if (vector_count + req->stub->args.count >
- MAX_VECTOR_COUNT) {
- NEXT_HEAD (head, req);
- continue;
- }
+ if (req == NULL)
+ goto out;
- list_add_tail (&req->winds, &head->winds);
- curr_aggregate += req->write_size;
- vector_count += req->stub->args.count;
- }
+ if (req->write_size <= size) {
+ accounted_size = req->write_size;
+ __wb_fulfill_request(req);
+ *fulfilled = 1;
+ } else {
+ accounted_size = size;
+ __wb_modify_write_request(req, size);
+ *fulfilled = 0;
+ }
- if (head)
- wb_fulfill_head (wb_inode, head);
- return;
+out:
+ return accounted_size;
}
-
void
-wb_do_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
+wb_fulfill_short_write(wb_request_t *head, int size)
{
- wb_request_t *req = NULL;
- wb_request_t *tmp = NULL;
- call_frame_t *frame = NULL;
- struct iatt buf = {0, };
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *req = NULL, *next = NULL;
+ int accounted_size = 0;
+ gf_boolean_t fulfilled = _gf_false;
+
+ if (!head)
+ goto out;
+
+ wb_inode = head->wb_inode;
+
+ req = head;
+
+ LOCK(&wb_inode->lock);
+ {
+        /* Hold a reference to head so that __wb_fulfill_short_write
+         * won't free it. We need head for a cleaner list traversal, as
+         * list_for_each_entry_safe doesn't iterate over the "head"
+         * member: if we passed "next->winds" as the head to
+         * list_for_each_entry, "next" would be skipped. For simpler
+         * logic we traverse the list in order, starting from
+         * "head->winds", and hence head must stay alive.
+         */
+ __wb_request_ref(head);
+
+ next = list_entry(head->winds.next, wb_request_t, winds);
+
+ accounted_size = __wb_fulfill_short_write(head, size, &fulfilled);
- list_for_each_entry_safe (req, tmp, lies, unwinds) {
- frame = req->stub->frame;
+ size -= accounted_size;
- STACK_UNWIND_STRICT (writev, frame, req->op_ret, req->op_errno,
- &buf, &buf, NULL); /* :O */
- req->stub->frame = NULL;
+ if (size == 0) {
+ if (fulfilled && (next != head))
+ req = next;
- list_del_init (&req->unwinds);
- wb_request_unref (req);
+ goto done;
}
- return;
+ list_for_each_entry_safe(req, next, &head->winds, winds)
+ {
+ accounted_size = __wb_fulfill_short_write(req, size, &fulfilled);
+ size -= accounted_size;
+
+ if (size == 0) {
+ if (fulfilled && (next != head))
+ req = next;
+ break;
+ }
+ }
+ done:
+ __wb_request_unref(head);
+ }
+ UNLOCK(&wb_inode->lock);
+
+ wb_add_head_for_retry(req);
+out:
+ return;
}
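
For reference, the short-write accounting above reduces to the following standalone sketch (not part of the patch): an array of queued request sizes stands in for wb_request_t, and the illustrative account_short_write() plays the role of __wb_fulfill_short_write plus __wb_modify_write_request.

#include <stdio.h>

/* Walk the queued request sizes in wind order and account "synced"
 * bytes: requests covered entirely are fulfilled, the first partially
 * covered request is trimmed (its offset/size would be advanced, as
 * __wb_modify_write_request does), and everything after it is retried. */
static void
account_short_write(size_t *req_sizes, int count, size_t synced)
{
    for (int i = 0; i < count; i++) {
        if (synced >= req_sizes[i]) {
            synced -= req_sizes[i];
            printf("req %d: fulfilled (%zu bytes)\n", i, req_sizes[i]);
        } else {
            printf("req %d: trimmed, %zu of %zu bytes remain; "
                   "requests %d..%d go back for retry\n",
                   i, req_sizes[i] - synced, req_sizes[i], i, count - 1);
            return;
        }
    }
}

int
main(void)
{
    size_t sizes[] = {4096, 4096, 8192};
    account_short_write(sizes, 3, 6144); /* backend wrote only 6144 bytes */
    return 0;
}
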
+int
+wb_fulfill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *head = NULL;
+
+ head = frame->local;
+ frame->local = NULL;
+
+ wb_inode = head->wb_inode;
+
+    /* There could be a readdirp session in progress. Since wb_fulfill_cbk
+     * can potentially remove a request from the liability queue,
+     * wb_readdirp_cbk would miss the cached writes on this inode (it
+     * invalidates stats only when the liability queue is non-empty).
+     * Hence, mark the inode here so that its stats are invalidated in
+     * the readdirp response. Specifically, this fixes the following
+     * race described in wb_readdirp_cbk:
+     */
+
+ /* <removed comment from wb_readdirp_cbk>
+     * We cannot guarantee the integrity of entry->d_stat as there are cached
+     * writes. The stat is most likely stale as it doesn't account for the
+     * cached writes. However, checking for a non-empty liability list here
+     * is not a fool-proof solution, as there can be races like:
+ * 1. readdirp is successful on posix
+ * 2. sync of cached write is successful on posix
+ * 3. write-behind received sync response and removed the request from
+ * liability queue
+ * 4. readdirp response is processed at write-behind
+ *
+ * In the above scenario, stat for the file is sent back in readdirp
+ * response but it is stale.
+ * </comment> */
+ wb_set_invalidate(wb_inode);
+
+ if (op_ret == -1) {
+ wb_fulfill_err(head, op_errno);
+ } else if (op_ret < head->total_size) {
+ wb_fulfill_short_write(head, op_ret);
+ } else {
+ wb_head_done(head);
+ }
+
+ wb_process_queue(wb_inode);
+
+ STACK_DESTROY(frame->root);
+
+ return 0;
+}
-void
-__wb_pick_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
+#define WB_IOV_LOAD(vec, cnt, req, head) \
+ do { \
+ memcpy(&vec[cnt], req->stub->args.vector, \
+ (req->stub->args.count * sizeof(vec[0]))); \
+ cnt += req->stub->args.count; \
+ head->total_size += req->write_size; \
+ } while (0)
+
+int
+wb_fulfill_head(wb_inode_t *wb_inode, wb_request_t *head)
{
- wb_request_t *req = NULL;
- wb_request_t *tmp = NULL;
+ struct iovec vector[MAX_VECTOR_COUNT];
+ int count = 0;
+ wb_request_t *req = NULL;
+ call_frame_t *frame = NULL;
- list_for_each_entry_safe (req, tmp, &wb_inode->temptation, lie) {
- if (!req->ordering.fulfilled &&
- wb_inode->window_current > wb_inode->window_conf)
- continue;
+ /* make sure head->total_size is updated before we run into any
+ * errors
+ */
- list_del_init (&req->lie);
- list_move_tail (&req->unwinds, lies);
+ WB_IOV_LOAD(vector, count, head, head);
- wb_inode->window_current += req->orig_size;
+ list_for_each_entry(req, &head->winds, winds)
+ {
+ WB_IOV_LOAD(vector, count, req, head);
- if (!req->ordering.fulfilled) {
- /* burden increased */
- list_add_tail (&req->lie, &wb_inode->liability);
+ if (iobref_merge(head->stub->args.iobref, req->stub->args.iobref))
+ goto err;
+ }
- req->ordering.lied = 1;
+ frame = create_frame(wb_inode->this, wb_inode->this->ctx->pool);
+ if (!frame)
+ goto err;
- wb_inode->gen++;
- }
- }
+ frame->root->lk_owner = head->lk_owner;
+ frame->root->pid = head->client_pid;
+ frame->local = head;
- return;
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->transit += head->total_size;
+ }
+ UNLOCK(&wb_inode->lock);
+
+ STACK_WIND(frame, wb_fulfill_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->writev, head->fd, vector, count,
+ head->stub->args.offset, head->stub->args.flags,
+ head->stub->args.iobref, NULL);
+
+ return 0;
+err:
+ /* frame creation failure */
+ wb_fulfill_err(head, ENOMEM);
+
+ return ENOMEM;
}
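
A minimal sketch of what WB_IOV_LOAD does, assuming plain struct iovec arrays in place of the stubs' vectors; load_iovs() and SKETCH_MAX_VECTOR are illustrative names only and not part of the xlator.

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

#define SKETCH_MAX_VECTOR 64

/* Append the iovecs of one request to the combined vector, the way
 * WB_IOV_LOAD copies req->stub->args.vector into vec[] and bumps the
 * running count and total size. */
static int
load_iovs(struct iovec *vec, int *cnt, size_t *total, const struct iovec *src,
          int src_cnt)
{
    if (*cnt + src_cnt > SKETCH_MAX_VECTOR)
        return -1;
    memcpy(&vec[*cnt], src, src_cnt * sizeof(*src));
    *cnt += src_cnt;
    for (int i = 0; i < src_cnt; i++)
        *total += src[i].iov_len;
    return 0;
}

int
main(void)
{
    char a[] = "hello ", b[] = "write-", c[] = "behind";
    struct iovec r1[] = {{a, 6}}, r2[] = {{b, 6}, {c, 6}};
    struct iovec vec[SKETCH_MAX_VECTOR];
    int cnt = 0;
    size_t total = 0;

    load_iovs(vec, &cnt, &total, r1, 1);
    load_iovs(vec, &cnt, &total, r2, 2);
    printf("one writev with %d iovecs, %zu bytes\n", cnt, total);
    return 0;
}
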
+#define NEXT_HEAD(head, req) \
+ do { \
+ if (head) \
+ ret |= wb_fulfill_head(wb_inode, head); \
+ head = req; \
+ expected_offset = req->stub->args.offset + req->write_size; \
+ curr_aggregate = 0; \
+ vector_count = 0; \
+ } while (0)
int
-__wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req)
-{
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int ret = -1;
- ssize_t required_size = 0;
- size_t holder_len = 0;
- size_t req_len = 0;
-
- if (!holder->iobref) {
- holder_len = iov_length (holder->stub->args.vector,
- holder->stub->args.count);
- req_len = iov_length (req->stub->args.vector,
- req->stub->args.count);
-
- required_size = max ((THIS->ctx->page_size),
- (holder_len + req_len));
- iobuf = iobuf_get2 (req->wb_inode->this->ctx->iobuf_pool,
- required_size);
- if (iobuf == NULL) {
- goto out;
- }
+wb_fulfill(wb_inode_t *wb_inode, list_head_t *liabilities)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *head = NULL;
+ wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
+ off_t expected_offset = 0;
+ size_t curr_aggregate = 0;
+ size_t vector_count = 0;
+ int ret = 0;
+
+ conf = wb_inode->this->private;
+
+ list_for_each_entry_safe(req, tmp, liabilities, winds)
+ {
+ list_del_init(&req->winds);
+
+ if (!head) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
- iobref = iobref_new ();
- if (iobref == NULL) {
- iobuf_unref (iobuf);
- goto out;
- }
+ if (req->fd != head->fd) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
- ret = iobref_add (iobref, iobuf);
- if (ret != 0) {
- iobuf_unref (iobuf);
- iobref_unref (iobref);
- gf_log (req->wb_inode->this->name, GF_LOG_WARNING,
- "cannot add iobuf (%p) into iobref (%p)",
- iobuf, iobref);
- goto out;
- }
+ if (!is_same_lkowner(&req->lk_owner, &head->lk_owner)) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
+
+ if (expected_offset != req->stub->args.offset) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
+
+ if ((curr_aggregate + req->write_size) > conf->aggregate_size) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
+
+ if (vector_count + req->stub->args.count > MAX_VECTOR_COUNT) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
+
+ list_add_tail(&req->winds, &head->winds);
+ curr_aggregate += req->write_size;
+ vector_count += req->stub->args.count;
+ }
+
+ if (head)
+ ret |= wb_fulfill_head(wb_inode, head);
+
+ return ret;
+}
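
The aggregation rules applied through NEXT_HEAD can be sketched as follows, assuming a simplified sketch_write record in place of wb_request_t (the lk-owner check and the iovec-count cap are omitted for brevity).

#include <stdio.h>

struct sketch_write {
    int fd;
    long offset;
    long size;
};

/* Group adjacent writes into one batch while they target the same fd,
 * continue at the expected offset and stay under the aggregate limit;
 * otherwise start a new batch, roughly as NEXT_HEAD does above. */
static void
batch_writes(const struct sketch_write *w, int n, long aggregate_limit)
{
    long expected = -1, aggregate = 0;
    int batch = 0;

    for (int i = 0; i < n; i++) {
        if (i == 0 || w[i].fd != w[i - 1].fd || w[i].offset != expected ||
            aggregate + w[i].size > aggregate_limit) {
            batch++;
            aggregate = 0;
        }
        aggregate += w[i].size;
        expected = w[i].offset + w[i].size;
        printf("write(off=%ld, size=%ld) -> batch %d\n", w[i].offset,
               w[i].size, batch);
    }
}

int
main(void)
{
    struct sketch_write w[] = {
        {3, 0, 4096}, {3, 4096, 4096}, {3, 8192, 65536}, {3, 131072, 4096},
    };
    batch_writes(w, 4, 131072);
    return 0;
}
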
+
+void
+wb_do_unwinds(wb_inode_t *wb_inode, list_head_t *lies)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ call_frame_t *frame = NULL;
+ struct iatt buf = {
+ 0,
+ };
+
+ list_for_each_entry_safe(req, tmp, lies, unwinds)
+ {
+ frame = req->stub->frame;
+
+ STACK_UNWIND_STRICT(writev, frame, req->op_ret, req->op_errno, &buf,
+ &buf, NULL); /* :O */
+ req->stub->frame = NULL;
+
+ list_del_init(&req->unwinds);
+ wb_request_unref(req);
+ }
+
+ return;
+}
- iov_unload (iobuf->ptr, holder->stub->args.vector,
- holder->stub->args.count);
- holder->stub->args.vector[0].iov_base = iobuf->ptr;
- holder->stub->args.count = 1;
+void
+__wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ list_for_each_entry_safe(req, tmp, &wb_inode->temptation, lie)
+ {
+ if (!req->ordering.fulfilled &&
+ wb_inode->window_current > wb_inode->window_conf)
+ continue;
+
+ list_del_init(&req->lie);
+ list_move_tail(&req->unwinds, lies);
+
+ wb_inode->window_current += req->orig_size;
+
+ wb_inode->gen++;
+
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail(&req->lie, &wb_inode->liability);
+
+ req->ordering.lied = 1;
+
+ uuid_utoa_r(req->gfid, gfid);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): added req to liability "
+ "queue. inode-generation-number=%" PRIu64,
+ req->stub->frame->root->unique, gf_fop_list[req->fop],
+ gfid, req->gen, wb_inode->gen);
+ }
+ }
- iobref_unref (holder->stub->args.iobref);
- holder->stub->args.iobref = iobref;
+ return;
+}
- iobuf_unref (iobuf);
+int
+__wb_collapse_small_writes(wb_conf_t *conf, wb_request_t *holder,
+ wb_request_t *req)
+{
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int ret = -1;
+ ssize_t required_size = 0;
+ size_t holder_len = 0;
+ size_t req_len = 0;
+
+ if (!holder->iobref) {
+ holder_len = iov_length(holder->stub->args.vector,
+ holder->stub->args.count);
+ req_len = iov_length(req->stub->args.vector, req->stub->args.count);
+
+ required_size = max((conf->page_size), (holder_len + req_len));
+ iobuf = iobuf_get2(req->wb_inode->this->ctx->iobuf_pool, required_size);
+ if (iobuf == NULL) {
+ goto out;
+ }
- holder->iobref = iobref_ref (iobref);
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ iobuf_unref(iobuf);
+ goto out;
}
- ptr = holder->stub->args.vector[0].iov_base + holder->write_size;
+ ret = iobref_add(iobref, iobuf);
+ if (ret != 0) {
+ gf_msg(req->wb_inode->this->name, GF_LOG_WARNING, -ret,
+ WRITE_BEHIND_MSG_INVALID_ARGUMENT,
+ "cannot add iobuf (%p) into iobref (%p)", iobuf, iobref);
+ iobuf_unref(iobuf);
+ iobref_unref(iobref);
+ goto out;
+ }
- iov_unload (ptr, req->stub->args.vector,
- req->stub->args.count);
+ iov_unload(iobuf->ptr, holder->stub->args.vector,
+ holder->stub->args.count);
+ holder->stub->args.vector[0].iov_base = iobuf->ptr;
+ holder->stub->args.count = 1;
- holder->stub->args.vector[0].iov_len += req->write_size;
- holder->write_size += req->write_size;
- holder->ordering.size += req->write_size;
+ iobref_unref(holder->stub->args.iobref);
+ holder->stub->args.iobref = iobref;
- ret = 0;
+ iobuf_unref(iobuf);
+
+ holder->iobref = iobref_ref(iobref);
+ }
+
+ ptr = holder->stub->args.vector[0].iov_base + holder->write_size;
+
+ iov_unload(ptr, req->stub->args.vector, req->stub->args.count);
+
+ holder->stub->args.vector[0].iov_len += req->write_size;
+ holder->write_size += req->write_size;
+ holder->ordering.size += req->write_size;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
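
__wb_collapse_small_writes essentially appends the incoming write to the holder's buffer while it still fits in one page; a minimal sketch, with a plain malloc'ed buffer standing in for the iobuf/iobref plumbing and collapse() as an illustrative name.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Collapse a new small write into the holder's buffer by appending it,
 * the way __wb_collapse_small_writes grows the holder's iobuf. */
static int
collapse(char *holder_buf, size_t *holder_len, size_t capacity,
         const char *req_buf, size_t req_len)
{
    if (*holder_len + req_len > capacity)
        return -1; /* not enough space left in the page */
    memcpy(holder_buf + *holder_len, req_buf, req_len);
    *holder_len += req_len;
    return 0;
}

int
main(void)
{
    size_t capacity = 4096; /* stand-in for the configured page size */
    char *buf = malloc(capacity);
    size_t len = 0;

    if (!buf)
        return 1;
    collapse(buf, &len, capacity, "hello ", 6);
    collapse(buf, &len, capacity, "world", 5);
    printf("collapsed write: %zu bytes: %.*s\n", len, (int)len, buf);
    free(buf);
    return 0;
}
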
-
void
-__wb_preprocess_winds (wb_inode_t *wb_inode)
-{
- off_t offset_expected = 0;
- ssize_t space_left = 0;
- wb_request_t *req = NULL;
- wb_request_t *tmp = NULL;
- wb_request_t *holder = NULL;
- wb_conf_t *conf = NULL;
- int ret = 0;
- ssize_t page_size = 0;
-
- /* With asynchronous IO from a VM guest (as a file), there
- can be two sequential writes happening in two regions
- of the file. But individual (broken down) IO requests
- can arrive interleaved.
-
- TODO: cycle for each such sequence sifting
- through the interleaved ops
- */
-
- page_size = wb_inode->this->ctx->page_size;
- conf = wb_inode->this->private;
-
- list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
- if (!req->ordering.tempted) {
- if (holder) {
- if (wb_requests_conflict (holder, req))
- /* do not hold on write if a
- dependent write is in queue */
- holder->ordering.go = 1;
- }
- /* collapse only non-sync writes */
- continue;
- } else if (!holder) {
- /* holder is always a non-sync write */
- holder = req;
- continue;
- }
-
- offset_expected = holder->stub->args.offset
- + holder->write_size;
-
- if (req->stub->args.offset != offset_expected) {
- holder->ordering.go = 1;
- holder = req;
- continue;
- }
-
- if (!is_same_lkowner (&req->lk_owner, &holder->lk_owner)) {
- holder->ordering.go = 1;
- holder = req;
- continue;
- }
-
- if (req->fd != holder->fd) {
- holder->ordering.go = 1;
- holder = req;
- continue;
- }
+__wb_preprocess_winds(wb_inode_t *wb_inode)
+{
+ off_t offset_expected = 0;
+ ssize_t space_left = 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *holder = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
+ ssize_t page_size = 0;
+ char gfid[64] = {
+ 0,
+ };
+
+    /* With asynchronous IO from a VM guest (whose image is a file), there
+       can be two sequential write streams happening in two regions
+       of the file, but the individual (broken-down) IO requests
+       can arrive interleaved.
+
+ TODO: cycle for each such sequence sifting
+ through the interleaved ops
+ */
+
+ conf = wb_inode->this->private;
+ page_size = conf->page_size;
+
+ list_for_each_entry_safe(req, tmp, &wb_inode->todo, todo)
+ {
+ if (wb_inode->dontsync && req->ordering.lied) {
+            /* sync has failed. Don't pick lies _again_ for winding,
+             * as winding these lies again would trigger infinite
+             * recursion of wb_process_queue being called from a
+             * failed fulfill. However, pick non-lied requests for
+             * winding so that the application won't block indefinitely
+             * waiting for the write result.
+ */
+
+ uuid_utoa_r(req->gfid, gfid);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): not setting ordering.go"
+ "as dontsync is set",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen);
+
+ continue;
+ }
- space_left = page_size - holder->write_size;
+ if (!req->ordering.tempted) {
+ if (holder) {
+ if (wb_requests_conflict(holder, req))
+ /* do not hold on write if a
+ dependent write is in queue */
+ holder->ordering.go = 1;
+ }
+ /* collapse only non-sync writes */
+ continue;
+ } else if (!holder) {
+ /* holder is always a non-sync write */
+ holder = req;
+ continue;
+ }
- if (space_left < req->write_size) {
- holder->ordering.go = 1;
- holder = req;
- continue;
- }
+ offset_expected = holder->stub->args.offset + holder->write_size;
- ret = __wb_collapse_small_writes (holder, req);
- if (ret)
- continue;
+ if (req->stub->args.offset != offset_expected) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- /* collapsed request is as good as wound
- (from its p.o.v)
- */
- list_del_init (&req->todo);
- __wb_fulfill_request (req);
+ if (!is_same_lkowner(&req->lk_owner, &holder->lk_owner)) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- /* Only the last @holder in queue which
+ if (req->fd != holder->fd) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- - does not have any non-buffered-writes following it
- - has not yet filled its capacity
+ space_left = page_size - holder->write_size;
- does not get its 'go' set, in anticipation of the arrival
- of consecutive smaller writes.
- */
+ if (space_left < req->write_size) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
}
- /* but if trickling writes are enabled, then do not hold back
- writes if there are no outstanding requests
- */
+ ret = __wb_collapse_small_writes(conf, holder, req);
+ if (ret)
+ continue;
- if (conf->trickling_writes && !wb_inode->transit && holder)
- holder->ordering.go = 1;
+ /* collapsed request is as good as wound
+ (from its p.o.v)
+ */
+ list_del_init(&req->todo);
+ __wb_fulfill_request(req);
- return;
-}
+ /* Only the last @holder in queue which
+ - does not have any non-buffered-writes following it
+ - has not yet filled its capacity
-void
-__wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
- list_head_t *liabilities)
+ does not get its 'go' set, in anticipation of the arrival
+ of consecutive smaller writes.
+ */
+ }
+
+ /* but if trickling writes are enabled, then do not hold back
+ writes if there are no outstanding requests
+ */
+
+ if (conf->trickling_writes && !wb_inode->transit && holder)
+ holder->ordering.go = 1;
+
+ if (wb_inode->dontsync > 0)
+ wb_inode->dontsync--;
+
+ return;
+}
+
+int
+__wb_handle_failed_conflict(wb_request_t *req, wb_request_t *conflict,
+ list_head_t *tasks)
{
- wb_request_t *req = NULL;
- wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ conf = req->wb_inode->this->private;
+
+ uuid_utoa_r(req->gfid, gfid);
+
+ if ((req->stub->fop != GF_FOP_FLUSH) &&
+ ((req->stub->fop != GF_FOP_FSYNC) || conf->resync_after_fsync)) {
+ if (!req->ordering.lied && list_empty(&conflict->wip)) {
+ /* If request itself is in liability queue,
+ * 1. We cannot unwind as the response has already been
+ * sent.
+ * 2. We cannot wind till conflict clears up.
+ * 3. So, skip the request for now.
+ * 4. Otherwise, resume (unwind) it with error.
+ */
+ req->op_ret = -1;
+ req->op_errno = conflict->op_errno;
+ if ((req->stub->fop == GF_FOP_TRUNCATE) ||
+ (req->stub->fop == GF_FOP_FTRUNCATE)) {
+ req->stub->frame->local = NULL;
+ }
+
+ list_del_init(&req->todo);
+ list_add_tail(&req->winds, tasks);
+
+ gf_msg_debug(req->wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): A conflicting write "
+ "request in liability queue has failed "
+ "to sync (error = \"%s\"), "
+ "unwinding this request as a failure",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen,
+ strerror(req->op_errno));
+
+ if (req->ordering.tempted) {
+ /* make sure that it won't be unwound in
+ * wb_do_unwinds too. Otherwise there'll be
+ * a double wind.
+ */
+ list_del_init(&req->lie);
+
+ gf_msg_debug(req->wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, "
+ "gfid=%s, gen=%" PRIu64
+ "): "
+ "removed from liability queue",
+ req->unique, gf_fop_list[req->fop], gfid,
+ req->gen);
+
+ __wb_fulfill_request(req);
+ }
+ }
+ } else {
+ gf_msg_debug(req->wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): A conflicting write request "
+ "in liability queue has failed to sync "
+ "(error = \"%s\"). This is an "
+ "FSYNC/FLUSH and we need to maintain ordering "
+ "guarantees with other writes in TODO queue. "
+ "Hence doing nothing now",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen,
+ strerror(conflict->op_errno));
+
+        /* flush and fsync (without conf->resync_after_fsync) act as
+           barriers. We cannot unwind them out of order, ahead of
+           earlier-generation writes, just because there is a
+           conflicting liability with an error. So, wait for our turn
+           till there are no conflicting liabilities.
+
+           This situation can arise when liabilities are spread across
+           multiple generations. For example, consider two writes with
+           the following characteristics:
+
+ 1. they belong to different generations gen1, gen2 and
+ (gen1 > gen2).
+ 2. they overlap.
+ 3. both are liabilities.
+ 4. gen1 write was attempted to sync, but the attempt failed.
+ 5. there was no attempt to sync gen2 write yet.
+ 6. A flush (as part of close) is issued and gets a gen no
+ gen3.
+
+ In the above scenario, if flush is unwound without waiting
+ for gen1 and gen2 writes either to be successfully synced or
+ purged, we end up with these two writes in wb_inode->todo
+ list forever as there will be no attempt to process the queue
+ as flush is the last operation.
+ */
+ }
+
+ return 0;
+}
- list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
- if (wb_liability_has_conflict (wb_inode, req))
- continue;
+int
+__wb_pick_winds(wb_inode_t *wb_inode, list_head_t *tasks,
+ list_head_t *liabilities)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *conflict = NULL;
+    char req_gfid[64] = {
+        0,
+    };
+    char conflict_gfid[64] = {
+        0,
+    };
+
+ list_for_each_entry_safe(req, tmp, &wb_inode->todo, todo)
+ {
+ uuid_utoa_r(req->gfid, req_gfid);
+
+ conflict = wb_liability_has_conflict(wb_inode, req);
+ if (conflict) {
+ uuid_utoa_r(conflict->gfid, conflict_gfid);
+
+ gf_msg_debug(wb_inode->this->name, 0,
+ "Not winding request due to a "
+ "conflicting write in liability queue. "
+ "REQ: unique=%" PRIu64
+ ", fop=%s, "
+ "gen=%" PRIu64
+ ", gfid=%s. "
+ "CONFLICT: unique=%" PRIu64
+ ", fop=%s, "
+ "gen=%" PRIu64
+ ", gfid=%s, "
+ "conflicts-sync-failed?=%s, "
+ "conflicts-error=%s",
+ req->unique, gf_fop_list[req->fop], req->gen, req_gfid,
+ conflict->unique, gf_fop_list[conflict->fop],
+ conflict->gen, conflict_gfid,
+                         (conflict->op_ret == -1) ? "yes" : "no",
+ strerror(conflict->op_errno));
+
+ if (conflict->op_ret == -1) {
+ /* There is a conflicting liability which failed
+ * to sync in previous attempts, resume the req
+ * and fail, unless its an fsync/flush.
+ */
+
+ __wb_handle_failed_conflict(req, conflict, tasks);
+ } else {
+ /* There is a conflicting liability which was
+ * not attempted to sync even once. Wait till
+ * at least one attempt to sync is made.
+ */
+ }
+
+ continue;
+ }
- if (req->ordering.tempted && !req->ordering.go)
- /* wait some more */
- continue;
+ if (req->ordering.tempted && !req->ordering.go) {
+ /* wait some more */
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64 ", fop=%s, gen=%" PRIu64
+ ", gfid=%s): ordering.go is not set, "
+ "hence not winding",
+ req->unique, gf_fop_list[req->fop], req->gen,
+ req_gfid);
+ continue;
+ }
- if (req->stub->fop == GF_FOP_WRITE) {
- if (wb_wip_has_conflict (wb_inode, req))
- continue;
+ if (req->stub->fop == GF_FOP_WRITE) {
+ conflict = wb_wip_has_conflict(wb_inode, req);
+
+ if (conflict) {
+ uuid_utoa_r(conflict->gfid, conflict_gfid);
+
+ gf_msg_debug(wb_inode->this->name, 0,
+ "Not winding write request as "
+ "a conflicting write is being "
+ "synced to backend. "
+ "REQ: unique=%" PRIu64
+ " fop=%s,"
+ " gen=%" PRIu64
+ ", gfid=%s. "
+ "CONFLICT: unique=%" PRIu64
+ " "
+ "fop=%s, gen=%" PRIu64
+ ", "
+ "gfid=%s",
+ req->unique, gf_fop_list[req->fop], req->gen,
+ req_gfid, conflict->unique,
+ gf_fop_list[conflict->fop], conflict->gen,
+ conflict_gfid);
+ continue;
+ }
+
+ list_add_tail(&req->wip, &wb_inode->wip);
+ req->wind_count++;
+
+ if (!req->ordering.tempted)
+ /* unrefed in wb_writev_cbk */
+ req->stub->frame->local = __wb_request_ref(req);
+ }
- list_add_tail (&req->wip, &wb_inode->wip);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): picking the request for "
+ "winding",
+ req->unique, gf_fop_list[req->fop], req_gfid, req->gen);
- if (!req->ordering.tempted)
- /* unrefed in wb_writev_cbk */
- req->stub->frame->local =
- __wb_request_ref (req);
- }
+ list_del_init(&req->todo);
- list_del_init (&req->todo);
+ if (req->ordering.tempted) {
+ list_add_tail(&req->winds, liabilities);
+ } else {
+ list_add_tail(&req->winds, tasks);
+ }
+ }
- if (req->ordering.tempted)
- list_add_tail (&req->winds, liabilities);
- else
- list_add_tail (&req->winds, tasks);
- }
+ return 0;
}
-
void
-wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks)
+wb_do_winds(wb_inode_t *wb_inode, list_head_t *tasks)
{
- wb_request_t *req = NULL;
- wb_request_t *tmp = NULL;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- list_for_each_entry_safe (req, tmp, tasks, winds) {
- list_del_init (&req->winds);
+ list_for_each_entry_safe(req, tmp, tasks, winds)
+ {
+ list_del_init(&req->winds);
- call_resume (req->stub);
+ if (req->op_ret == -1) {
+ call_unwind_error_keep_stub(req->stub, req->op_ret, req->op_errno);
+ } else {
+ call_resume_keep_stub(req->stub);
+ }
- wb_request_unref (req);
- }
+ wb_request_unref(req);
+ }
}
-
void
-wb_process_queue (wb_inode_t *wb_inode)
+wb_process_queue(wb_inode_t *wb_inode)
{
- list_head_t tasks = {0, };
- list_head_t lies = {0, };
- list_head_t liabilities = {0, };
+ list_head_t tasks;
+ list_head_t lies;
+ list_head_t liabilities;
+ int wind_failure = 0;
- INIT_LIST_HEAD (&tasks);
- INIT_LIST_HEAD (&lies);
- INIT_LIST_HEAD (&liabilities);
+ INIT_LIST_HEAD(&tasks);
+ INIT_LIST_HEAD(&lies);
+ INIT_LIST_HEAD(&liabilities);
- LOCK (&wb_inode->lock);
- {
- __wb_preprocess_winds (wb_inode);
+ do {
+ gf_log_callingfn(wb_inode->this->name, GF_LOG_DEBUG,
+ "processing queues");
- __wb_pick_winds (wb_inode, &tasks, &liabilities);
+ LOCK(&wb_inode->lock);
+ {
+ __wb_preprocess_winds(wb_inode);
- __wb_pick_unwinds (wb_inode, &lies);
+ __wb_pick_winds(wb_inode, &tasks, &liabilities);
+ __wb_pick_unwinds(wb_inode, &lies);
}
- UNLOCK (&wb_inode->lock);
+ UNLOCK(&wb_inode->lock);
- wb_do_unwinds (wb_inode, &lies);
+ if (!list_empty(&lies))
+ wb_do_unwinds(wb_inode, &lies);
- wb_do_winds (wb_inode, &tasks);
+ if (!list_empty(&tasks))
+ wb_do_winds(wb_inode, &tasks);
- wb_fulfill (wb_inode, &liabilities);
+        /* If there is an error in wb_fulfill before winding write
+         * requests, we would miss the invocation of wb_process_queue
+         * from wb_fulfill_cbk. So, process the queue again.
+ */
+ if (!list_empty(&liabilities))
+ wind_failure = wb_fulfill(wb_inode, &liabilities);
+ } while (wind_failure);
- return;
+ return;
}
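
The retry behaviour of wb_process_queue can be reduced to this sketch, where the hypothetical fulfill_liabilities() models wb_fulfill returning non-zero when it fails before winding anything (so no callback will re-trigger processing).

#include <stdio.h>

static int fulfill_attempts;

/* Pretend the first attempt fails before winding (e.g. ENOMEM). */
static int
fulfill_liabilities(void)
{
    return (++fulfill_attempts == 1) ? -1 : 0;
}

/* Skeleton of the loop above: pick work under the lock, unwind lies and
 * wind tasks outside it, and loop again whenever the fulfill step failed
 * before it could wind anything. */
static void
process_queue(void)
{
    int wind_failure = 0;

    do {
        /* ... pick unwinds/winds/liabilities under the inode lock ... */
        wind_failure = fulfill_liabilities();
        printf("processed queue, wind_failure=%d\n", wind_failure);
    } while (wind_failure);
}

int
main(void)
{
    process_queue();
    return 0;
}
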
+void
+wb_set_inode_size(wb_inode_t *wb_inode, struct iatt *postbuf)
+{
+ GF_ASSERT(wb_inode);
+ GF_ASSERT(postbuf);
+
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->size = postbuf->ia_size;
+ }
+ UNLOCK(&wb_inode->lock);
+}
int
-wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+wb_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- wb_request_t *req = NULL;
- wb_inode_t *wb_inode;
+ wb_request_t *req = NULL;
+ wb_inode_t *wb_inode;
- req = frame->local;
- frame->local = NULL;
- wb_inode = req->wb_inode;
+ req = frame->local;
+ frame->local = NULL;
+ wb_inode = req->wb_inode;
- wb_request_unref (req);
+ LOCK(&req->wb_inode->lock);
+ {
+ list_del_init(&req->wip);
+ }
+ UNLOCK(&req->wb_inode->lock);
- /* requests could be pending while this was in progress */
- wb_process_queue(wb_inode);
+ wb_request_unref(req);
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ /* requests could be pending while this was in progress */
+ wb_process_queue(wb_inode);
+
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+int
+wb_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_WIND(frame, wb_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
int
-wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+wb_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_WIND (frame, wb_writev_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ gf_boolean_t wb_disabled = 0;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int32_t op_errno = EINVAL;
+ int o_direct = O_DIRECT;
+
+ conf = this->private;
+
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!conf->strict_O_DIRECT)
+ o_direct = 0;
+
+ if (fd->flags & (O_SYNC | O_DSYNC | o_direct))
+ wb_disabled = 1;
+
+ if (flags & (O_SYNC | O_DSYNC | o_direct))
+ wb_disabled = 1;
+
+ if (wb_disabled)
+ stub = fop_writev_stub(frame, wb_writev_helper, fd, vector, count,
+ offset, flags, iobref, xdata);
+ else
+ stub = fop_writev_stub(frame, NULL, fd, vector, count, offset, flags,
+ iobref, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (wb_disabled)
+ ret = wb_enqueue(wb_inode, stub);
+ else
+ ret = wb_enqueue_tempted(wb_inode, stub);
+
+ if (!ret) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ wb_process_queue(wb_inode);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+
+ return 0;
}
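
The write-behind bypass decision in wb_writev amounts to a flag check; a standalone sketch, assuming a strict_o_direct knob that mirrors conf->strict_O_DIRECT (on glibc, _GNU_SOURCE is needed for O_DIRECT).

#define _GNU_SOURCE /* for O_DIRECT on Linux/glibc */
#include <fcntl.h>
#include <stdio.h>

/* Decide whether write-behind must be bypassed for a write, mirroring
 * the checks in wb_writev: O_SYNC/O_DSYNC always disable caching, and
 * O_DIRECT does so only when strict O_DIRECT handling is enabled. */
static int
wb_disabled_for(int fd_flags, int write_flags, int strict_o_direct)
{
    int o_direct = strict_o_direct ? O_DIRECT : 0;

    if (fd_flags & (O_SYNC | O_DSYNC | o_direct))
        return 1;
    if (write_flags & (O_SYNC | O_DSYNC | o_direct))
        return 1;
    return 0;
}

int
main(void)
{
    printf("O_DIRECT fd, strict off: disabled=%d\n",
           wb_disabled_for(O_DIRECT | O_WRONLY, 0, 0));
    printf("O_DIRECT fd, strict on:  disabled=%d\n",
           wb_disabled_for(O_DIRECT | O_WRONLY, 0, 1));
    printf("O_SYNC fd:               disabled=%d\n",
           wb_disabled_for(O_SYNC | O_WRONLY, 0, 0));
    return 0;
}
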
+int
+wb_readv_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
int
-wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- wb_inode_t *wb_inode = NULL;
- wb_conf_t *conf = NULL;
- gf_boolean_t wb_disabled = 0;
- call_stub_t *stub = NULL;
- int ret = -1;
- int32_t op_errno = EINVAL;
- int o_direct = O_DIRECT;
-
- conf = this->private;
-
- if (wb_fd_err (fd, this, &op_errno)) {
- goto unwind;
- }
+wb_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_create (this, fd->inode);
- if (!wb_inode) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (!conf->strict_O_DIRECT)
- o_direct = 0;
-
- if (fd->flags & (O_SYNC|O_DSYNC|o_direct))
- wb_disabled = 1;
-
- if (flags & (O_SYNC|O_DSYNC|o_direct))
- wb_disabled = 1;
-
- if (wb_disabled)
- stub = fop_writev_stub (frame, wb_writev_helper, fd, vector,
- count, offset, flags, iobref, xdata);
- else
- stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
- flags, iobref, xdata);
- if (!stub) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (wb_disabled)
- ret = wb_enqueue (wb_inode, stub);
- else
- ret = wb_enqueue_tempted (wb_inode, stub);
+ stub = fop_readv_stub(frame, wb_readv_helper, fd, size, offset, flags,
+ xdata);
+ if (!stub)
+ goto unwind;
- if (!ret) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, NULL);
- if (stub)
- call_stub_destroy (stub);
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
- return 0;
+noqueue:
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+wb_flush_bg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
- xdata);
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
+int
+wb_flush_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_frame_t *bg_frame = NULL;
+ int32_t op_errno = 0;
+ int op_ret = 0;
+
+ conf = this->private;
+
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ if (conf->flush_behind)
+ goto flushbehind;
+
+ STACK_WIND(frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+
+flushbehind:
+ bg_frame = copy_frame(frame);
+ if (!bg_frame) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ STACK_WIND(bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ /* fall through */
+unwind:
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
int
-wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+wb_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (!wb_inode)
- goto noqueue;
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
- offset, flags, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_flush_stub(frame, wb_flush_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
- NULL);
- return 0;
+ STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+
+ return 0;
noqueue:
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
- xdata);
- return 0;
+ STACK_WIND(frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
}
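
Flush-behind, as handled above, acknowledges the flush immediately and lets the real flush run in the background; a rough standalone analogy, with a detached thread standing in for the copied background frame (link with -lpthread).

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-in for the background flush wound on the copied frame. */
static void *
background_flush(void *arg)
{
    (void)arg;
    sleep(1); /* pretend the flush takes a while on the wire */
    printf("background flush completed\n");
    return NULL;
}

int
main(void)
{
    pthread_t tid;

    if (pthread_create(&tid, NULL, background_flush, NULL) != 0)
        return 1;
    pthread_detach(tid);
    printf("flush acknowledged to application immediately\n");
    sleep(2); /* keep the process alive for the demo */
    return 0;
}
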
-
int
-wb_flush_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+wb_fsync_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-
int
-wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+wb_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- wb_conf_t *conf = NULL;
- wb_inode_t *wb_inode = NULL;
- call_frame_t *bg_frame = NULL;
- int32_t op_errno = 0;
- int op_ret = 0;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = EINVAL;
- conf = this->private;
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (!wb_inode) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+ stub = fop_fsync_stub(frame, wb_fsync_helper, fd, datasync, xdata);
+ if (!stub)
+ goto unwind;
- if (wb_fd_err (fd, this, &op_errno)) {
- op_ret = -1;
- goto unwind;
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- if (conf->flush_behind)
- goto flushbehind;
+ wb_process_queue(wb_inode);
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
+ return 0;
-flushbehind:
- bg_frame = copy_frame (frame);
- if (!bg_frame) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- STACK_WIND (bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- /* fall through */
unwind:
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
+ STACK_UNWIND_STRICT(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
+int
+wb_stat_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
int
-wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+wb_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (!wb_inode)
- goto noqueue;
+ wb_inode = wb_inode_ctx_get(this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- stub = fop_flush_stub (frame, wb_flush_helper, fd, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_stat_stub(frame, wb_stat_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT(stat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
noqueue:
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
-
-
int
-wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+wb_fstat_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
- return 0;
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
-
int
-wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
- dict_t *xdata)
+wb_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
- int32_t op_errno = EINVAL;
-
- if (wb_fd_err (fd, this, &op_errno))
- goto unwind;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (!wb_inode)
- goto noqueue;
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_fstat_stub(frame, wb_fstat_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
noqueue:
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
- return 0;
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
+int32_t
+wb_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT(frame->local);
+
+ if (op_ret == 0)
+ wb_set_inode_size(frame->local, postbuf);
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
int
-wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+wb_truncate_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
+ STACK_WIND(frame, wb_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
-
int
-wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+wb_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ wb_inode = wb_inode_create(this, loc->inode);
+ if (!wb_inode)
+ goto unwind;
- wb_inode = wb_inode_ctx_get (this, loc->inode);
- if (!wb_inode)
- goto noqueue;
+ frame->local = wb_inode;
- stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_truncate_stub(frame, wb_truncate_helper, loc, offset, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- if (stub)
- call_stub_destroy (stub);
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
-noqueue:
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
+ return 0;
}
+int32_t
+wb_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT(frame->local);
+
+ if (op_ret == 0)
+ wb_set_inode_size(frame->local, postbuf);
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
int
-wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+wb_ftruncate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
+ STACK_WIND(frame, wb_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
-
int
-wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+wb_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = 0;
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (!wb_inode)
- goto noqueue;
+ frame->local = wb_inode;
- stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_ftruncate_stub(frame, wb_ftruncate_helper, fd, offset, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub)) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
+ frame->local = NULL;
- if (stub)
- call_stub_destroy (stub);
- return 0;
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
-noqueue:
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
}
+int
+wb_setattr_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
int
-wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset, dict_t *xdata)
+wb_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+
+ wb_inode = wb_inode_ctx_get(this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
+
+ stub = fop_setattr_stub(frame, wb_setattr_helper, loc, stbuf, valid, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue(wb_inode);
+
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
}
+int
+wb_fsetattr_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
int
-wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+wb_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_create (this, loc->inode);
- if (!wb_inode)
- goto unwind;
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
- offset, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_fsetattr_stub(frame, wb_fsetattr_helper, fd, stbuf, valid,
+ xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
- return 0;
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
+
+int32_t
+wb_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
+
+ if (((flags & O_RDWR) || (flags & O_WRONLY)) && (flags & O_TRUNC))
+ wb_inode->size = 0;
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
- if (stub)
- call_stub_destroy (stub);
+int32_t
+wb_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
- return 0;
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
+
+ if (((flags & O_RDWR) || (flags & O_WRONLY)) && (flags & O_TRUNC))
+ wb_inode->size = 0;
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
}
+int32_t
+wb_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ if (op_ret == 0) {
+ wb_inode_t *wb_inode = wb_inode_ctx_get(this, inode);
+ if (wb_inode)
+ wb_set_inode_size(wb_inode, buf);
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
int
-wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
+wb_lookup_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
+ STACK_WIND(frame, wb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
}
+int32_t
+wb_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
-int
-wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+ wb_inode = wb_inode_ctx_get(this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
+
+ stub = fop_lookup_stub(frame, wb_lookup_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue(wb_inode);
+
+ return 0;
+
+unwind:
+ if (stub)
+ call_stub_destroy(stub);
+
+ STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, wb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+static void
+wb_mark_readdirp_start(xlator_t *this, inode_t *directory)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
- int32_t op_errno = 0;
+ wb_inode_t *wb_directory_inode = NULL;
- wb_inode = wb_inode_create (this, fd->inode);
- if (!wb_inode) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_directory_inode = wb_inode_create(this, directory);
- if (wb_fd_err (fd, this, &op_errno))
- goto unwind;
+ if (!wb_directory_inode)
+ return;
- stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
- offset, xdata);
- if (!stub) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ LOCK(&wb_directory_inode->lock);
+ {
+ GF_ATOMIC_INC(wb_directory_inode->readdirps);
+ }
+ UNLOCK(&wb_directory_inode->lock);
- if (!wb_enqueue (wb_inode, stub)) {
- op_errno = ENOMEM;
- goto unwind;
+ return;
+}
+
+static void
+wb_mark_readdirp_end(xlator_t *this, inode_t *directory)
+{
+ wb_inode_t *wb_directory_inode = NULL, *wb_inode = NULL, *tmp = NULL;
+ int readdirps = 0;
+
+ wb_directory_inode = wb_inode_ctx_get(this, directory);
+
+ if (!wb_directory_inode)
+ return;
+
+ LOCK(&wb_directory_inode->lock);
+ {
+ readdirps = GF_ATOMIC_DEC(wb_directory_inode->readdirps);
+ if (readdirps)
+ goto unlock;
+
+ list_for_each_entry_safe(wb_inode, tmp,
+ &wb_directory_inode->invalidate_list,
+ invalidate_list)
+ {
+ list_del_init(&wb_inode->invalidate_list);
+ GF_ATOMIC_INIT(wb_inode->invalidate, 0);
+ inode_unref(wb_inode->inode);
}
+ }
+unlock:
+ UNLOCK(&wb_directory_inode->lock);
- wb_process_queue (wb_inode);
+ return;
+}
- return 0;
+int32_t
+wb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ gf_dirent_t *entry = NULL;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode || !IA_ISREG(entry->d_stat.ia_type))
+ continue;
+
+ wb_inode = wb_inode_ctx_get(this, entry->inode);
+ if (!wb_inode)
+ continue;
+
+ LOCK(&wb_inode->lock);
+ {
+ if (!list_empty(&wb_inode->liability) ||
+ GF_ATOMIC_GET(wb_inode->invalidate)) {
+ inode = entry->inode;
+
+ entry->inode = NULL;
+ memset(&entry->d_stat, 0, sizeof(entry->d_stat));
+ }
+ }
+ UNLOCK(&wb_inode->lock);
+
+ if (inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ }
unwind:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ wb_mark_readdirp_end(this, fd->inode);
- if (stub)
- call_stub_destroy (stub);
- return 0;
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
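
A rough illustration of the invalidation done in wb_readdirp_cbk, assuming a simplified sketch_entry in place of gf_dirent_t and a boolean standing in for the liability/invalidate checks.

#include <stdio.h>

struct sketch_entry {
    const char *name;
    int has_cached_writes; /* stand-in for a non-empty liability queue */
    long ia_size;          /* stand-in for entry->d_stat */
};

/* Scrub stats from readdirp-style entries whose inodes still have
 * cached writes, the way wb_readdirp_cbk clears entry->d_stat (and the
 * inode pointer) so the client does not cache a stale size. */
static void
scrub_stale_stats(struct sketch_entry *entries, int count)
{
    for (int i = 0; i < count; i++) {
        if (entries[i].has_cached_writes) {
            entries[i].ia_size = 0;
            printf("%s: stat dropped (writes still cached)\n",
                   entries[i].name);
        } else {
            printf("%s: stat kept, size=%ld\n", entries[i].name,
                   entries[i].ia_size);
        }
    }
}

int
main(void)
{
    struct sketch_entry entries[] = {
        {"clean.txt", 0, 1024},
        {"dirty.txt", 1, 512},
    };
    scrub_stale_stats(entries, 2);
    return 0;
}
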
-
-int
-wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+int32_t
+wb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ wb_mark_readdirp_start(this, fd->inode);
+
+ frame->local = fd;
+
+ STACK_WIND(frame, wb_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
}
+int32_t
+wb_link_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
-int
-wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+int32_t
+wb_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_ctx_get (this, loc->inode);
- if (!wb_inode)
- goto noqueue;
+ wb_inode = wb_inode_ctx_get(this, oldloc->inode);
+ if (!wb_inode)
+ goto noqueue;
- stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf,
- valid, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_link_stub(frame, wb_link_helper, oldloc, newloc, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
+
+ return 0;
- return 0;
unwind:
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
- if (stub)
- call_stub_destroy (stub);
- return 0;
+ return 0;
noqueue:
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
}
+int32_t
+wb_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset,
+ len, xdata);
+ return 0;
+}
-int
-wb_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+int32_t
+wb_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
+
+ stub = fop_fallocate_stub(frame, wb_fallocate_helper, fd, keep_size, offset,
+ len, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue(wb_inode);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+
+ return 0;
+
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset,
+ len, xdata);
+ return 0;
}
+int32_t
+wb_discard_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+ fd, offset, len, xdata);
+ return 0;
+}
-int
-wb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+int32_t
+wb_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (!wb_inode)
- goto noqueue;
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- stub = fop_fsetattr_stub (frame, wb_fsetattr_helper, fd, stbuf,
- valid, xdata);
- if (!stub)
- goto unwind;
+ stub = fop_discard_stub(frame, wb_discard_helper, fd, offset, len, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_enqueue (wb_inode, stub))
- goto unwind;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- wb_process_queue (wb_inode);
+ wb_process_queue(wb_inode);
+
+ return 0;
- return 0;
unwind:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
- if (stub)
- call_stub_destroy (stub);
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
noqueue:
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+ fd, offset, len, xdata);
+
+ return 0;
}
+int32_t
+wb_zerofill_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+ fd, offset, len, xdata);
+ return 0;
+}
-int
-wb_forget (xlator_t *this, inode_t *inode)
+int32_t
+wb_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- uint64_t tmp = 0;
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- inode_ctx_del (inode, this, &tmp);
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- wb_inode = (wb_inode_t *)(long)tmp;
+ stub = fop_zerofill_stub(frame, wb_zerofill_helper, fd, offset, len, xdata);
+ if (!stub)
+ goto unwind;
- if (!wb_inode)
- return 0;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- GF_ASSERT (list_empty (&wb_inode->todo));
- GF_ASSERT (list_empty (&wb_inode->liability));
- GF_ASSERT (list_empty (&wb_inode->temptation));
+ wb_process_queue(wb_inode);
- GF_FREE (wb_inode);
+ return 0;
- return 0;
+unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+
+    return 0;
+
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+ fd, offset, len, xdata);
+ return 0;
}
+int32_t
+wb_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+
+ wb_inode = wb_inode_ctx_get(this, oldloc->inode);
+ if (!wb_inode)
+ goto noqueue;
+
+ stub = fop_rename_stub(frame, default_rename_resume, oldloc, newloc, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue(wb_inode);
+
+ return 0;
+
+unwind:
+ if (stub)
+ call_stub_destroy(stub);
+
+ STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
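wb_link(), wb_fallocate(), wb_discard(), wb_zerofill() and wb_rename() above all share one shape: if the inode has no write-behind context the fop is wound straight to the child, otherwise it is wrapped in a call stub, queued behind the cached writes, and wb_process_queue() resumes it once its dependencies are met; a stub or enqueue failure unwinds the fop with ENOMEM. A small self-contained sketch of that control flow, where stub, wb_inode, enqueue and wb_fop are simplified stand-ins rather than the GlusterFS call-stub API:

/* Sketch only: queue a fop behind cached writes, or wind it straight
 * through when the file has no write-behind state. */
#include <stdio.h>
#include <stdlib.h>

typedef void (*resume_fn)(int arg);

struct stub {                          /* a parked fop: what to run, on what */
    resume_fn fn;
    int arg;
    struct stub *next;
};

struct wb_inode {                      /* per-file queue of parked fops */
    struct stub *head, **tail;
};

static void do_rename(int arg)
{
    printf("rename %d wound to the child\n", arg);
}

static int enqueue(struct wb_inode *wb, resume_fn fn, int arg)
{
    struct stub *s = malloc(sizeof(*s));
    if (!s)
        return 0;                      /* caller unwinds with ENOMEM */
    s->fn = fn;
    s->arg = arg;
    s->next = NULL;
    *wb->tail = s;
    wb->tail = &s->next;
    return 1;
}

static void process_queue(struct wb_inode *wb)
{
    struct stub *s;

    /* the real translator only resumes stubs whose dependent writes have
     * synced; this sketch simply resumes every queued stub in order */
    while ((s = wb->head) != NULL) {
        wb->head = s->next;
        s->fn(s->arg);
        free(s);
    }
    wb->tail = &wb->head;
}

static void wb_fop(struct wb_inode *wb, resume_fn fn, int arg)
{
    if (!wb) {                         /* no write-behind state: wind through */
        fn(arg);
        return;
    }
    if (!enqueue(wb, fn, arg)) {       /* ENOMEM path: fail the fop */
        fprintf(stderr, "fop %d failed: ENOMEM\n", arg);
        return;
    }
    process_queue(wb);
}

int main(void)
{
    struct wb_inode wb = {.head = NULL, .tail = &wb.head};

    wb_fop(NULL, do_rename, 1);        /* the 'noqueue' path */
    wb_fop(&wb, do_rename, 2);         /* queued, then resumed */
    return 0;
}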
int
-wb_release (xlator_t *this, fd_t *fd)
+wb_forget(xlator_t *this, inode_t *inode)
{
- uint64_t tmp = 0;
+ uint64_t tmp = 0;
+ wb_inode_t *wb_inode = NULL;
+
+ inode_ctx_del(inode, this, &tmp);
- fd_ctx_del (fd, this, &tmp);
+ wb_inode = (wb_inode_t *)(long)tmp;
+ if (!wb_inode)
return 0;
+
+ wb_inode_destroy(wb_inode);
+
+ return 0;
}
+int
+wb_release(xlator_t *this, fd_t *fd)
+{
+ uint64_t tmp = 0;
+
+ (void)fd_ctx_del(fd, this, &tmp);
+
+ return 0;
+}
int
-wb_priv_dump (xlator_t *this)
+wb_priv_dump(xlator_t *this)
{
- wb_conf_t *conf = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- int ret = -1;
+ wb_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO("write-behind", this, out);
- conf = this->private;
- GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
- gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
- "priv");
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.write-behind",
+ "priv");
- gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
- gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size);
- gf_proc_dump_write ("window_size", "%d", conf->window_size);
- gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind);
- gf_proc_dump_write ("trickling_writes", "%d", conf->trickling_writes);
+ gf_proc_dump_write("aggregate_size", "%" PRIu64, conf->aggregate_size);
+ gf_proc_dump_write("window_size", "%" PRIu64, conf->window_size);
+ gf_proc_dump_write("flush_behind", "%d", conf->flush_behind);
+ gf_proc_dump_write("trickling_writes", "%d", conf->trickling_writes);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
void
-__wb_dump_requests (struct list_head *head, char *prefix)
+__wb_dump_requests(struct list_head *head, char *prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN] = {0, };
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }, flag = 0;
- wb_request_t *req = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] =
+ {
+ 0,
+ },
+ flag = 0;
+ wb_request_t *req = NULL;
- list_for_each_entry (req, head, all) {
- gf_proc_dump_build_key (key_prefix, key,
- (char *)gf_fop_list[req->fop]);
+ list_for_each_entry(req, head, all)
+ {
+ gf_proc_dump_build_key(key_prefix, key, "%s",
+ (char *)gf_fop_list[req->fop]);
- gf_proc_dump_add_section(key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
- gf_proc_dump_write ("request-ptr", "%p", req);
+ gf_proc_dump_write("unique", "%" PRIu64, req->unique);
- gf_proc_dump_write ("refcount", "%d", req->refcount);
+ gf_proc_dump_write("refcount", "%d", req->refcount);
- if (list_empty (&req->todo))
- gf_proc_dump_write ("wound", "yes");
- else
- gf_proc_dump_write ("wound", "no");
+ if (list_empty(&req->todo))
+ gf_proc_dump_write("wound", "yes");
+ else
+ gf_proc_dump_write("wound", "no");
- if (req->fop == GF_FOP_WRITE) {
- gf_proc_dump_write ("size", "%"GF_PRI_SIZET,
- req->write_size);
+ gf_proc_dump_write("generation-number", "%" PRIu64, req->gen);
- gf_proc_dump_write ("offset", "%"PRId64,
- req->stub->args.offset);
+ gf_proc_dump_write("req->op_ret", "%d", req->op_ret);
+ gf_proc_dump_write("req->op_errno", "%d", req->op_errno);
+ gf_proc_dump_write("sync-attempts", "%d", req->wind_count);
- flag = req->ordering.lied;
- gf_proc_dump_write ("lied", "%d", flag);
+ if (req->fop == GF_FOP_WRITE) {
+ if (list_empty(&req->wip))
+ gf_proc_dump_write("sync-in-progress", "no");
+ else
+ gf_proc_dump_write("sync-in-progress", "yes");
- flag = req->ordering.append;
- gf_proc_dump_write ("append", "%d", flag);
+ gf_proc_dump_write("size", "%" GF_PRI_SIZET, req->write_size);
- flag = req->ordering.fulfilled;
- gf_proc_dump_write ("fulfilled", "%d", flag);
+ if (req->stub)
+ gf_proc_dump_write("offset", "%" PRId64,
+ req->stub->args.offset);
- flag = req->ordering.go;
- gf_proc_dump_write ("go", "%d", flag);
- }
+ flag = req->ordering.lied;
+ gf_proc_dump_write("lied", "%d", flag);
+
+ flag = req->ordering.append;
+ gf_proc_dump_write("append", "%d", flag);
+
+ flag = req->ordering.fulfilled;
+ gf_proc_dump_write("fulfilled", "%d", flag);
+
+ flag = req->ordering.go;
+ gf_proc_dump_write("go", "%d", flag);
}
+ }
}
-
int
-wb_inode_dump (xlator_t *this, inode_t *inode)
+wb_inode_dump(xlator_t *this, inode_t *inode)
{
- wb_inode_t *wb_inode = NULL;
- int32_t ret = -1;
- char *path = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- char uuid_str[64] = {0,};
+ wb_inode_t *wb_inode = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char uuid_str[64] = {
+ 0,
+ };
+
+ if ((inode == NULL) || (this == NULL)) {
+ ret = 0;
+ goto out;
+ }
- if ((inode == NULL) || (this == NULL)) {
- ret = 0;
- goto out;
- }
+ wb_inode = wb_inode_ctx_get(this, inode);
+ if (wb_inode == NULL) {
+ ret = 0;
+ goto out;
+ }
- wb_inode = wb_inode_ctx_get (this, inode);
- if (wb_inode == NULL) {
- ret = 0;
- goto out;
- }
+ uuid_utoa_r(inode->gfid, uuid_str);
- gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
- "wb_inode");
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.write-behind",
+ "wb_inode");
- gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_add_section("%s", key_prefix);
- __inode_path (inode, NULL, &path);
- if (path != NULL) {
- gf_proc_dump_write ("path", "%s", path);
- GF_FREE (path);
- }
+ __inode_path(inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
- gf_proc_dump_write ("inode", "%p", inode);
+ gf_proc_dump_write("inode", "%p", inode);
- gf_proc_dump_write ("window_conf", "%"GF_PRI_SIZET,
- wb_inode->window_conf);
+ gf_proc_dump_write("gfid", "%s", uuid_str);
- gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET,
- wb_inode->window_current);
+ gf_proc_dump_write("window_conf", "%" GF_PRI_SIZET, wb_inode->window_conf);
+ gf_proc_dump_write("window_current", "%" GF_PRI_SIZET,
+ wb_inode->window_current);
- ret = TRY_LOCK (&wb_inode->lock);
- if (!ret)
- {
- if (!list_empty (&wb_inode->all)) {
- __wb_dump_requests (&wb_inode->all, key_prefix);
- }
- UNLOCK (&wb_inode->lock);
+ gf_proc_dump_write("transit-size", "%" GF_PRI_SIZET, wb_inode->transit);
+
+ gf_proc_dump_write("dontsync", "%d", wb_inode->dontsync);
+
+ ret = TRY_LOCK(&wb_inode->lock);
+ if (!ret) {
+ if (!list_empty(&wb_inode->all)) {
+ __wb_dump_requests(&wb_inode->all, key_prefix);
}
+ UNLOCK(&wb_inode->lock);
+ }
- if (ret && wb_inode)
- gf_proc_dump_write ("Unable to dump the inode information",
- "(Lock acquisition failed) %p (gfid: %s)",
- wb_inode,
- uuid_utoa_r (inode->gfid, uuid_str));
- ret = 0;
+ if (ret && wb_inode)
+ gf_proc_dump_write("Unable to dump the inode information",
+ "(Lock acquisition failed) %p (gfid: %s)", wb_inode,
+ uuid_str);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this) {
- goto out;
- }
+ if (!this) {
+ goto out;
+ }
- ret = xlator_mem_acct_init (this, gf_wb_mt_end + 1);
+ ret = xlator_mem_acct_init(this, gf_wb_mt_end + 1);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- }
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, WRITE_BEHIND_MSG_NO_MEMORY,
+               "Memory accounting init failed");
+ }
out:
- return ret;
+ return ret;
}
-
int
-reconfigure (xlator_t *this, dict_t *options)
+reconfigure(xlator_t *this, dict_t *options)
{
- wb_conf_t *conf = NULL;
- int ret = -1;
+ wb_conf_t *conf = NULL;
+ int ret = -1;
- conf = this->private;
+ conf = this->private;
- GF_OPTION_RECONF ("cache-size", conf->window_size, options, size, out);
+ GF_OPTION_RECONF("cache-size", conf->window_size, options, size_uint64,
+ out);
- GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool,
- out);
+ GF_OPTION_RECONF("flush-behind", conf->flush_behind, options, bool, out);
- GF_OPTION_RECONF ("trickling-writes", conf->trickling_writes, options,
- bool, out);
+ GF_OPTION_RECONF("trickling-writes", conf->trickling_writes, options, bool,
+ out);
- GF_OPTION_RECONF ("strict-O_DIRECT", conf->strict_O_DIRECT, options,
- bool, out);
+ GF_OPTION_RECONF("strict-O_DIRECT", conf->strict_O_DIRECT, options, bool,
+ out);
- GF_OPTION_RECONF ("strict-write-ordering", conf->strict_write_ordering,
- options, bool, out);
- ret = 0;
+ GF_OPTION_RECONF("strict-write-ordering", conf->strict_write_ordering,
+ options, bool, out);
+ GF_OPTION_RECONF("resync-failed-syncs-after-fsync",
+ conf->resync_after_fsync, options, bool, out);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int32_t
-init (xlator_t *this)
+init(xlator_t *this)
{
- wb_conf_t *conf = NULL;
- int32_t ret = -1;
-
- if ((this->children == NULL)
- || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: write-behind (%s) not configured with exactly "
- "one child", this->name);
- goto out;
- }
-
- if (this->parents == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
-                        "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_wb_mt_wb_conf_t);
- if (conf == NULL) {
- goto out;
- }
-
- /* configure 'options aggregate-size <size>' */
- conf->aggregate_size = WB_AGGREGATE_SIZE;
-
- /* configure 'option window-size <size>' */
- GF_OPTION_INIT ("cache-size", conf->window_size, size, out);
-
- if (!conf->window_size && conf->aggregate_size) {
- gf_log (this->name, GF_LOG_WARNING,
- "setting window-size to be equal to "
- "aggregate-size(%"PRIu64")",
- conf->aggregate_size);
- conf->window_size = conf->aggregate_size;
- }
-
- if (conf->window_size < conf->aggregate_size) {
- gf_log (this->name, GF_LOG_ERROR,
- "aggregate-size(%"PRIu64") cannot be more than "
- "window-size(%"PRIu64")", conf->aggregate_size,
- conf->window_size);
- goto out;
- }
-
- /* configure 'option flush-behind <on/off>' */
- GF_OPTION_INIT ("flush-behind", conf->flush_behind, bool, out);
-
- GF_OPTION_INIT ("trickling-writes", conf->trickling_writes, bool, out);
-
- GF_OPTION_INIT ("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out);
-
- GF_OPTION_INIT ("strict-write-ordering", conf->strict_write_ordering,
- bool, out);
-
- this->private = conf;
- ret = 0;
+ wb_conf_t *conf = NULL;
+ int32_t ret = -1;
+
+ if ((this->children == NULL) || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, WRITE_BEHIND_MSG_INIT_FAILED,
+ "FATAL: write-behind (%s) not configured with exactly "
+ "one child",
+ this->name);
+ goto out;
+ }
+
+ if (this->parents == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_VOL_MISCONFIGURED,
+               "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_wb_mt_wb_conf_t);
+ if (conf == NULL) {
+ goto out;
+ }
+
+ /* configure 'options aggregate-size <size>' */
+ GF_OPTION_INIT("aggregate-size", conf->aggregate_size, size_uint64, out);
+ conf->page_size = conf->aggregate_size;
+
+ /* configure 'option window-size <size>' */
+ GF_OPTION_INIT("cache-size", conf->window_size, size_uint64, out);
+
+ if (!conf->window_size && conf->aggregate_size) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, WRITE_BEHIND_MSG_SIZE_NOT_SET,
+ "setting window-size to be equal to "
+ "aggregate-size(%" PRIu64 ")",
+ conf->aggregate_size);
+ conf->window_size = conf->aggregate_size;
+ }
+
+ if (conf->window_size < conf->aggregate_size) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, WRITE_BEHIND_MSG_EXCEEDED_MAX_SIZE,
+ "aggregate-size(%" PRIu64
+ ") cannot be more than "
+ "window-size(%" PRIu64 ")",
+ conf->aggregate_size, conf->window_size);
+ goto out;
+ }
+
+ /* configure 'option flush-behind <on/off>' */
+ GF_OPTION_INIT("flush-behind", conf->flush_behind, bool, out);
+
+ GF_OPTION_INIT("trickling-writes", conf->trickling_writes, bool, out);
+
+ GF_OPTION_INIT("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out);
+
+ GF_OPTION_INIT("strict-write-ordering", conf->strict_write_ordering, bool,
+ out);
+
+ GF_OPTION_INIT("resync-failed-syncs-after-fsync", conf->resync_after_fsync,
+ bool, out);
+
+ this->private = conf;
+ ret = 0;
out:
- if (ret) {
- GF_FREE (conf);
- }
- return ret;
+ if (ret) {
+ GF_FREE(conf);
+ }
+ return ret;
}
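init() above ties the two size options together: an unset cache-size (window) inherits aggregate-size, and a window smaller than the aggregate is rejected outright. A tiny standalone sketch of just that check; check_sizes and the literal sizes are illustrative values, not anything read from a volfile:

/* Sketch only: mirror the window/aggregate size check done in init(). */
#include <inttypes.h>
#include <stdio.h>

static int check_sizes(uint64_t aggregate, uint64_t *window)
{
    if (*window == 0 && aggregate != 0)
        *window = aggregate;           /* window-size inherits aggregate-size */

    if (*window < aggregate) {
        fprintf(stderr,
                "aggregate-size(%" PRIu64 ") cannot be more than "
                "window-size(%" PRIu64 ")\n",
                aggregate, *window);
        return -1;                     /* init() fails the same way */
    }
    return 0;
}

int main(void)
{
    uint64_t window = 1 << 20;                   /* 1MB cache-size */

    if (check_sizes(128 * 1024, &window) == 0)   /* 128KB aggregate-size */
        printf("accepted: window=%" PRIu64 " bytes\n", window);
    return 0;
}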
-
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- wb_conf_t *conf = NULL;
+ wb_conf_t *conf = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO("write-behind", this, out);
- conf = this->private;
- if (!conf) {
- goto out;
- }
+ conf = this->private;
+ if (!conf) {
+ goto out;
+ }
- this->private = NULL;
- GF_FREE (conf);
+ this->private = NULL;
+ GF_FREE(conf);
out:
- return;
+ return;
}
-
struct xlator_fops fops = {
- .writev = wb_writev,
- .readv = wb_readv,
- .flush = wb_flush,
- .fsync = wb_fsync,
- .stat = wb_stat,
- .fstat = wb_fstat,
- .truncate = wb_truncate,
- .ftruncate = wb_ftruncate,
- .setattr = wb_setattr,
- .fsetattr = wb_fsetattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = wb_forget,
- .release = wb_release
+ .writev = wb_writev,
+ .readv = wb_readv,
+ .flush = wb_flush,
+ .fsync = wb_fsync,
+ .stat = wb_stat,
+ .fstat = wb_fstat,
+ .truncate = wb_truncate,
+ .ftruncate = wb_ftruncate,
+ .setattr = wb_setattr,
+ .fsetattr = wb_fsetattr,
+ .lookup = wb_lookup,
+ .readdirp = wb_readdirp,
+ .link = wb_link,
+ .fallocate = wb_fallocate,
+ .discard = wb_discard,
+ .zerofill = wb_zerofill,
+ .rename = wb_rename,
};
+struct xlator_cbks cbks = {.forget = wb_forget, .release = wb_release};
struct xlator_dumpops dumpops = {
- .priv = wb_priv_dump,
- .inodectx = wb_inode_dump,
+ .priv = wb_priv_dump,
+ .inodectx = wb_inode_dump,
};
-
struct volume_options options[] = {
- { .key = {"flush-behind"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "If this option is set ON, instructs write-behind "
- "translator to perform flush in background, by "
- "returning success (or any errors, if any of "
- "previous writes were failed) to application even "
- "before flush FOP is sent to backend filesystem. "
- },
- { .key = {"cache-size", "window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 512 * GF_UNIT_KB,
- .max = 1 * GF_UNIT_GB,
- .default_value = "1MB",
- .description = "Size of the write-behind buffer for a single file "
- "(inode)."
- },
- { .key = {"trickling-writes"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"strict-O_DIRECT"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "This option when set to off, ignores the "
- "O_DIRECT flag."
- },
- { .key = {"strict-write-ordering"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Do not let later writes overtake earlier writes even "
- "if they do not overlap",
- },
- { .key = {NULL} },
+ {
+ .key = {"write-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable write-behind",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"flush-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+     .description = "If this option is set ON, the write-behind translator "
+                    "performs flush in the background by returning success "
+                    "(or any error from previously failed writes) to the "
+                    "application even before the flush FOP is sent to the "
+                    "backend filesystem. "},
+ {.key = {"cache-size", "window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 512 * GF_UNIT_KB,
+ .max = 1 * GF_UNIT_GB,
+ .default_value = "1MB",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .description = "Size of the write-behind buffer for a single file "
+ "(inode)."},
+ {
+ .key = {"trickling-writes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {GD_OP_VERSION_3_13_1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .default_value = "on",
+ },
+ {.key = {"strict-O_DIRECT"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+     .description = "When set to off, this option makes write-behind "
+                    "ignore the O_DIRECT flag."},
+ {
+ .key = {"strict-write-ordering"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .description = "Do not let later writes overtake earlier writes even "
+ "if they do not overlap",
+ },
+ {
+ .key = {"resync-failed-syncs-after-fsync"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_3_7_7},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .description = "If sync of \"cached-writes issued before fsync\" "
+ "(to backend) fails, this option configures whether "
+ "to retry syncing them after fsync or forget them. "
+                       "If set to on, cached writes are retried on sync "
+                       "failures until a \"flush\" fop (or a successful "
+                       "sync). "
+                       "The fsync itself fails irrespective of the value "
+                       "of this option. ",
+ },
+ {
+ .key = {"aggregate-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "128KB",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+        .description = "Aggregates writes on a single file until data of "
+                       "the specified size accumulates, provided there are "
+                       "no fops dependent on the cached writes. This option "
+                       "only sets the aggregate size. Note that aggregation "
+                       "won't happen if "
+                       "performance.write-behind-trickling-writes"
+                       " is turned on. Hence turn off "
+                       "performance.write-behind-trickling-writes"
+                       " so that writes are aggregated up to a maximum of "
+                       "\"aggregate-size\" bytes",
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "write-behind",
+ .category = GF_MAINTAINED,
};