Diffstat (limited to 'xlators/performance')
-rw-r--r--  xlators/performance/Makefile.am | 3
-rw-r--r--  xlators/performance/io-cache/src/Makefile.am | 11
-rw-r--r--  xlators/performance/io-cache/src/io-cache-messages.h | 69
-rw-r--r--  xlators/performance/io-cache/src/io-cache.c | 2945
-rw-r--r--  xlators/performance/io-cache/src/io-cache.h | 437
-rw-r--r--  xlators/performance/io-cache/src/ioc-inode.c | 324
-rw-r--r--  xlators/performance/io-cache/src/ioc-mem-types.h | 29
-rw-r--r--  xlators/performance/io-cache/src/page.c | 1544
-rw-r--r--  xlators/performance/io-threads/src/Makefile.am | 10
-rw-r--r--  xlators/performance/io-threads/src/io-threads-messages.h | 41
-rw-r--r--  xlators/performance/io-threads/src/io-threads.c | 3726
-rw-r--r--  xlators/performance/io-threads/src/io-threads.h | 229
-rw-r--r--  xlators/performance/io-threads/src/iot-mem-types.h | 21
-rw-r--r--  xlators/performance/md-cache/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am) | 0
-rw-r--r--  xlators/performance/md-cache/src/Makefile.am | 29
-rw-r--r--  xlators/performance/md-cache/src/md-cache-mem-types.h | 23
-rw-r--r--  xlators/performance/md-cache/src/md-cache-messages.h | 29
-rw-r--r--  xlators/performance/md-cache/src/md-cache.c | 4020
-rw-r--r--  xlators/performance/nl-cache/Makefile.am (renamed from xlators/performance/symlink-cache/Makefile.am) | 2
-rw-r--r--  xlators/performance/nl-cache/src/Makefile.am | 12
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-helper.c | 1201
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-mem-types.h | 27
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache-messages.h | 29
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache.c | 840
-rw-r--r--  xlators/performance/nl-cache/src/nl-cache.h | 175
-rw-r--r--  xlators/performance/open-behind/Makefile.am | 1
-rw-r--r--  xlators/performance/open-behind/src/Makefile.am | 16
-rw-r--r--  xlators/performance/open-behind/src/open-behind-mem-types.h | 22
-rw-r--r--  xlators/performance/open-behind/src/open-behind-messages.h | 32
-rw-r--r--  xlators/performance/open-behind/src/open-behind.c | 1101
-rw-r--r--  xlators/performance/quick-read/src/Makefile.am | 10
-rw-r--r--  xlators/performance/quick-read/src/quick-read-mem-types.h | 23
-rw-r--r--  xlators/performance/quick-read/src/quick-read-messages.h | 31
-rw-r--r--  xlators/performance/quick-read/src/quick-read.c | 3565
-rw-r--r--  xlators/performance/quick-read/src/quick-read.h | 138
-rw-r--r--  xlators/performance/read-ahead/src/Makefile.am | 10
-rw-r--r--  xlators/performance/read-ahead/src/page.c | 874
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead-mem-types.h | 25
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead-messages.h | 31
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead.c | 1835
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead.h | 201
-rw-r--r--  xlators/performance/readdir-ahead/Makefile.am | 3
-rw-r--r--  xlators/performance/readdir-ahead/src/Makefile.am | 18
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h | 24
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead-messages.h | 30
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead.c | 1382
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead.h | 98
-rw-r--r--  xlators/performance/stat-prefetch/src/Makefile.am | 14
-rw-r--r--  xlators/performance/stat-prefetch/src/stat-prefetch.c | 3895
-rw-r--r--  xlators/performance/stat-prefetch/src/stat-prefetch.h | 104
-rw-r--r--  xlators/performance/symlink-cache/src/Makefile.am | 12
-rw-r--r--  xlators/performance/symlink-cache/src/symlink-cache.c | 409
-rw-r--r--  xlators/performance/write-behind/src/Makefile.am | 10
-rw-r--r--  xlators/performance/write-behind/src/write-behind-mem-types.h | 24
-rw-r--r--  xlators/performance/write-behind/src/write-behind-messages.h | 31
-rw-r--r--  xlators/performance/write-behind/src/write-behind.c | 4934
56 files changed, 19500 insertions(+), 15179 deletions(-)
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am
index e91d5f6efc8..e95725acb8c 100644
--- a/xlators/performance/Makefile.am
+++ b/xlators/performance/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read stat-prefetch
+SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache \
+ quick-read md-cache open-behind nl-cache
CLEANFILES =
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am
index e3d816f1542..bfa34ce5502 100644
--- a/xlators/performance/io-cache/src/Makefile.am
+++ b/xlators/performance/io-cache/src/Makefile.am
@@ -1,14 +1,17 @@
xlator_LTLIBRARIES = io-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_cache_la_LDFLAGS = -module -avoidversion
+io_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c
io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-cache.h
+noinst_HEADERS = io-cache.h ioc-mem-types.h io-cache-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/io-cache/src/io-cache-messages.h b/xlators/performance/io-cache/src/io-cache-messages.h
new file mode 100644
index 00000000000..38ad0b14d0e
--- /dev/null
+++ b/xlators/performance/io-cache/src/io-cache-messages.h
@@ -0,0 +1,69 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IO_CACHE_MESSAGES_H_
+#define _IO_CACHE_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(IO_CACHE, IO_CACHE_MSG_ENFORCEMENT_FAILED,
+ IO_CACHE_MSG_INVALID_ARGUMENT,
+ IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, IO_CACHE_MSG_NO_MEMORY,
+ IO_CACHE_MSG_VOL_MISCONFIGURED, IO_CACHE_MSG_INODE_NULL,
+ IO_CACHE_MSG_PAGE_WAIT_VALIDATE, IO_CACHE_MSG_STR_COVERSION_FAILED,
+ IO_CACHE_MSG_WASTED_COPY, IO_CACHE_MSG_SET_FD_FAILED,
+ IO_CACHE_MSG_TABLE_NULL, IO_CACHE_MSG_MEMORY_INIT_FAILED,
+ IO_CACHE_MSG_NO_CACHE_SIZE_OPT, IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE,
+ IO_CACHE_MSG_CREATE_MEM_POOL_FAILED,
+ IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, IO_CACHE_MSG_NULL_PAGE_WAIT,
+ IO_CACHE_MSG_FRAME_NULL, IO_CACHE_MSG_PAGE_FAULT,
+ IO_CACHE_MSG_SERVE_READ_REQUEST, IO_CACHE_MSG_LOCAL_NULL,
+ IO_CACHE_MSG_DEFAULTING_TO_OLD);
+
+#define IO_CACHE_MSG_NO_MEMORY_STR "out of memory"
+#define IO_CACHE_MSG_ENFORCEMENT_FAILED_STR "inode context is NULL"
+#define IO_CACHE_MSG_SET_FD_FAILED_STR "failed to set fd ctx"
+#define IO_CACHE_MSG_TABLE_NULL_STR "table is NULL"
+#define IO_CACHE_MSG_MEMORY_INIT_FAILED_STR "Memory accounting init failed"
+#define IO_CACHE_MSG_NO_CACHE_SIZE_OPT_STR "could not get cache-size option"
+#define IO_CACHE_MSG_INVALID_ARGUMENT_STR \
+ "file size is greater than the max size"
+#define IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE_STR "Not reconfiguring cache-size"
+#define IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED_STR \
+ "FATAL: io-cache not configured with exactly one child"
+#define IO_CACHE_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile"
+#define IO_CACHE_MSG_CREATE_MEM_POOL_FAILED_STR \
+ "failed to create local_t's memory pool"
+#define IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED_STR "Unable to allocate mem_pool"
+#define IO_CACHE_MSG_STR_COVERSION_FAILED_STR \
+ "asprintf failed while converting prt to str"
+#define IO_CACHE_MSG_INODE_NULL_STR "ioc_inode is NULL"
+#define IO_CACHE_MSG_PAGE_WAIT_VALIDATE_STR \
+ "cache validate called without any page waiting to be validated"
+#define IO_CACHE_MSG_NULL_PAGE_WAIT_STR "asked to wait on a NULL page"
+#define IO_CACHE_MSG_WASTED_COPY_STR "wasted copy"
+#define IO_CACHE_MSG_FRAME_NULL_STR "frame>root>rsp_refs is null"
+#define IO_CACHE_MSG_PAGE_FAULT_STR "page fault on a NULL frame"
+#define IO_CACHE_MSG_SERVE_READ_REQUEST_STR \
+ "NULL page has been provided to serve read request"
+#define IO_CACHE_MSG_LOCAL_NULL_STR "local is NULL"
+#define IO_CACHE_MSG_DEFAULTING_TO_OLD_STR \
+ "minimum size of file that can be cached is greater than maximum size. " \
+ "Hence Defaulting to old value"
+#endif /* _IO_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 96aa8fdef7b..9375d29c17f 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -1,238 +1,358 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <math.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "io-cache.h"
-#include "statedump.h"
+#include "ioc-mem-types.h"
+#include <glusterfs/statedump.h>
#include <assert.h>
#include <sys/time.h>
-
+#include "io-cache-messages.h"
int ioc_log2_page_size;
uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path);
-
-uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path);
+ioc_get_priority(ioc_table_t *table, const char *path);
+struct volume_options options[];
-inline uint32_t
-ioc_hashfn (void *data, int len)
+static uint32_t
+ioc_hashfn(void *data, int len)
{
- off_t offset;
+ off_t offset;
- offset = *(off_t *) data;
+ offset = *(off_t *)data;
- return (offset >> ioc_log2_page_size);
+ return (offset >> ioc_log2_page_size);
}
-inline ioc_inode_t *
+/* TODO: This function is not used, uncomment when we find a
+ usage for this function.
+
+static ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = ioc_inode->table;
+ ioc_table_t *table = NULL;
+
+ table = ioc_inode->table;
- list_add_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
-
- return ioc_inode;
+ list_add_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+
+ return ioc_inode;
}
-inline ioc_inode_t *
+
+static ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
- ioc_inode_t *ioc_inode = NULL;
- data_t *ioc_inode_data = dict_get (dict, name);
- ioc_table_t *table = NULL;
-
- if (ioc_inode_data) {
- ioc_inode = data_to_ptr (ioc_inode_data);
- table = ioc_inode->table;
-
- ioc_table_lock (table);
- {
- if (list_empty (&ioc_inode->inode_lru)) {
- ioc_inode = ioc_inode_reupdate (ioc_inode);
- }
- }
- ioc_table_unlock (table);
- }
-
- return ioc_inode;
+ ioc_inode_t *ioc_inode = NULL;
+ data_t *ioc_inode_data = NULL;
+ ioc_table_t *table = NULL;
+
+ ioc_inode_data = dict_get (dict, name);
+ if (ioc_inode_data) {
+ ioc_inode = data_to_ptr (ioc_inode_data);
+ table = ioc_inode->table;
+
+ ioc_table_lock (table);
+ {
+ if (list_empty (&ioc_inode->inode_lru)) {
+ ioc_inode = ioc_inode_reupdate (ioc_inode);
+ }
+ }
+ ioc_table_unlock (table);
+ }
+
+ return ioc_inode;
}
+*/
-int32_t
-ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
+int
+ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iovec *vector, int32_t count, int op_ret, off_t offset)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {0,};
- int32_t ret = -1;
- ioc_table_t *table = ioc_inode->table;
+ size_t size = 0;
+ off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0,
+ write_offset = 0;
+ off_t page_offset = 0, page_end = 0;
+ ioc_page_t *trav = NULL;
+
+ size = iov_length(vector, count);
+ size = min(size, op_ret);
+
+ rounded_offset = gf_floor(offset, ioc_inode->table->page_size);
+ rounded_end = gf_roof(offset + size, ioc_inode->table->page_size);
+
+ trav_offset = rounded_offset;
+ ioc_inode_lock(ioc_inode);
+ {
+ while (trav_offset < rounded_end) {
+ trav = __ioc_page_get(ioc_inode, trav_offset);
+ if (trav && trav->ready) {
+ if (trav_offset == rounded_offset)
+ page_offset = offset - rounded_offset;
+ else
+ page_offset = 0;
+
+ if ((trav_offset + ioc_inode->table->page_size) >=
+ rounded_end) {
+ page_end = trav->size - (rounded_end - (offset + size));
+ } else {
+ page_end = trav->size;
+ }
+
+ iov_range_copy(trav->vector, trav->count, page_offset, vector,
+ count, write_offset, page_end - page_offset);
+ } else if (trav) {
+ if (!trav->waitq)
+ ioc_inode->table->cache_used -= __ioc_page_destroy(trav);
+ }
+
+ if (trav_offset == rounded_offset)
+ write_offset += (ioc_inode->table->page_size -
+ (offset - rounded_offset));
+ else
+ write_offset += ioc_inode->table->page_size;
+
+ trav_offset += ioc_inode->table->page_size;
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
- ret = gettimeofday (&tv, NULL);
+ return 0;
+}
+
+static gf_boolean_t
+ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)
+{
+ ioc_table_t *table = NULL;
- if (time_elapsed (&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
- need_revalidate = 1;
+ GF_ASSERT(ioc_inode);
+ table = ioc_inode->table;
+ GF_ASSERT(table);
- return need_revalidate;
+ return (gf_time() - ioc_inode->cache.last_revalidate >=
+ table->cache_timeout);
}
/*
* __ioc_inode_flush - flush all the cached pages of the given inode
*
- * @ioc_inode:
+ * @ioc_inode:
*
* assumes lock is held
*/
int64_t
-__ioc_inode_flush (ioc_inode_t *ioc_inode)
+__ioc_inode_flush(ioc_inode_t *ioc_inode)
{
- ioc_page_t *curr = NULL, *next = NULL;
- int64_t destroy_size = 0;
- int64_t ret = 0;
-
- list_for_each_entry_safe (curr, next, &ioc_inode->cache.page_lru,
- page_lru) {
- ret = ioc_page_destroy (curr);
-
- if (ret != -1)
- destroy_size += ret;
- }
-
- return destroy_size;
+ ioc_page_t *curr = NULL, *next = NULL;
+ int64_t destroy_size = 0;
+ int64_t ret = 0;
+
+ list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru)
+ {
+ ret = __ioc_page_destroy(curr);
+
+ if (ret != -1)
+ destroy_size += ret;
+ }
+
+ return destroy_size;
}
void
-ioc_inode_flush (ioc_inode_t *ioc_inode)
+ioc_inode_flush(ioc_inode_t *ioc_inode)
{
- int64_t destroy_size = 0;
-
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
- ioc_inode_unlock (ioc_inode);
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- return;
+ int64_t destroy_size = 0;
+
+ ioc_inode_lock(ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush(ioc_inode);
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ if (destroy_size) {
+ ioc_table_lock(ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock(ioc_inode->table);
+ }
+
+ return;
}
int32_t
-ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *preop, struct stat *postop)
+ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
- return 0;
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
int32_t
-ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct stat *stbuf, int32_t valid)
+ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ inode_ctx_get(loc->inode, this, &ioc_inode);
- if (ioc_inode
- && ((valid & GF_SET_ATTR_ATIME)
- || (valid & GF_SET_ATTR_MTIME)))
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode &&
+ ((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME)))
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ return 0;
}
int32_t
-ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *stbuf, dict_t *dict, struct stat *postparent)
+ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf)
{
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = this->private;
- uint8_t cache_still_valid = 0;
- uint64_t tmp_ioc_inode = 0;
-
- if (op_ret != 0)
- goto out;
-
- inode_ctx_get (inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (ioc_inode) {
- ioc_inode_lock (ioc_inode);
- {
- if (ioc_inode->cache.mtime == 0) {
- ioc_inode->cache.mtime = stbuf->st_mtime;
- ioc_inode->cache.mtime_nsec = ST_MTIM_NSEC(stbuf);
- }
- }
- ioc_inode_unlock (ioc_inode);
-
- cache_still_valid = ioc_cache_still_valid (ioc_inode,
- stbuf);
-
- if (!cache_still_valid) {
- ioc_inode_flush (ioc_inode);
- }
-
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
+ ioc_table_t *table = NULL;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ gf_boolean_t cache_still_valid = _gf_false;
+
+ if (!this || !inode)
+ goto out;
+
+ table = this->private;
+
+ LOCK(&inode->lock);
+ {
+ (void)__inode_ctx_get(inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+ if (!ioc_inode) {
+ weight = ioc_get_priority(table, path);
+
+ ioc_inode = ioc_inode_create(table, inode, weight);
+
+ (void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ ioc_inode_lock(ioc_inode);
+ {
+ if (ioc_inode->cache.mtime == 0) {
+ ioc_inode->cache.mtime = iabuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec;
+ }
+
+ ioc_inode->ia_size = iabuf->ia_size;
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf);
+
+ if (!cache_still_valid) {
+ ioc_inode_flush(ioc_inode);
+ }
+
+ ioc_table_lock(ioc_inode->table);
+ {
+ list_move_tail(&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock(ioc_inode->table);
+
+out:
+ return 0;
+}
+
+int32_t
+ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
+{
+ ioc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
+
+ local = frame->local;
+ if (local == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf);
+
out:
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf, dict,
- postparent);
- return 0;
+ if (frame->local != NULL) {
+ local = frame->local;
+ loc_wipe(&local->file_loc);
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
+ postparent);
+ return 0;
}
-int32_t
-ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+int32_t
+ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
+
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
+
+ ret = loc_copy(&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
+
+ frame->local = local;
- return 0;
+ STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+unwind:
+ if (local != NULL) {
+ loc_wipe(&local->file_loc);
+ mem_put(local);
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ return 0;
}
/*
- * ioc_forget -
+ * ioc_forget -
*
* @frame:
* @this:
@@ -240,21 +360,33 @@ ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
*
*/
int32_t
-ioc_forget (xlator_t *this, inode_t *inode)
+ioc_forget(xlator_t *this, inode_t *inode)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get(inode, this, &ioc_inode);
- inode_ctx_get (inode, this, &ioc_inode);
+ if (ioc_inode)
+ ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode);
- if (ioc_inode)
- ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
-
- return 0;
+ return 0;
}
+static int32_t
+ioc_invalidate(xlator_t *this, inode_t *inode)
+{
+ uint64_t ioc_inode = 0;
-/*
- * ioc_cache_validate_cbk -
+ inode_ctx_get(inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode);
+
+ return 0;
+}
+
+/*
+ * ioc_cache_validate_cbk -
*
* @frame:
* @cookie:
@@ -265,101 +397,103 @@ ioc_forget (xlator_t *this, inode_t *inode)
*
*/
int32_t
-ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_inode_t *ioc_inode = NULL;
- size_t destroy_size = 0;
- struct stat *local_stbuf = NULL;
+ ioc_local_t *local = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ size_t destroy_size = 0;
+ struct iatt *local_stbuf = NULL;
+
+ local = frame->local;
+ ioc_inode = local->inode;
+ local_stbuf = stbuf;
+
+ if ((op_ret == -1) ||
+ ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_msg_debug(ioc_inode->table->xl->name, 0,
+ "cache for inode(%p) is invalid. flushing all pages",
+ ioc_inode);
+ /* NOTE: only pages with no waiting frames are flushed by
+ * ioc_inode_flush. page_fault will be generated for all
+ * the pages which have waiting frames by ioc_inode_wakeup()
+ */
+ ioc_inode_lock(ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush(ioc_inode);
+ if (op_ret >= 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+ local_stbuf = NULL;
+ }
- local = frame->local;
- ioc_inode = local->inode;
- local_stbuf = stbuf;
-
- if ((op_ret == -1) ||
- ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
- "cache for inode(%p) is invalid. flushing all pages",
- ioc_inode);
- /* NOTE: only pages with no waiting frames are flushed by
- * ioc_inode_flush. page_fault will be generated for all
- * the pages which have waiting frames by ioc_inode_wakeup()
- */
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- if (op_ret >= 0) {
- ioc_inode->cache.mtime = stbuf->st_mtime;
- ioc_inode->cache.mtime_nsec = ST_MTIM_NSEC(stbuf);
- }
- }
- ioc_inode_unlock (ioc_inode);
- local_stbuf = NULL;
- }
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- if (op_ret < 0)
- local_stbuf = NULL;
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->cache.tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
-
- /* any page-fault initiated by ioc_inode_wakeup() will have its own
- * fd_ref on fd, safe to unref validate frame's private copy
- */
- fd_unref (local->fd);
-
- STACK_DESTROY (frame->root);
-
- return 0;
+ if (destroy_size) {
+ ioc_table_lock(ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock(ioc_inode->table);
+ }
+
+ if (op_ret < 0)
+ local_stbuf = NULL;
+
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.last_revalidate = gf_time();
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ ioc_inode_wakeup(frame, ioc_inode, local_stbuf);
+
+ /* any page-fault initiated by ioc_inode_wakeup() will have its own
+ * fd_ref on fd, safe to unref validate frame's private copy
+ */
+ fd_unref(local->fd);
+ dict_unref(local->xattr_req);
+
+ STACK_DESTROY(frame->root);
+
+ return 0;
}
int32_t
-ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
+ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page)
{
- ioc_waitq_t *waiter = NULL, *trav = NULL;
- uint32_t page_found = 0;
- int32_t ret = 0;
-
- trav = ioc_inode->waitq;
-
- while (trav) {
- if (trav->data == page) {
- page_found = 1;
- break;
- }
- trav = trav->next;
- }
-
- if (!page_found) {
- waiter = CALLOC (1, sizeof (ioc_waitq_t));
- if (waiter == NULL) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- ret = -ENOMEM;
- goto out;
- }
+ ioc_waitq_t *waiter = NULL, *trav = NULL;
+ uint32_t page_found = 0;
+ int32_t ret = 0;
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ if (trav->data == page) {
+ page_found = 1;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if (!page_found) {
+ waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t);
+ if (waiter == NULL) {
+ gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ ret = -ENOMEM;
+ goto out;
+ }
- waiter->data = page;
- waiter->next = ioc_inode->waitq;
- ioc_inode->waitq = waiter;
- }
+ waiter->data = page;
+ waiter->next = ioc_inode->waitq;
+ ioc_inode->waitq = waiter;
+ }
-out:
- return ret;
+out:
+ return ret;
}
/*
@@ -371,78 +505,80 @@ out:
*
*/
int32_t
-ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- ioc_page_t *page)
+ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
+ ioc_page_t *page)
{
- call_frame_t *validate_frame = NULL;
- ioc_local_t *validate_local = NULL;
- ioc_local_t *local = NULL;
- int32_t ret = 0;
-
- local = frame->local;
- validate_local = CALLOC (1, sizeof (ioc_local_t));
- if (validate_local == NULL) {
- ret = -1;
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- validate_frame = copy_frame (frame);
- if (validate_frame == NULL) {
- ret = -1;
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- FREE (validate_local);
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- validate_local->fd = fd_ref (fd);
- validate_local->inode = ioc_inode;
- validate_frame->local = validate_local;
-
- STACK_WIND (validate_frame, ioc_cache_validate_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->fstat, fd);
+ call_frame_t *validate_frame = NULL;
+ ioc_local_t *validate_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t ret = 0;
+
+ local = frame->local;
+ validate_local = mem_get0(THIS->local_pool);
+ if (validate_local == NULL) {
+ ret = -1;
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ validate_frame = copy_frame(frame);
+ if (validate_frame == NULL) {
+ ret = -1;
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ mem_put(validate_local);
+ gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ validate_local->fd = fd_ref(fd);
+ validate_local->inode = ioc_inode;
+ if (local && local->xattr_req)
+ validate_local->xattr_req = dict_ref(local->xattr_req);
+ validate_frame->local = validate_local;
+
+ STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->fstat, fd,
+ validate_local->xattr_req);
out:
- return ret;
+ return ret;
}
-inline uint32_t
-is_match (const char *path, const char *pattern)
+static uint32_t
+is_match(const char *path, const char *pattern)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
-
- return (ret == 0);
+ ret = fnmatch(pattern, path, FNM_NOESCAPE);
+
+ return (ret == 0);
}
uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path)
+ioc_get_priority(ioc_table_t *table, const char *path)
{
- uint32_t priority = 0;
- struct ioc_priority *curr = NULL;
-
- if (list_empty(&table->priority_list)) {
- priority = 1;
- }
- else {
- list_for_each_entry (curr, &table->priority_list, list) {
- if (is_match (path, curr->pattern))
- priority = curr->priority;
- }
- }
-
- return priority;
+ uint32_t priority = 1;
+ struct ioc_priority *curr = NULL;
+
+ if (list_empty(&table->priority_list) || !path)
+ return priority;
+
+ priority = 0;
+ list_for_each_entry(curr, &table->priority_list, list)
+ {
+ if (is_match(path, curr->pattern))
+ priority = curr->priority;
+ }
+
+ return priority;
}
-/*
+/*
* ioc_open_cbk - open callback for io cache
*
* @frame: call frame
@@ -454,82 +590,68 @@ ioc_get_priority (ioc_table_t *table, const char *path)
*
*/
int32_t
-ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- inode_t *inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+
+ if (op_ret != -1) {
+ inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+ // TODO: see why inode context is NULL and handle it.
+ if (!ioc_inode) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
- local = frame->local;
- table = this->private;
- inode = local->file_loc.inode;
- path = local->file_loc.path;
-
- if (op_ret != -1) {
- /* look for ioc_inode corresponding to this fd */
- LOCK (&fd->inode->lock);
- {
- __inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
- if (!ioc_inode) {
- /*
- this is the first time someone is opening
- this file, assign weight
- */
- weight = ioc_get_priority (table, path);
-
- ioc_inode = ioc_inode_update (table, inode,
- weight);
-
- __inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
- } else {
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
- }
+ ioc_table_lock(ioc_inode->table);
+ {
+ list_move_tail(&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock(ioc_inode->table);
+
+ ioc_inode_lock(ioc_inode);
+ {
+ if ((table->min_file_size > ioc_inode->ia_size) ||
+ ((table->max_file_size > 0) &&
+ (table->max_file_size < ioc_inode->ia_size))) {
+ fd_ctx_set(fd, this, 1);
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if ((local->flags & O_DIRECT)) {
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set(fd, this, 1);
+ }
+ }
+
+out:
+ mem_put(local);
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- }
- UNLOCK (&fd->inode->lock);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
- if (((inode->st_mode & S_ISGID)
- && !(inode->st_mode & S_IXGRP))) {
- fd_ctx_set (fd, this, 1);
- }
-
- /* If O_DIRECT open, we disable caching on it */
- if ((local->flags & O_DIRECT)){
- /* O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- FREE (local);
- frame->local = NULL;
-
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
-
- return 0;
+ return 0;
}
/*
@@ -546,71 +668,173 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
*
*/
int32_t
-ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+ path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority(table, path);
+
+ ioc_inode = ioc_inode_create(table, inode, weight);
+
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+
+ if ((table->min_file_size > ioc_inode->ia_size) ||
+ ((table->max_file_size > 0) &&
+ (table->max_file_size < ioc_inode->ia_size))) {
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
+ local->file_loc.path, NULL);
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if (local->flags & O_DIRECT) {
+ /*
+ * O_DIRECT is only for one fd, not the inode
+ * as a whole */
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
+ local->file_loc.path, NULL);
+ }
- local = frame->local;
- table = this->private;
- path = local->file_loc.path;
+ /* if weight == 0, we disable caching on it */
+ if (!weight) {
+ /* we allow a pattern-matched cache disable this way */
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
+ local->file_loc.path, NULL);
+ }
+ }
- if (op_ret != -1) {
- {
- /* assign weight */
- weight = ioc_get_priority (table, path);
+out:
+ frame->local = NULL;
+ mem_put(local);
- ioc_inode = ioc_inode_update (table, inode, weight);
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- ioc_inode_lock (ioc_inode);
- {
- ioc_inode->cache.mtime = buf->st_mtime;
- ioc_inode->cache.mtime_nsec = ST_MTIM_NSEC(buf);
- }
- ioc_inode_unlock (ioc_inode);
-
- inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
- }
- /*
- * If mandatory locking has been enabled on this file,
- * we disable caching on it
- */
- if ((inode->st_mode & S_ISGID) &&
- !(inode->st_mode & S_IXGRP)) {
- fd_ctx_set (fd, this, 1);
- }
-
- /* If O_DIRECT open, we disable caching on it */
- if (local->flags & O_DIRECT){
- /*
- * O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- frame->local = NULL;
- FREE (local);
-
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
-
- return 0;
+ return 0;
+}
+
+int32_t
+ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+ path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority(table, path);
+
+ ioc_inode = ioc_inode_create(table, inode, weight);
+
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
+ }
+
+out:
+ frame->local = NULL;
+
+ loc_wipe(&local->file_loc);
+ mem_put(local);
+
+ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
+
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
+
+ ret = loc_copy(&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
+
+unwind:
+ if (local != NULL) {
+ loc_wipe(&local->file_loc);
+ mem_put(local);
+ }
+
+ STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
/*
@@ -622,34 +846,33 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*
*/
int32_t
-ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
-
- ioc_local_t *local = NULL;
-
- local = CALLOC (1, sizeof (ioc_local_t));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ ioc_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
- local->flags = flags;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
-
- frame->local = local;
-
- STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ local->file_loc.inode = loc->inode;
- return 0;
+ frame->local = local;
+
+ STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+
+ return 0;
}
/*
* ioc_create - create fop for io cache
- *
+ *
* @frame:
* @this:
* @pathname:
@@ -658,240 +881,218 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*
*/
int32_t
-ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- ioc_local_t *local = NULL;
-
- local = CALLOC (1, sizeof (ioc_local_t));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
- }
-
- local->flags = flags;
- local->file_loc.path = loc->path;
- frame->local = local;
-
- STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd);
+ ioc_local_t *local = NULL;
- return 0;
-}
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+ }
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ frame->local = local;
+ STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
/*
* ioc_release - release fop for io cache
- *
+ *
* @frame:
* @this:
* @fd:
*
*/
int32_t
-ioc_release (xlator_t *this, fd_t *fd)
+ioc_release(xlator_t *this, fd_t *fd)
{
- return 0;
+ return 0;
}
-/*
- * ioc_readv_disabled_cbk
- * @frame:
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @vector:
- * @count:
- *
- */
int32_t
-ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf,
- struct iobref *iobref)
+ioc_need_prune(ioc_table_t *table)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
- return 0;
-}
+ int64_t cache_difference = 0;
+ ioc_table_lock(table);
+ {
+ cache_difference = table->cache_used - table->cache_size;
+ }
+ ioc_table_unlock(table);
-int32_t
-ioc_need_prune (ioc_table_t *table)
-{
- int64_t cache_difference = 0;
-
- ioc_table_lock (table);
- {
- cache_difference = table->cache_used - table->cache_size;
- }
- ioc_table_unlock (table);
-
- if (cache_difference > 0)
- return 1;
- else
- return 0;
+ if (cache_difference > 0)
+ return 1;
+ else
+ return 0;
}
/*
* ioc_dispatch_requests -
- *
+ *
* @frame:
* @inode:
*
- *
+ *
*/
void
-ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- off_t offset, size_t size)
+ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
+ off_t offset, size_t size)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *trav = NULL;
- ioc_waitq_t *waitq = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- int32_t fault = 0;
- size_t trav_size = 0;
- off_t local_offset = 0;
- int32_t ret = -1;
- int8_t need_validate = 0;
- int8_t might_need_validate = 0; /*
- * if a page exists, do we need
- * to validate it?
- */
- local = frame->local;
- table = ioc_inode->table;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *trav = NULL;
+ ioc_waitq_t *waitq = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ int32_t fault = 0;
+ size_t trav_size = 0;
+ off_t local_offset = 0;
+ int32_t ret = -1;
+ int8_t need_validate = 0;
+ int8_t might_need_validate = 0; /*
+ * if a page exists, do we need
+ * to validate it?
+ */
+ local = frame->local;
+ table = ioc_inode->table;
+
+ rounded_offset = gf_floor(offset, table->page_size);
+ rounded_end = gf_roof(offset + size, table->page_size);
+ trav_offset = rounded_offset;
+
+ /* once a frame does read, it should be waiting on something */
+ local->wait_count++;
+
+ /* Requested region can fall in three different pages,
+ * 1. Ready - region is already in cache, we just have to serve it.
+ * 2. In-transit - page fault has been generated on this page, we need
+ * to wait till the page is ready
+ * 3. Fault - page is not in cache, we have to generate a page fault
+ */
+
+ might_need_validate = ioc_inode_need_revalidate(ioc_inode);
+
+ while (trav_offset < rounded_end) {
+ ioc_inode_lock(ioc_inode);
+ {
+ /* look for requested region in the cache */
+ trav = __ioc_page_get(ioc_inode, trav_offset);
- rounded_offset = floor (offset, table->page_size);
- rounded_end = roof (offset + size, table->page_size);
- trav_offset = rounded_offset;
-
- /* once a frame does read, it should be waiting on something */
- local->wait_count++;
-
- /* Requested region can fall in three different pages,
- * 1. Ready - region is already in cache, we just have to serve it.
- * 2. In-transit - page fault has been generated on this page, we need
- * to wait till the page is ready
- * 3. Fault - page is not in cache, we have to generate a page fault
- */
-
- might_need_validate = ioc_inode_need_revalidate (ioc_inode);
-
- while (trav_offset < rounded_end) {
- ioc_inode_lock (ioc_inode);
- //{
-
- /* look for requested region in the cache */
- trav = ioc_page_get (ioc_inode, trav_offset);
-
- local_offset = max (trav_offset, offset);
- trav_size = min (((offset+size) - local_offset),
- table->page_size);
-
- if (!trav) {
- /* page not in cache, we need to generate page fault */
- trav = ioc_page_create (ioc_inode, trav_offset);
- fault = 1;
- if (!trav) {
- gf_log (frame->this->name, GF_LOG_CRITICAL,
- "out of memory");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
- }
-
- ioc_wait_on_page (trav, frame, local_offset, trav_size);
-
- if (trav->ready) {
- /* page found in cache */
- if (!might_need_validate && !ioc_inode->waitq) {
- /* fresh enough */
- gf_log (frame->this->name, GF_LOG_TRACE,
- "cache hit for trav_offset=%"PRId64""
- "/local_offset=%"PRId64"",
- trav_offset, local_offset);
- waitq = ioc_page_wakeup (trav);
- } else {
- /* if waitq already exists, fstat revalidate is
- already on the way */
- if (!ioc_inode->waitq) {
- need_validate = 1;
- }
-
- ret = ioc_wait_on_inode (ioc_inode, trav);
- if (ret < 0) {
- local->op_ret = -1;
- local->op_errno = -ret;
- need_validate = 0;
-
- waitq = ioc_page_wakeup (trav);
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
- goto out;
- }
- }
- }
-
- //}
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
-
- if (fault) {
- fault = 0;
- /* new page created, increase the table->cache_used */
- ioc_page_fault (ioc_inode, frame, fd, trav_offset);
- }
-
- if (need_validate) {
- need_validate = 0;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "sending validate request for "
- "inode(%"PRId64") at offset=%"PRId64"",
- fd->inode->ino, trav_offset);
- ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
- if (ret == -1) {
- ioc_inode_lock (ioc_inode);
- {
- waitq = ioc_page_wakeup (trav);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
- goto out;
- }
- }
-
- trav_offset += table->page_size;
- }
+ local_offset = max(trav_offset, offset);
+ trav_size = min(((offset + size) - local_offset), table->page_size);
+
+ if (!trav) {
+ /* page not in cache, we need to generate page
+ * fault
+ */
+ trav = __ioc_page_create(ioc_inode, trav_offset);
+ fault = 1;
+ if (!trav) {
+ gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
+ IO_CACHE_MSG_NO_MEMORY, NULL);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ ioc_inode_unlock(ioc_inode);
+ goto out;
+ }
+ }
+
+ __ioc_wait_on_page(trav, frame, local_offset, trav_size);
+
+ if (trav->ready) {
+ /* page found in cache */
+ if (!might_need_validate && !ioc_inode->waitq) {
+ /* fresh enough */
+ gf_msg_trace(frame->this->name, 0,
+ "cache hit for "
+ "trav_offset=%" PRId64
+ "/local_"
+ "offset=%" PRId64 "",
+ trav_offset, local_offset);
+ waitq = __ioc_page_wakeup(trav, trav->op_errno);
+ } else {
+ /* if waitq already exists, fstat
+ * revalidate is
+ * already on the way
+ */
+ if (!ioc_inode->waitq) {
+ need_validate = 1;
+ }
+
+ ret = ioc_wait_on_inode(ioc_inode, trav);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ need_validate = 0;
+
+ waitq = __ioc_page_wakeup(trav, trav->op_errno);
+ ioc_inode_unlock(ioc_inode);
+
+ ioc_waitq_return(waitq);
+ waitq = NULL;
+ goto out;
+ }
+ }
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ ioc_waitq_return(waitq);
+ waitq = NULL;
+
+ if (fault) {
+ fault = 0;
+ /* new page created, increase the table->cache_used */
+ ioc_page_fault(ioc_inode, frame, fd, trav_offset);
+ }
+
+ if (need_validate) {
+ need_validate = 0;
+ gf_msg_trace(frame->this->name, 0,
+ "sending validate request for "
+ "inode(%s) at offset=%" PRId64 "",
+ uuid_utoa(fd->inode->gfid), trav_offset);
+ ret = ioc_cache_validate(frame, ioc_inode, fd, trav);
+ if (ret == -1) {
+ ioc_inode_lock(ioc_inode);
+ {
+ waitq = __ioc_page_wakeup(trav, trav->op_errno);
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ ioc_waitq_return(waitq);
+ waitq = NULL;
+ goto out;
+ }
+ }
+
+ trav_offset += table->page_size;
+ }
out:
- ioc_frame_return (frame);
+ ioc_frame_return(frame);
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune(ioc_inode->table)) {
+ ioc_prune(ioc_inode->table);
+ }
- return;
+ return;
}
-
/*
* ioc_readv -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -900,131 +1101,113 @@ out:
*
*/
int32_t
-ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = NULL;
- uint32_t weight = 0;
- ioc_table_t *table = NULL;
- uint32_t num_pages = 0;
- int32_t op_errno = -1;
-
- if (!this) {
- goto out;
- }
-
- inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (!ioc_inode) {
- /* caching disabled, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
-
- table = this->private;
-
- if (!table) {
- gf_log (this->name, GF_LOG_ERROR, "table is null");
- op_errno = EINVAL;
- goto out;
- }
-
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_local_t *local = NULL;
+ uint32_t weight = 0;
+ ioc_table_t *table = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!this) {
+ goto out;
+ }
+
+ inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (!ioc_inode) {
+ /* caching disabled, go ahead with normal readv */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
- ioc_table_lock (table);
- {
- if (!table->mem_pool) {
+ if (flags & O_DIRECT) {
+ /* disable caching for this fd, if O_DIRECT is used */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
- num_pages = (table->cache_size / table->page_size)
- + ((table->cache_size % table->page_size)
- ? 1 : 0);
+ table = this->private;
- table->mem_pool
- = mem_pool_new (rbthash_entry_t, num_pages);
+ if (!table) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL,
+ NULL);
+ op_errno = EINVAL;
+ goto out;
+ }
- if (!table->mem_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to allocate mem_pool");
- op_errno = ENOMEM;
- ioc_table_unlock (table);
- goto out;
- }
- }
- }
- ioc_table_unlock (table);
+ ioc_inode_lock(ioc_inode);
+ {
+ if (!ioc_inode->cache.page_table) {
+ ioc_inode->cache.page_table = rbthash_table_init(
+ this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0,
+ table->mem_pool);
- ioc_inode_lock (ioc_inode);
- {
- if (!ioc_inode->cache.page_table) {
- ioc_inode->cache.page_table
- = rbthash_table_init
- (IOC_PAGE_TABLE_BUCKET_COUNT,
- ioc_hashfn, NULL, 0,
- table->mem_pool);
-
- if (ioc_inode->cache.page_table == NULL) {
- op_errno = ENOMEM;
- ioc_inode_unlock (ioc_inode);
- goto out;
- }
- }
- }
- ioc_inode_unlock (ioc_inode);
-
- if (!fd_ctx_get (fd, this, NULL)) {
- /* disable caching for this fd, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
- local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ if (ioc_inode->cache.page_table == NULL) {
op_errno = ENOMEM;
+ ioc_inode_unlock(ioc_inode);
goto out;
+ }
}
-
- INIT_LIST_HEAD (&local->fill_list);
-
- frame->local = local;
- local->pending_offset = offset;
- local->pending_size = size;
- local->offset = offset;
- local->size = size;
- local->inode = ioc_inode;
-
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
- frame, offset, size);
-
- weight = ioc_inode->weight;
-
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &ioc_inode->table->inode_lru[weight]);
- }
- ioc_table_unlock (ioc_inode->table);
-
- ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
- return 0;
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ if (!fd_ctx_get(fd, this, NULL)) {
+ /* disable caching for this fd, go ahead with normal readv */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&local->fill_list);
+
+ frame->local = local;
+ local->pending_offset = offset;
+ local->pending_size = size;
+ local->offset = offset;
+ local->size = size;
+ local->inode = ioc_inode;
+ local->xattr_req = dict_ref(xdata);
+
+ gf_msg_trace(this->name, 0,
+ "NEW REQ (%p) offset "
+ "= %" PRId64 " && size = %" GF_PRI_SIZET "",
+ frame, offset, size);
+
+ weight = ioc_inode->weight;
+
+ ioc_table_lock(ioc_inode->table);
+ {
+ list_move_tail(&ioc_inode->inode_lru,
+ &ioc_inode->table->inode_lru[weight]);
+ }
+ ioc_table_unlock(ioc_inode->table);
+
+ ioc_dispatch_requests(frame, ioc_inode, fd, offset, size);
+ return 0;
out:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
}
/*
* ioc_writev_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1033,26 +1216,36 @@ out:
*
*/
int32_t
-ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
-
- local = frame->local;
- inode_ctx_get (local->fd->inode, this, &ioc_inode);
-
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
-
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
+
+ local = frame->local;
+ frame->local = NULL;
+ inode_ctx_get(local->fd->inode, this, &ioc_inode);
+
+ if (op_ret >= 0) {
+ ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector,
+ local->op_ret, op_ret, local->offset);
+ }
+
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ if (local->iobref) {
+ iobref_unref(local->iobref);
+ GF_FREE(local->vector);
+ }
+
+ mem_put(local);
+ return 0;
}
/*
* ioc_writev
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1062,39 +1255,43 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*
*/
int32_t
-ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
- local = CALLOC (1, sizeof (ioc_local_t));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ local = mem_get0(this->local_pool);
+ if (local == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- /* TODO: why is it not fd_ref'ed */
- local->fd = fd;
- frame->local = local;
+ /* TODO: why is it not fd_ref'ed */
+ local->fd = fd;
+ frame->local = local;
- inode_ctx_get (fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get(fd->inode, this, &ioc_inode);
+ if (ioc_inode) {
+ local->iobref = iobref_ref(iobref);
+ local->vector = iov_dup(vector, count);
+ local->op_ret = count;
+ local->offset = offset;
+ }
- STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- iobref);
+ STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
}
/*
* ioc_truncate_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1103,18 +1300,16 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
* @buf:
*
*/
-int32_t
-ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+int32_t
+ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
-
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
/*
* ioc_ftruncate_cbk -
*
@@ -1127,43 +1322,43 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*
*/
int32_t
-ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
-
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
/*
* ioc_truncate -
- *
+ *
* @frame:
* @this:
* @loc:
* @offset:
*
*/
-int32_t
-ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+int32_t
+ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get(loc->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- return 0;
+ STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
/*
* ioc_ftruncate -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1171,354 +1366,866 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
*
*/
int32_t
-ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (fd->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get(fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
- STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
+int32_t
+ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int32_t
-ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
+ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
- return 0;
+ ioc_inode_t *ioc_inode = NULL;
+ uint64_t tmp_inode = 0;
+
+ inode_ctx_get(fd->inode, this, &tmp_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_inode;
+ if (!ioc_inode) {
+ gf_msg_debug(this->name, EBADFD,
+ "inode context is NULL: returning EBADFD");
+ STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL);
+ return 0;
+ }
+
+ ioc_inode_lock(ioc_inode);
+ {
+ ioc_inode->cache.last_revalidate = gf_time();
+ }
+ ioc_inode_unlock(ioc_inode);
+
+ STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+
+ return 0;
}
-int32_t
-ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+int
+ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- ioc_inode_t *ioc_inode = NULL;
- uint64_t tmp_inode = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_inode;
- if (!ioc_inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "inode context is NULL: returning EBADFD");
- STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL);
- return 0;
- }
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->cache.tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lk, fd, cmd, lock);
-
- return 0;
+ gf_dirent_t *entry = NULL;
+ char *path = NULL;
+ fd_t *fd = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ inode_path(fd->inode, entry->d_name, &path);
+ ioc_inode_update(this, entry->inode, path, &entry->d_stat);
+ GF_FREE(path);
+ path = NULL;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+int
+ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ frame->local = fd;
+
+ STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+
+ return 0;
+}
+
+static int32_t
+ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get(fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get(fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
}
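The write-side hooks above (writev, truncate, ftruncate, discard, zerofill) all reach the per-inode cache the same way: the ioc_inode_t pointer lives in the inode context as a uint64_t and is cast back with (ioc_inode_t *)(long) before pages are flushed or updated. A minimal sketch of that round trip, assuming the stock inode_ctx_put()/inode_ctx_get() helpers and the declarations from io-cache.h; error handling is trimmed.

#include "io-cache.h"

/* Store the per-inode cache object in the inode context; the pointer is
 * carried through a uint64_t (done at lookup/create time in io-cache). */
static int
ioc_inode_ctx_store(xlator_t *this, inode_t *inode, ioc_inode_t *ioc_inode)
{
    return inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
}

/* Fetch it back on a write-side fop and drop all cached pages, just as
 * the truncate/discard/zerofill hooks above do. */
static void
ioc_inode_ctx_flush(xlator_t *this, inode_t *inode)
{
    uint64_t value = 0;

    inode_ctx_get(inode, this, &value);
    if (value)
        ioc_inode_flush((ioc_inode_t *)(long)value);
}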
int32_t
-ioc_get_priority_list (const char *opt_str, struct list_head *first)
+ioc_get_priority_list(const char *opt_str, struct list_head *first)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct ioc_priority *curr = NULL, *tmp = NULL;
-
- string = strdup (opt_str);
- if (string == NULL) {
- max_pri = -1;
- goto out;
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+
+ string = gf_strdup(opt_str);
+ if (string == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ stripe_str = strtok_r(string, ",", &tmp_str);
+ while (stripe_str) {
+ curr = GF_CALLOC(1, sizeof(struct ioc_priority),
+ gf_ioc_mt_ioc_priority);
+ if (curr == NULL) {
+ max_pri = -1;
+ goto out;
}
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- stripe_str = strtok_r (string, ",", &tmp_str);
- while (stripe_str) {
- curr = CALLOC (1, sizeof (struct ioc_priority));
- if (curr == NULL) {
- max_pri = -1;
- goto out;
- }
- list_add_tail (&curr->list, first);
+ list_add_tail(&curr->list, first);
- dup_str = strdup (stripe_str);
- if (dup_str == NULL) {
- max_pri = -1;
- goto out;
- }
+ dup_str = gf_strdup(stripe_str);
+ if (dup_str == NULL) {
+ max_pri = -1;
+ goto out;
+ }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
- max_pri = -1;
- goto out;
- }
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ if (!pattern) {
+ max_pri = -1;
+ goto out;
+ }
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
- max_pri = -1;
- goto out;
- }
+ priority = strtok_r(NULL, ":", &tmp_str1);
+ if (!priority) {
+ max_pri = -1;
+ goto out;
+ }
- gf_log ("io-cache", GF_LOG_TRACE,
- "ioc priority : pattern %s : priority %s",
- pattern,
- priority);
+ gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s",
+ pattern, priority);
- curr->pattern = strdup (pattern);
- if (curr->pattern == NULL) {
- max_pri = -1;
- goto out;
- }
+ curr->pattern = gf_strdup(pattern);
+ if (curr->pattern == NULL) {
+ max_pri = -1;
+ goto out;
+ }
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
- max_pri = -1;
- goto out;
- } else {
- max_pri = max (max_pri, curr->priority);
- }
+ curr->priority = strtol(priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
+ max_pri = -1;
+ goto out;
+ } else {
+ max_pri = max(max_pri, curr->priority);
+ }
+
+ GF_FREE(dup_str);
+ dup_str = NULL;
+
+ stripe_str = strtok_r(NULL, ",", &tmp_str);
+ }
+out:
+ GF_FREE(string);
+
+ GF_FREE(dup_str);
+
+ if (max_pri == -1) {
+ list_for_each_entry_safe(curr, tmp, first, list)
+ {
+ list_del_init(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
+ }
+ }
+
+ return max_pri;
+}
- free (dup_str);
- dup_str = NULL;
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_ioc_mt_end + 1);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL);
+ return ret;
+ }
+
+ return ret;
+}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- }
+static gf_boolean_t
+check_cache_size_ok(xlator_t *this, uint64_t cache_size)
+{
+ gf_boolean_t ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT(this);
+ opt = xlator_volume_option_get(this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL);
+ goto out;
+ }
+
+ total_mem = get_mem_size();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);
+
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
+ "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64,
+ max_cache_size, NULL);
+ goto out;
+ }
out:
- if (string != NULL) {
- free (string);
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ data_t *data = NULL;
+ ioc_table_t *table = NULL;
+ int ret = -1;
+ uint64_t cache_size_new = 0;
+ if (!this || !this->private)
+ goto out;
+
+ table = this->private;
+
+ ioc_table_lock(table);
+ {
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool,
+ unlock);
+
+ GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, int32,
+ unlock);
+
+ data = dict_get(options, "priority");
+ if (data) {
+ char *option_list = data_to_str(data);
+
+ gf_msg_trace(this->name, 0, "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list(option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
+ goto unlock;
+ }
+ table->max_pri++;
}
- if (dup_str != NULL) {
- free (dup_str);
+ GF_OPTION_RECONF("max-file-size", table->max_file_size, options,
+ size_uint64, unlock);
+
+ GF_OPTION_RECONF("min-file-size", table->min_file_size, options,
+ size_uint64, unlock);
+
+ if ((table->max_file_size <= UINT64_MAX) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD,
+ "minimum-size=%" PRIu64, table->min_file_size,
+ "maximum-size=%" PRIu64, table->max_file_size, NULL);
+ goto unlock;
}
- if (max_pri == -1) {
- list_for_each_entry_safe (curr, tmp, first, list) {
- list_del_init (&curr->list);
- free (curr->pattern);
- free (curr);
- }
+ GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64,
+ unlock);
+ if (!check_cache_size_ok(this, cache_size_new)) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL);
+ goto unlock;
}
+ table->cache_size = cache_size_new;
- return max_pri;
+ ret = 0;
+ }
+unlock:
+ ioc_table_unlock(table);
+out:
+ return ret;
}
/*
- * init -
+ * init -
* @this:
*
*/
-int32_t
-init (xlator_t *this)
+int32_t
+init(xlator_t *this)
{
- ioc_table_t *table = NULL;
- dict_t *options = this->options;
- uint32_t index = 0;
- char *cache_size_string = NULL;
- int32_t ret = -1;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: io-cache not configured with exactly "
- "one child");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- table = (void *) CALLOC (1, sizeof (*table));
- if (table == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
+ ioc_table_t *table = NULL;
+ dict_t *xl_options = NULL;
+ uint32_t index = 0;
+ int32_t ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ data_t *data = 0;
+ uint32_t num_pages = 0;
+
+ xl_options = this->options;
+
+ if (!this->children || this->children->next) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED,
+ NULL);
+ }
+
+ table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t);
+ if (table == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ table->xl = this;
+ table->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+ GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out);
+
+ GF_OPTION_INIT("cache-timeout", table->cache_timeout, int32, out);
+
+ GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out);
+
+ GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out);
+
+ if (!check_cache_size_ok(this, table->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&table->priority_list);
+ table->max_pri = 1;
+ data = dict_get(xl_options, "priority");
+ if (data) {
+ char *option_list = data_to_str(data);
+ gf_msg_trace(this->name, 0, "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list(option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
+ goto out;
}
-
- table->xl = this;
- table->page_size = this->ctx->page_size;
- table->cache_size = IOC_CACHE_SIZE;
-
- if (dict_get (options, "cache-size"))
- cache_size_string = data_to_str (dict_get (options,
- "cache-size"));
- if (cache_size_string) {
- if (gf_string2bytesize (cache_size_string,
- &table->cache_size) != 0) {
- gf_log ("io-cache", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option cache-size\"",
- cache_size_string);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "using cache-size %"PRIu64"", table->cache_size);
- }
-
- table->cache_timeout = 1;
-
- if (dict_get (options, "cache-timeout")) {
- table->cache_timeout =
- data_to_uint32 (dict_get (options,
- "cache-timeout"));
- gf_log (this->name, GF_LOG_TRACE,
- "Using %d seconds to revalidate cache",
- table->cache_timeout);
- }
-
- INIT_LIST_HEAD (&table->priority_list);
- table->max_pri = 1;
- if (dict_get (options, "priority")) {
- char *option_list = data_to_str (dict_get (options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- table->max_pri = ioc_get_priority_list (option_list,
- &table->priority_list);
-
- if (table->max_pri == -1) {
- goto out;
- }
- }
- table->max_pri ++;
- INIT_LIST_HEAD (&table->inodes);
-
- table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head));
- if (table->inode_lru == NULL) {
- goto out;
+ }
+ table->max_pri++;
+
+ INIT_LIST_HEAD(&table->inodes);
+
+ if ((table->max_file_size <= UINT64_MAX) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
+ "minimum-size=%" PRIu64, table->min_file_size,
+ "maximum-size=%" PRIu64, table->max_file_size, NULL);
+ goto out;
+ }
+
+ table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head),
+ gf_ioc_mt_list_head);
+ if (table->inode_lru == NULL) {
+ goto out;
+ }
+
+ for (index = 0; index < (table->max_pri); index++)
+ INIT_LIST_HEAD(&table->inode_lru[index]);
+
+ this->local_pool = mem_pool_new(ioc_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL);
+ goto out;
+ }
+
+ pthread_mutex_init(&table->table_lock, NULL);
+ this->private = table;
+
+ num_pages = (table->cache_size / table->page_size) +
+ ((table->cache_size % table->page_size) ? 1 : 0);
+
+ table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages);
+ if (!table->mem_pool) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL);
+ goto out;
+ }
+
+ ret = 0;
+
+ ctx = this->ctx;
+ ioc_log2_page_size = log_base2(ctx->page_size);
+
+out:
+ if (ret == -1) {
+ if (table != NULL) {
+ GF_FREE(table->inode_lru);
+ GF_FREE(table);
}
+ }
- for (index = 0; index < (table->max_pri); index++)
- INIT_LIST_HEAD (&table->inode_lru[index]);
+ return ret;
+}
- pthread_mutex_init (&table->table_lock, NULL);
- this->private = table;
- ret = 0;
+void
+ioc_page_waitq_dump(ioc_page_t *page, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ trav = page->waitq;
+
+ while (trav) {
+ frame = trav->data;
+ sprintf(key, "waitq.frame[%d]", i++);
+ gf_proc_dump_write(key, "%" PRId64, frame->root->unique);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ ioc_page_t *page = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
- ctx = this->ctx;
- ioc_log2_page_size = log_base2 (ctx->page_size);
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ page = trav->data;
+
+ sprintf(key, "cache-validation-waitq.page[%d].offset", i++);
+ gf_proc_dump_write(key, "%" PRId64, page->offset);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_page_dump(ioc_page_t *page, char *prefix)
+{
+ int ret = -1;
+
+ if (!page)
+ return;
+ /* ioc_page_lock can be used to hold the mutex. But in statedump
+     * it's better to use trylock to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock(&page->page_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write("offset", "%" PRId64, page->offset);
+ gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);
+ gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");
+ ioc_page_waitq_dump(page, prefix);
+ }
+ pthread_mutex_unlock(&page->page_lock);
out:
- if (ret == -1) {
- if (table != NULL) {
- free (table->inode_lru);
- free (table);
- }
- }
+ if (ret && page)
+ gf_proc_dump_write("Unable to dump the page information",
+ "(Lock acquisition failed) %p", page);
- return ret;
+ return;
}
-int
-ioc_priv_dump (xlator_t *this)
+void
+__ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
{
- ioc_table_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
+ off_t offset = 0;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ if ((ioc_inode == NULL) || (prefix == NULL)) {
+ goto out;
+ }
+
+ table = ioc_inode->table;
+
+ if (ioc_inode->cache.last_revalidate) {
+ gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate,
+ gf_timefmt_FT);
+
+ gf_proc_dump_write("last-cache-validation-time", "%s", timestr);
+ }
+
+ for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) {
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page == NULL) {
+ continue;
+ }
- assert (this);
- priv = this->private;
+ sprintf(key, "inode.cache.page[%d]", i++);
+ __ioc_page_dump(page, key);
+ }
+out:
+ return;
+}
- assert (priv);
+int
+ioc_inode_dump(xlator_t *this, inode_t *inode)
+{
+ char *path = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ gf_boolean_t section_added = _gf_false;
+ char uuid_str[64] = {
+ 0,
+ };
+
+ if (this == NULL || inode == NULL)
+ goto out;
+
+ gf_proc_dump_build_key(key_prefix, "io-cache", "inode");
+
+ inode_ctx_get(inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (ioc_inode == NULL)
+ goto out;
+
+    /* Similar to the ioc_page_dump function, it's better to use
+ * pthread_mutex_trylock and not to use gf_log in statedump
+ * to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock(&ioc_inode->inode_lock);
+ if (ret)
+ goto out;
+
+ {
+ if (gf_uuid_is_null(ioc_inode->inode->gfid))
+ goto unlock;
+
+ gf_proc_dump_add_section("%s", key_prefix);
+ section_added = _gf_true;
+
+ __inode_path(ioc_inode->inode, NULL, &path);
+
+ gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight);
+
+ if (path) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
- gf_proc_dump_build_key (key_prefix, "xlator.performance.io-cache",
- "priv");
- gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_write("uuid", "%s",
+ uuid_utoa_r(ioc_inode->inode->gfid, uuid_str));
+ __ioc_cache_dump(ioc_inode, key_prefix);
+ __ioc_inode_waitq_dump(ioc_inode, key_prefix);
+ }
+unlock:
+ pthread_mutex_unlock(&ioc_inode->inode_lock);
- gf_proc_dump_build_key (key, key_prefix, "page_size");
- gf_proc_dump_write (key, "%ld", priv->page_size);
- gf_proc_dump_build_key (key, key_prefix, "cache_size");
- gf_proc_dump_write (key, "%ld", priv->cache_size);
- gf_proc_dump_build_key (key, key_prefix, "cache_used");
- gf_proc_dump_write (key, "%ld", priv->cache_used);
- gf_proc_dump_build_key (key, key_prefix, "inode_count");
- gf_proc_dump_write (key, "%u", priv->inode_count);
+out:
+ if (ret && ioc_inode) {
+ if (section_added == _gf_false)
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("Unable to print the status of ioc_inode",
+ "(Lock acquisition failed) %s",
+ uuid_utoa(inode->gfid));
+ }
+ return ret;
+}
- return 0;
+int
+ioc_priv_dump(xlator_t *this)
+{
+ ioc_table_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = -1;
+ gf_boolean_t add_section = _gf_false;
+
+ if (!this || !this->private)
+ goto out;
+
+ priv = this->private;
+
+ gf_proc_dump_build_key(key_prefix, "io-cache", "priv");
+ gf_proc_dump_add_section("%s", key_prefix);
+ add_section = _gf_true;
+
+ ret = pthread_mutex_trylock(&priv->table_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size);
+ gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size);
+ gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used);
+ gf_proc_dump_write("inode_count", "%u", priv->inode_count);
+ gf_proc_dump_write("cache_timeout", "%u", priv->cache_timeout);
+ gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size);
+ gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size);
+ }
+ pthread_mutex_unlock(&priv->table_lock);
+out:
+ if (ret && priv) {
+ if (!add_section) {
+ gf_proc_dump_build_key(key_prefix,
+ "xlator."
+ "performance.io-cache",
+ "priv");
+ gf_proc_dump_add_section("%s", key_prefix);
+ }
+ gf_proc_dump_write(
+ "Unable to dump the state of private "
+ "structure of io-cache xlator",
+ "(Lock "
+ "acquisition failed) %s",
+ this->name);
+ }
+
+ return 0;
}
/*
* fini -
- *
+ *
* @this:
*
*/
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- ioc_table_t *table = this->private;
-
- if (table == NULL)
- return;
-
- if (table->mem_pool != NULL) {
- mem_pool_destroy (table->mem_pool);
- table->mem_pool = NULL;
- }
-
- pthread_mutex_destroy (&table->table_lock);
- FREE (table);
-
- this->private = NULL;
- return;
+ ioc_table_t *table = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+
+ table = this->private;
+
+ if (table == NULL)
+ return;
+
+ this->private = NULL;
+
+ if (table->mem_pool != NULL) {
+ mem_pool_destroy(table->mem_pool);
+ table->mem_pool = NULL;
+ }
+
+ list_for_each_entry_safe(curr, tmp, &table->priority_list, list)
+ {
+ list_del_init(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
+ }
+
+ /* inode_lru and inodes list can be empty in case fini() is
+ * called soon after init()? Hence commenting the below asserts.
+ */
+ /*for (i = 0; i < table->max_pri; i++) {
+ GF_ASSERT (list_empty (&table->inode_lru[i]));
+ }
+
+ GF_ASSERT (list_empty (&table->inodes));
+ */
+ pthread_mutex_destroy(&table->table_lock);
+ GF_FREE(table);
+
+ this->private = NULL;
+ return;
}
struct xlator_fops fops = {
- .open = ioc_open,
- .create = ioc_create,
- .readv = ioc_readv,
- .writev = ioc_writev,
- .truncate = ioc_truncate,
- .ftruncate = ioc_ftruncate,
- .lookup = ioc_lookup,
- .lk = ioc_lk,
- .setattr = ioc_setattr
+ .open = ioc_open,
+ .create = ioc_create,
+ .readv = ioc_readv,
+ .writev = ioc_writev,
+ .truncate = ioc_truncate,
+ .ftruncate = ioc_ftruncate,
+ .lookup = ioc_lookup,
+ .lk = ioc_lk,
+ .setattr = ioc_setattr,
+ .mknod = ioc_mknod,
+
+ .readdirp = ioc_readdirp,
+ .discard = ioc_discard,
+ .zerofill = ioc_zerofill,
};
-struct xlator_mops mops = {
-};
-
-
struct xlator_dumpops dumpops = {
- .priv = ioc_priv_dump,
+ .priv = ioc_priv_dump,
+ .inodectx = ioc_inode_dump,
};
struct xlator_cbks cbks = {
- .forget = ioc_forget,
- .release = ioc_release
+ .forget = ioc_forget,
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"cache-timeout", "force-revalidate-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 60
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 4 * GF_UNIT_MB,
- .max = 6 * GF_UNIT_GB
- },
- { .key = {NULL} },
+ {
+ .key = {"io-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable io-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"priority"},
+ .type = GF_OPTION_TYPE_PRIORITY_LIST,
+ .default_value = "",
+ .description = "Assigns priority to filenames with specific "
+ "patterns so that when a page needs to be ejected "
+ "out of the cache, the page of a file whose "
+ "priority is the lowest will be ejected earlier",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"cache-timeout", "force-revalidate-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 60,
+ .default_value = "1",
+ .description = "The cached data for a file will be retained for "
+ "'cache-refresh-timeout' seconds, after which data "
+ "re-validation is performed.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4 * GF_UNIT_MB,
+ .max = INFINITY,
+ .default_value = "32MB",
+ .description = "Size of the read cache.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Minimum file size which would be cached by the "
+ "io-cache translator.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Maximum file size which would be cached by the "
+ "io-cache translator.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-cache"},
+ .description = "Enable/Disable io cache translator"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-cache",
+ .category = GF_MAINTAINED,
};
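For reference, the "priority" option parsed by ioc_get_priority_list() above takes a comma-separated list of pattern:priority pairs, for example "*.jpg:1,abc*:2"; each pattern is presumably matched against file names with fnmatch() (the header pulls in <fnmatch.h>) to decide eviction weight. Below is a rough, illustrative caller of the parser shown in this patch; the extern declaration is added here only because the function is not exported through io-cache.h.

#include <stdio.h>
#include <glusterfs/list.h>
#include "io-cache.h"

extern int32_t
ioc_get_priority_list(const char *opt_str, struct list_head *first);

static void
ioc_priority_parse_demo(void)
{
    struct list_head priority_list;
    struct ioc_priority *curr = NULL;
    int32_t max_pri = 0;

    INIT_LIST_HEAD(&priority_list);

    /* "*.jpg" files get priority 1, names starting with "abc" get 2 */
    max_pri = ioc_get_priority_list("*.jpg:1,abc*:2", &priority_list);
    if (max_pri == -1)
        return; /* parse failure: the parser already freed the partial list */

    list_for_each_entry(curr, &priority_list, list)
        printf("pattern=%s priority=%u\n", curr->pattern, curr->priority);
}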
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
index 63c2609ec7f..14923c75edc 100644
--- a/xlators/performance/io-cache/src/io-cache.h
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -1,46 +1,29 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IO_CACHE_H
#define __IO_CACHE_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/types.h>
-#include "compat-errno.h"
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "call-stub.h"
-#include "rbthash.h"
-#include "hashfn.h"
+#include <glusterfs/compat-errno.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/rbthash.h>
#include <sys/time.h>
#include <fnmatch.h>
+#include "io-cache-messages.h"
-#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
-#define IOC_CACHE_SIZE (32 * 1024 * 1024)
+#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
+#define IOC_CACHE_SIZE (32 * 1024 * 1024)
#define IOC_PAGE_TABLE_BUCKET_COUNT 1
struct ioc_table;
@@ -49,130 +32,135 @@ struct ioc_page;
struct ioc_inode;
struct ioc_priority {
- struct list_head list;
- char *pattern;
- uint32_t priority;
+ struct list_head list;
+ char *pattern;
+ uint32_t priority;
};
/*
- * ioc_waitq - this structure is used to represents the waiting
+ * ioc_waitq - this structure is used to represent the waiting
* frames on a page
*
* @next: pointer to next object in waitq
* @data: pointer to the frame which is waiting
*/
struct ioc_waitq {
- struct ioc_waitq *next;
- void *data;
- off_t pending_offset;
- size_t pending_size;
+ struct ioc_waitq *next;
+ void *data;
+ off_t pending_offset;
+ size_t pending_size;
};
/*
- * ioc_fill -
+ * ioc_fill -
*
*/
struct ioc_fill {
- struct list_head list; /* list of ioc_fill structures of a frame */
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
- struct iobref *iobref;
+ struct list_head list; /* list of ioc_fill structures of a frame */
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
};
struct ioc_local {
- mode_t mode;
- int32_t flags;
- loc_t file_loc;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- struct list_head fill_list; /* list of ioc_fill structures */
- off_t pending_offset; /*
- * offset from this frame should
- * continue
- */
- size_t pending_size; /*
- * size of data this frame is waiting
- * on
- */
- struct ioc_inode *inode;
- int32_t wait_count;
- pthread_mutex_t local_lock;
- struct ioc_waitq *waitq;
- void *stub;
- fd_t *fd;
- int32_t need_xattr;
- dict_t *xattr_req;
+ mode_t mode;
+ int32_t flags;
+ loc_t file_loc;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ struct list_head fill_list; /* list of ioc_fill structures */
+ off_t pending_offset; /*
+ * offset from this frame should
+ * continue
+ */
+ size_t pending_size; /*
+ * size of data this frame is waiting
+ * on
+ */
+ struct ioc_inode *inode;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
+ struct ioc_waitq *waitq;
+ void *stub;
+ fd_t *fd;
+ struct iovec *vector;
+ struct iobref *iobref;
+ int32_t need_xattr;
+ dict_t *xattr_req;
};
/*
- * ioc_page - structure to store page of data from file
+ * ioc_page - structure to store page of data from file
*
*/
struct ioc_page {
- struct list_head page_lru;
- struct ioc_inode *inode; /* inode this page belongs to */
- struct ioc_priority *priority;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ioc_waitq *waitq;
- struct iobref *iobref;
- pthread_mutex_t page_lock;
+ struct list_head page_lru;
+ struct ioc_inode *inode; /* inode this page belongs to */
+ struct ioc_priority *priority;
+ char dirty;
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ioc_waitq *waitq;
+ struct iobref *iobref;
+ pthread_mutex_t page_lock;
+ int32_t op_errno;
+ char stale;
};
struct ioc_cache {
- rbthash_table_t *page_table;
- struct list_head page_lru;
- uint32_t mtime; /*
- * seconds component of file mtime on
- * server
- */
- uint32_t mtime_nsec; /* nanosecond component of file mtime
- * on server
- */
- struct timeval tv; /*
- * time-stamp at last re-validate
- */
+ rbthash_table_t *page_table;
+ struct list_head page_lru;
+ time_t mtime; /*
+ * seconds component of file mtime
+ */
+ time_t mtime_nsec; /*
+ * nanosecond component of file mtime
+ */
+ time_t last_revalidate; /* timestamp at last re-validate */
};
struct ioc_inode {
- struct ioc_table *table;
- struct ioc_cache cache;
- struct list_head inode_list; /*
- * list of inodes, maintained by
- * io-cache translator
- */
- struct list_head inode_lru;
- struct ioc_waitq *waitq;
- pthread_mutex_t inode_lock;
- uint32_t weight; /*
- * weight of the inode, increases
- * on each read
- */
+ struct ioc_table *table;
+ off_t ia_size;
+ struct ioc_cache cache;
+ struct list_head inode_list; /*
+ * list of inodes, maintained by
+ * io-cache translator
+ */
+ struct list_head inode_lru;
+ struct ioc_waitq *waitq;
+ pthread_mutex_t inode_lock;
+ uint32_t weight; /*
+ * weight of the inode, increases
+ * on each read
+ */
+ inode_t *inode;
};
struct ioc_table {
- uint64_t page_size;
- uint64_t cache_size;
- uint64_t cache_used;
- struct list_head inodes; /* list of inodes cached */
- struct list_head active;
- struct list_head *inode_lru;
- struct list_head priority_list;
- int32_t readv_count;
- pthread_mutex_t table_lock;
- xlator_t *xl;
- uint32_t inode_count;
- int32_t cache_timeout;
- int32_t max_pri;
- struct mem_pool *mem_pool;
+ uint64_t page_size;
+ uint64_t cache_size;
+ uint64_t cache_used;
+ uint64_t min_file_size;
+ uint64_t max_file_size;
+ struct list_head inodes; /* list of inodes cached */
+ struct list_head active;
+ struct list_head *inode_lru;
+ struct list_head priority_list;
+ int32_t readv_count;
+ pthread_mutex_t table_lock;
+ xlator_t *xl;
+ uint32_t inode_count;
+ int32_t cache_timeout;
+ int32_t max_pri;
+ struct mem_pool *mem_pool;
};
typedef struct ioc_table ioc_table_t;
@@ -183,159 +171,136 @@ typedef struct ioc_waitq ioc_waitq_t;
typedef struct ioc_fill ioc_fill_t;
void *
-str_to_ptr (char *string);
+str_to_ptr(char *string);
char *
-ptr_to_str (void *ptr);
+ptr_to_str(void *ptr);
-int32_t
-ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf,
- struct iobref *iobref);
+int32_t
+ioc_readv_disabled_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata);
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_get(ioc_inode_t *ioc_inode, off_t offset);
ioc_page_t *
-ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_create(ioc_inode_t *ioc_inode, off_t offset);
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset);
+ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset);
void
-ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
+__ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size);
ioc_waitq_t *
-ioc_page_wakeup (ioc_page_t *page);
+__ioc_page_wakeup(ioc_page_t *page, int32_t op_errno);
void
-ioc_page_flush (ioc_page_t *page);
+ioc_page_flush(ioc_page_t *page);
ioc_waitq_t *
-ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
-
-void
-ioc_page_purge (ioc_page_t *page);
+__ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno);
void
-ioc_frame_return (call_frame_t *frame);
+ioc_frame_return(call_frame_t *frame);
void
-ioc_waitq_return (ioc_waitq_t *waitq);
+ioc_waitq_return(ioc_waitq_t *waitq);
int32_t
-ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
-
-#define ioc_inode_lock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "locked inode(%p)", ioc_inode); \
- pthread_mutex_lock (&ioc_inode->inode_lock); \
- } while (0)
-
-
-#define ioc_inode_unlock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked inode(%p)", ioc_inode); \
- pthread_mutex_unlock (&ioc_inode->inode_lock); \
- } while (0)
-
-
-#define ioc_table_lock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "locked table(%p)", table); \
- pthread_mutex_lock (&table->table_lock); \
- } while (0)
-
-
-#define ioc_table_unlock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "unlocked table(%p)", table); \
- pthread_mutex_unlock (&table->table_lock); \
- } while (0)
-
-
-#define ioc_local_lock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "locked local(%p)", local); \
- pthread_mutex_lock (&local->local_lock); \
- } while (0)
-
-
-#define ioc_local_unlock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked local(%p)", local); \
- pthread_mutex_unlock (&local->local_lock); \
- } while (0)
-
-
-#define ioc_page_lock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "locked page(%p)", page); \
- pthread_mutex_lock (&page->page_lock); \
- } while (0)
-
-
-#define ioc_page_unlock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked page(%p)", page); \
- pthread_mutex_unlock (&page->page_lock); \
- } while (0)
-
-
-static inline uint64_t
-time_elapsed (struct timeval *now,
- struct timeval *then)
-{
- uint64_t sec = now->tv_sec - then->tv_sec;
-
- if (sec)
- return sec;
-
- return 0;
-}
+ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size,
+ int32_t op_errno);
+
+#define ioc_inode_lock(ioc_inode) \
+ do { \
+ gf_msg_trace(ioc_inode->table->xl->name, 0, "locked inode(%p)", \
+ ioc_inode); \
+ pthread_mutex_lock(&ioc_inode->inode_lock); \
+ } while (0)
+
+#define ioc_inode_unlock(ioc_inode) \
+ do { \
+ gf_msg_trace(ioc_inode->table->xl->name, 0, "unlocked inode(%p)", \
+ ioc_inode); \
+ pthread_mutex_unlock(&ioc_inode->inode_lock); \
+ } while (0)
+
+#define ioc_table_lock(table) \
+ do { \
+ gf_msg_trace(table->xl->name, 0, "locked table(%p)", table); \
+ pthread_mutex_lock(&table->table_lock); \
+ } while (0)
+
+#define ioc_table_unlock(table) \
+ do { \
+ gf_msg_trace(table->xl->name, 0, "unlocked table(%p)", table); \
+ pthread_mutex_unlock(&table->table_lock); \
+ } while (0)
+
+#define ioc_local_lock(local) \
+ do { \
+ gf_msg_trace(local->inode->table->xl->name, 0, "locked local(%p)", \
+ local); \
+ pthread_mutex_lock(&local->local_lock); \
+ } while (0)
+
+#define ioc_local_unlock(local) \
+ do { \
+ gf_msg_trace(local->inode->table->xl->name, 0, "unlocked local(%p)", \
+ local); \
+ pthread_mutex_unlock(&local->local_lock); \
+ } while (0)
+
+#define ioc_page_lock(page) \
+ do { \
+ gf_msg_trace(page->inode->table->xl->name, 0, "locked page(%p)", \
+ page); \
+ pthread_mutex_lock(&page->page_lock); \
+ } while (0)
+
+#define ioc_page_unlock(page) \
+ do { \
+ gf_msg_trace(page->inode->table->xl->name, 0, "unlocked page(%p)", \
+ page); \
+ pthread_mutex_unlock(&page->page_lock); \
+ } while (0)
ioc_inode_t *
-ioc_inode_search (ioc_table_t *table, inode_t *inode);
+ioc_inode_search(ioc_table_t *table, inode_t *inode);
-void
-ioc_inode_destroy (ioc_inode_t *ioc_inode);
+void
+ioc_inode_destroy(ioc_inode_t *ioc_inode);
+
+int32_t
+ioc_inode_update(xlator_t *this, inode_t *inode, char *path,
+ struct iatt *iabuf);
ioc_inode_t *
-ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight);
+ioc_inode_create(ioc_table_t *table, inode_t *inode, uint32_t weight);
-int64_t
-ioc_page_destroy (ioc_page_t *page);
+int64_t
+__ioc_page_destroy(ioc_page_t *page);
int64_t
-__ioc_inode_flush (ioc_inode_t *ioc_inode);
+__ioc_inode_flush(ioc_inode_t *ioc_inode);
void
-ioc_inode_flush (ioc_inode_t *ioc_inode);
+ioc_inode_flush(ioc_inode_t *ioc_inode);
void
-ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
- struct stat *stbuf);
+ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iatt *stbuf);
int8_t
-ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct stat *stbuf);
+ioc_cache_still_valid(ioc_inode_t *ioc_inode, struct iatt *stbuf);
int32_t
-ioc_prune (ioc_table_t *table);
+ioc_prune(ioc_table_t *table);
int32_t
-ioc_need_prune (ioc_table_t *table);
+ioc_need_prune(ioc_table_t *table);
-inline uint32_t
-ioc_hashfn (void *data, int len);
#endif /* __IO_CACHE_H */
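A notable data-model change in this header: struct ioc_cache now keeps plain time_t stamps (mtime, mtime_nsec, last_revalidate) instead of the old struct timeval, and the C file refreshes last_revalidate with gf_time() (see ioc_lk above). The actual validity decision belongs to ioc_cache_still_valid(), declared above; the helper below is only a hedged sketch of what a cache-timeout comparison could look like, not the translator's real logic.

#include <glusterfs/common-utils.h> /* gf_time() */
#include "io-cache.h"

/* Illustration only: treat the cached data as stale once cache_timeout
 * seconds have elapsed since the last successful revalidation. */
static gf_boolean_t
ioc_cache_expired_demo(ioc_inode_t *ioc_inode)
{
    ioc_table_t *table = ioc_inode->table;
    time_t now = gf_time();

    return ((now - ioc_inode->cache.last_revalidate) >= table->cache_timeout);
}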
diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c
index 74c657fe7c3..97767d85285 100644
--- a/xlators/performance/io-cache/src/ioc-inode.c
+++ b/xlators/performance/io-cache/src/ioc-inode.c
@@ -1,28 +1,15 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "io-cache.h"
+#include "ioc-mem-types.h"
extern int ioc_log2_page_size;
@@ -32,14 +19,17 @@ extern int ioc_log2_page_size;
*
*/
void *
-str_to_ptr (char *string)
+str_to_ptr(char *string)
{
- void *ptr = NULL;
+ void *ptr = NULL;
- ptr = (void *)strtoul (string, NULL, 16);
- return ptr;
-}
+ GF_VALIDATE_OR_GOTO("io-cache", string, out);
+ ptr = (void *)strtoul(string, NULL, 16);
+
+out:
+ return ptr;
+}
/*
* ptr_to_str - convert a pointer to string
@@ -47,171 +37,191 @@ str_to_ptr (char *string)
*
*/
char *
-ptr_to_str (void *ptr)
+ptr_to_str(void *ptr)
{
- int ret = 0;
- char *str = NULL;
- ret = asprintf (&str, "%p", ptr);
- if (-1 == ret) {
- gf_log ("ioc", GF_LOG_ERROR,
- "asprintf failed while converting ptr to str");
- return NULL;
- }
- return str;
+ int ret = 0;
+ char *str = NULL;
+
+ GF_VALIDATE_OR_GOTO("io-cache", ptr, out);
+
+ ret = gf_asprintf(&str, "%p", ptr);
+ if (-1 == ret) {
+ gf_smsg("io-cache", GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_STR_COVERSION_FAILED, NULL);
+ str = NULL;
+ goto out;
+ }
+
+out:
+ return str;
}
void
-ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
- struct stat *stbuf)
+ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iatt *stbuf)
{
- ioc_waitq_t *waiter = NULL, *waited = NULL;
- ioc_waitq_t *page_waitq = NULL;
- int8_t cache_still_valid = 1;
- ioc_local_t *local = NULL;
- int8_t need_fault = 0;
- ioc_page_t *waiter_page = NULL;
-
- local = frame->local;
- ioc_inode_lock (ioc_inode);
- {
- waiter = ioc_inode->waitq;
- ioc_inode->waitq = NULL;
- }
- ioc_inode_unlock (ioc_inode);
-
- if (stbuf)
- cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf);
- else
- cache_still_valid = 0;
-
- if (!waiter) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "cache validate called without any "
- "page waiting to be validated");
- }
-
- while (waiter) {
- waiter_page = waiter->data;
- page_waitq = NULL;
-
- if (waiter_page) {
- if (cache_still_valid) {
- /* cache valid, wake up page */
- ioc_inode_lock (ioc_inode);
- {
- page_waitq =
- ioc_page_wakeup (waiter_page);
- }
- ioc_inode_unlock (ioc_inode);
- if (page_waitq)
- ioc_waitq_return (page_waitq);
- } else {
- /* cache invalid, generate page fault and set
- * page->ready = 0, to avoid double faults
- */
- ioc_inode_lock (ioc_inode);
-
- if (waiter_page->ready) {
- waiter_page->ready = 0;
- need_fault = 1;
- } else {
- gf_log (frame->this->name,
- GF_LOG_TRACE,
- "validate frame(%p) is waiting"
- "for in-transit page = %p",
- frame, waiter_page);
- }
-
- ioc_inode_unlock (ioc_inode);
-
- if (need_fault) {
- need_fault = 0;
- ioc_page_fault (ioc_inode, frame,
- local->fd,
- waiter_page->offset);
- }
- }
- }
-
- waited = waiter;
- waiter = waiter->next;
-
- waited->data = NULL;
- free (waited);
- }
+ ioc_waitq_t *waiter = NULL, *waited = NULL;
+ ioc_waitq_t *page_waitq = NULL;
+ int8_t cache_still_valid = 1;
+ ioc_local_t *local = NULL;
+ int8_t need_fault = 0;
+ ioc_page_t *waiter_page = NULL;
+
+ GF_VALIDATE_OR_GOTO("io-cache", frame, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(frame->this->name, local, out);
+
+ if (ioc_inode == NULL) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_INODE_NULL,
+ NULL);
+ goto out;
+ }
+
+ if (stbuf)
+ cache_still_valid = ioc_cache_still_valid(ioc_inode, stbuf);
+ else
+ cache_still_valid = 0;
+
+ ioc_inode_lock(ioc_inode);
+ {
+ waiter = ioc_inode->waitq;
+ if (!waiter) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_PAGE_WAIT_VALIDATE, NULL);
+
+ ioc_inode_unlock(ioc_inode);
+ goto out;
+ }
+
+ while (waiter) {
+ waiter_page = waiter->data;
+ ioc_inode->waitq = waiter->next;
+ page_waitq = NULL;
+
+ if (waiter_page) {
+ if (cache_still_valid) {
+ /* cache valid, wake up page */
+ page_waitq = __ioc_page_wakeup(waiter_page,
+ waiter_page->op_errno);
+ if (page_waitq) {
+ ioc_inode_unlock(ioc_inode);
+ ioc_waitq_return(page_waitq);
+ ioc_inode_lock(ioc_inode);
+ }
+ } else {
+ /* cache invalid, generate page fault and set
+ * page->ready = 0, to avoid double faults
+ */
+ if (waiter_page->ready) {
+ waiter_page->ready = 0;
+ need_fault = 1;
+ } else {
+ gf_msg_trace(frame->this->name, 0,
+ "validate "
+ "frame(%p) is "
+ "waiting for "
+ "in-transit"
+ " page = %p",
+ frame, waiter_page);
+ }
+
+ if (need_fault) {
+ need_fault = 0;
+ ioc_inode_unlock(ioc_inode);
+ ioc_page_fault(ioc_inode, frame, local->fd,
+ waiter_page->offset);
+ ioc_inode_lock(ioc_inode);
+ }
+ }
+ }
+
+ waited = waiter;
+ waiter = ioc_inode->waitq;
+
+ waited->data = NULL;
+ GF_FREE(waited);
+ }
+ }
+ ioc_inode_unlock(ioc_inode);
+
+out:
+ return;
}
-/*
- * ioc_inode_update - create a new ioc_inode_t structure and add it to
- * the table table. fill in the fields which are derived
+/*
+ * ioc_inode_create - create a new ioc_inode_t structure and add it to
+ * the table. Fill in the fields which are derived
* from inode_t corresponding to the file
- *
+ *
* @table: io-table structure
* @inode: inode structure
*
* not for external reference
*/
ioc_inode_t *
-ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight)
+ioc_inode_create(ioc_table_t *table, inode_t *inode, uint32_t weight)
{
- ioc_inode_t *ioc_inode = NULL;
- unsigned long no_of_pages = 0;
-
- ioc_inode = CALLOC (1, sizeof (ioc_inode_t));
- if (ioc_inode == NULL) {
- goto out;
- }
-
- ioc_inode->table = table;
+ ioc_inode_t *ioc_inode = NULL;
- no_of_pages = (table->cache_size / table->page_size)
- + ((table->cache_size % table->page_size) ? 1 : 0);
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
- INIT_LIST_HEAD (&ioc_inode->cache.page_lru);
+ ioc_inode = GF_CALLOC(1, sizeof(ioc_inode_t), gf_ioc_mt_ioc_inode_t);
+ if (ioc_inode == NULL) {
+ goto out;
+ }
- ioc_table_lock (table);
+ ioc_inode->inode = inode;
+ ioc_inode->table = table;
+ INIT_LIST_HEAD(&ioc_inode->cache.page_lru);
+ pthread_mutex_init(&ioc_inode->inode_lock, NULL);
+ ioc_inode->weight = weight;
- table->inode_count++;
- list_add (&ioc_inode->inode_list, &table->inodes);
- list_add_tail (&ioc_inode->inode_lru, &table->inode_lru[weight]);
+ ioc_table_lock(table);
+ {
+ table->inode_count++;
+ list_add(&ioc_inode->inode_list, &table->inodes);
+ list_add_tail(&ioc_inode->inode_lru, &table->inode_lru[weight]);
+ }
+ ioc_table_unlock(table);
- gf_log (table->xl->name,
- GF_LOG_TRACE,
- "adding to inode_lru[%d]", weight);
+ gf_msg_trace(table->xl->name, 0, "adding to inode_lru[%d]", weight);
- ioc_table_unlock (table);
-
- pthread_mutex_init (&ioc_inode->inode_lock, NULL);
- ioc_inode->weight = weight;
-
out:
- return ioc_inode;
+ return ioc_inode;
}
-
-/*
+/*
* ioc_inode_destroy - destroy an ioc_inode_t object.
*
* @inode: inode to destroy
*
- * to be called only from ioc_forget.
+ * to be called only from ioc_forget.
*/
void
-ioc_inode_destroy (ioc_inode_t *ioc_inode)
+ioc_inode_destroy(ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = NULL;
+ ioc_table_t *table = NULL;
- table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
- ioc_table_lock (table);
- table->inode_count--;
- list_del (&ioc_inode->inode_list);
- list_del (&ioc_inode->inode_lru);
- ioc_table_unlock (table);
-
- ioc_inode_flush (ioc_inode);
- rbthash_table_destroy (ioc_inode->cache.page_table);
+ table = ioc_inode->table;
- pthread_mutex_destroy (&ioc_inode->inode_lock);
- free (ioc_inode);
+ ioc_table_lock(table);
+ {
+ table->inode_count--;
+ list_del(&ioc_inode->inode_list);
+ list_del(&ioc_inode->inode_lru);
+ }
+ ioc_table_unlock(table);
+
+ ioc_inode_flush(ioc_inode);
+ rbthash_table_destroy(ioc_inode->cache.page_table);
+
+ pthread_mutex_destroy(&ioc_inode->inode_lock);
+ GF_FREE(ioc_inode);
+out:
+ return;
}
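The rewritten ioc_inode_wakeup() above now drains ioc_inode->waitq under inode_lock, but it pops each waiter off the list first and drops the lock around ioc_waitq_return()/ioc_page_fault(), so those paths may safely re-acquire the same lock or queue new waiters. A stripped-down, self-contained sketch of that drain-with-lock-drop pattern follows; the names here are generic, not the translator's own.

#include <pthread.h>
#include <stdlib.h>

struct waitq {
    struct waitq *next;
    void *data;
};

/* Drain a singly linked wait queue: detach the head while holding the
 * lock, then release the lock around the callback, exactly so the
 * callback may take the lock again or append new waiters. */
static void
drain_waitq(pthread_mutex_t *lock, struct waitq **head,
            void (*notify)(void *data))
{
    struct waitq *w = NULL;

    pthread_mutex_lock(lock);
    while ((w = *head) != NULL) {
        *head = w->next;          /* pop before releasing the lock */

        pthread_mutex_unlock(lock);
        notify(w->data);          /* may re-enter and add new waiters */
        free(w);
        pthread_mutex_lock(lock);
    }
    pthread_mutex_unlock(lock);
}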
diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h
new file mode 100644
index 00000000000..20c9a12021e
--- /dev/null
+++ b/xlators/performance/io-cache/src/ioc-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __IOC_MT_H__
+#define __IOC_MT_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_ioc_mem_types_ {
+ gf_ioc_mt_iovec = gf_common_mt_end + 1,
+ gf_ioc_mt_ioc_table_t,
+ gf_ioc_mt_char,
+ gf_ioc_mt_ioc_waitq_t,
+ gf_ioc_mt_ioc_priority,
+ gf_ioc_mt_list_head,
+ gf_ioc_mt_call_pool_t,
+ gf_ioc_mt_ioc_inode_t,
+ gf_ioc_mt_ioc_fill_t,
+ gf_ioc_mt_ioc_newpage_t,
+ gf_ioc_mt_end
+};
+#endif
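This new enum feeds GlusterFS memory accounting: mem_acct_init() in io-cache.c registers gf_ioc_mt_end + 1 slots through xlator_mem_acct_init(), and each GF_CALLOC()/GF_FREE() pair in the patch tags its allocation with one of these values (for example gf_ioc_mt_ioc_inode_t in ioc_inode_create()). A small sketch of the pairing, assuming the usual mem-pool helpers:

#include <glusterfs/mem-pool.h>
#include "io-cache.h"
#include "ioc-mem-types.h"

static ioc_inode_t *
ioc_inode_alloc_demo(void)
{
    /* the third argument is the accounting slot registered by
     * mem_acct_init(); the caller later releases the object with
     * GF_FREE(), which debits the same slot */
    return GF_CALLOC(1, sizeof(ioc_inode_t), gf_ioc_mt_ioc_inode_t);
}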
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index 964f8d01a09..84b1ae6cb20 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -1,106 +1,190 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "io-cache.h"
+#include "ioc-mem-types.h"
#include <assert.h>
#include <sys/time.h>
-
+#include "io-cache-messages.h"
char
-ioc_empty (struct ioc_cache *cache)
+ioc_empty(struct ioc_cache *cache)
{
- return list_empty (&cache->page_lru);
+ char is_empty = -1;
+
+ GF_VALIDATE_OR_GOTO("io-cache", cache, out);
+
+ is_empty = list_empty(&cache->page_lru);
+
+out:
+ return is_empty;
}
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_get(ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_page_t *page = NULL;
- ioc_table_t *table = NULL;
- off_t rounded_offset = 0;
+ ioc_page_t *page = NULL;
+ ioc_table_t *table = NULL;
+ off_t rounded_offset = 0;
- table = ioc_inode->table;
- rounded_offset = floor (offset, table->page_size);
-
- page = rbthash_get (ioc_inode->cache.page_table, &rounded_offset,
- sizeof (rounded_offset));
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
- if (page != NULL) {
- /* push the page to the end of the lru list */
- list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
- }
+ table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
+
+ rounded_offset = gf_floor(offset, table->page_size);
- return page;
+ page = rbthash_get(ioc_inode->cache.page_table, &rounded_offset,
+ sizeof(rounded_offset));
+
+ if (page != NULL) {
+ /* push the page to the end of the lru list */
+ list_move_tail(&page->page_lru, &ioc_inode->cache.page_lru);
+ }
+
+out:
+ return page;
}
+ioc_page_t *
+ioc_page_get(ioc_inode_t *ioc_inode, off_t offset)
+{
+ ioc_page_t *page = NULL;
+
+ if (ioc_inode == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock(ioc_inode);
+ {
+ page = __ioc_page_get(ioc_inode, offset);
+ }
+ ioc_inode_unlock(ioc_inode);
+
+out:
+ return page;
+}
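
The pair above shows the locking convention used throughout the rewritten file: a double-underscore helper assumes the inode lock is already held, and a thin public wrapper takes and releases the lock around it. A generic sketch of the pattern, using the ioc_inode_lock()/ioc_inode_unlock() macros seen above (illustrative only, not part of the patch):

static int
__do_cache_work(ioc_inode_t *ioc_inode)   /* caller holds inode_lock */
{
    /* safe to touch ioc_inode->cache here */
    return 0;
}

static int
do_cache_work(ioc_inode_t *ioc_inode)     /* lock-taking wrapper */
{
    int ret = -1;

    if (ioc_inode == NULL)
        goto out;

    ioc_inode_lock(ioc_inode);
    {
        ret = __do_cache_work(ioc_inode);
    }
    ioc_inode_unlock(ioc_inode);
out:
    return ret;
}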
/*
- * ioc_page_destroy -
+ * __ioc_page_destroy -
*
* @page:
*
*/
int64_t
-ioc_page_destroy (ioc_page_t *page)
+__ioc_page_destroy(ioc_page_t *page)
{
- int64_t page_size = 0;
-
- page_size = iobref_size (page->iobref);
-
- if (page->waitq) {
- /* frames waiting on this page, do not destroy this page */
- page_size = -1;
- } else {
- rbthash_remove (page->inode->cache.page_table, &page->offset,
- sizeof (page->offset));
- list_del (&page->page_lru);
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "destroying page = %p, offset = %"PRId64" "
- "&& inode = %p",
- page, page->offset, page->inode);
-
- if (page->vector){
- iobref_unref (page->iobref);
- free (page->vector);
- page->vector = NULL;
- }
-
- page->inode = NULL;
- }
-
- if (page_size != -1) {
- pthread_mutex_destroy (&page->page_lock);
- free (page);
- }
-
- return page_size;
+ int64_t page_size = 0;
+
+ GF_VALIDATE_OR_GOTO("io-cache", page, out);
+
+ if (page->iobref)
+ page_size = iobref_size(page->iobref);
+
+ if (page->waitq) {
+ /* frames waiting on this page, do not destroy this page */
+ page_size = -1;
+ page->stale = 1;
+ } else {
+ rbthash_remove(page->inode->cache.page_table, &page->offset,
+ sizeof(page->offset));
+ list_del(&page->page_lru);
+
+ gf_msg_trace(page->inode->table->xl->name, 0,
+ "destroying page = %p, offset = %" PRId64
+ " "
+ "&& inode = %p",
+ page, page->offset, page->inode);
+
+ if (page->vector) {
+ iobref_unref(page->iobref);
+ GF_FREE(page->vector);
+ page->vector = NULL;
+ }
+
+ page->inode = NULL;
+ }
+
+ if (page_size != -1) {
+ pthread_mutex_destroy(&page->page_lock);
+ GF_FREE(page);
+ }
+
+out:
+ return page_size;
}
+int64_t
+ioc_page_destroy(ioc_page_t *page)
+{
+ int64_t ret = 0;
+ struct ioc_inode *inode = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock(page->inode);
+ {
+ inode = page->inode;
+ ret = __ioc_page_destroy(page);
+ }
+ ioc_inode_unlock(inode);
+
+out:
+ return ret;
+}
+
+int32_t
+__ioc_inode_prune(ioc_inode_t *curr, uint64_t *size_pruned,
+ uint64_t size_to_prune, uint32_t index)
+{
+ ioc_page_t *page = NULL, *next = NULL;
+ int32_t ret = 0;
+ ioc_table_t *table = NULL;
+
+ if (curr == NULL) {
+ goto out;
+ }
+
+ table = curr->table;
+
+ list_for_each_entry_safe(page, next, &curr->cache.page_lru, page_lru)
+ {
+ *size_pruned += page->size;
+ ret = __ioc_page_destroy(page);
+
+ if (ret != -1)
+ table->cache_used -= ret;
+
+ gf_msg_trace(table->xl->name, 0,
+ "index = %d && "
+ "table->cache_used = %" PRIu64
+ " && table->"
+ "cache_size = %" PRIu64,
+ index, table->cache_used, table->cache_size);
+
+ if ((*size_pruned) >= size_to_prune)
+ break;
+ }
+
+ if (ioc_empty(&curr->cache)) {
+ list_del_init(&curr->inode_lru);
+ }
+
+out:
+ return 0;
+}
/*
* ioc_prune - prune the cache. we have a limit to the number of pages we
* can have in-memory.
@@ -109,171 +193,157 @@ ioc_page_destroy (ioc_page_t *page)
*
*/
int32_t
-ioc_prune (ioc_table_t *table)
+ioc_prune(ioc_table_t *table)
{
- ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
- ioc_page_t *page = NULL, *next = NULL;
- int32_t ret = -1;
- int32_t index = 0;
- uint64_t size_to_prune = 0;
- uint64_t size_pruned = 0;
-
- ioc_table_lock (table);
- {
- size_to_prune = table->cache_used - table->cache_size;
- /* take out the least recently used inode */
- for (index=0; index < table->max_pri; index++) {
- list_for_each_entry_safe (curr, next_ioc_inode,
- &table->inode_lru[index],
- inode_lru) {
- /* prune page-by-page for this inode, till
- * we reach the equilibrium */
- ioc_inode_lock (curr);
- /* { */
-
- list_for_each_entry_safe (page, next,
- &curr->cache.page_lru,
- page_lru) {
- /* done with all pages, and not
- * reached equilibrium yet??
- * continue with next inode in
- * lru_list */
- size_pruned += page->size;
- ret = ioc_page_destroy (page);
-
- if (ret != -1)
- table->cache_used -= ret;
-
- gf_log (table->xl->name,
- GF_LOG_TRACE,
- "index = %d && table->cache_"
- "used = %"PRIu64" && table->"
- "cache_size = %"PRIu64,
- index, table->cache_used,
- table->cache_size);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe(page...) */
- if (ioc_empty (&curr->cache)) {
- list_del_init (&curr->inode_lru);
- }
-
- /* } */
- ioc_inode_unlock (curr);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe (curr...) */
-
- if (size_pruned >= size_to_prune)
- break;
- } /* for(index=0;...) */
-
- } /* ioc_inode_table locked region end */
- ioc_table_unlock (table);
-
- return 0;
+ ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
+ int32_t index = 0;
+ uint64_t size_to_prune = 0;
+ uint64_t size_pruned = 0;
+
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
+
+ ioc_table_lock(table);
+ {
+ size_to_prune = table->cache_used - table->cache_size;
+ /* take out the least recently used inode */
+ for (index = 0; index < table->max_pri; index++) {
+ list_for_each_entry_safe(curr, next_ioc_inode,
+ &table->inode_lru[index], inode_lru)
+ {
+ /* prune page-by-page for this inode, till
+ * we reach the equilibrium */
+ ioc_inode_lock(curr);
+ {
+ __ioc_inode_prune(curr, &size_pruned, size_to_prune, index);
+ }
+ ioc_inode_unlock(curr);
+
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* list_for_each_entry_safe (curr...) */
+
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* for(index=0;...) */
+
+ } /* ioc_inode_table locked region end */
+ ioc_table_unlock(table);
+
+out:
+ return 0;
}
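
The equilibrium test works on plain byte counts. A worked example with made-up numbers, for orientation only:

/* Illustration only, values invented:
 *   table->cache_used = 40 MiB, table->cache_size = 32 MiB
 *     => size_to_prune = 40 MiB - 32 MiB = 8 MiB
 * __ioc_inode_prune() destroys pages LRU-first, subtracting each
 * destroyed page's size from cache_used, and both loops break as
 * soon as size_pruned >= 8 MiB. */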
/*
- * ioc_page_create - create a new page.
+ * __ioc_page_create - create a new page.
*
- * @ioc_inode:
+ * @ioc_inode:
* @offset:
*
*/
ioc_page_t *
-ioc_page_create (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_create(ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t rounded_offset = 0;
- ioc_page_t *newpage = NULL;
-
- table = ioc_inode->table;
- rounded_offset = floor (offset, table->page_size);
-
- newpage = CALLOC (1, sizeof (*newpage));
- if (newpage == NULL) {
- goto out;
- }
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ioc_page_t *newpage = NULL;
- if (ioc_inode) {
- table = ioc_inode->table;
- } else {
- free (newpage);
- newpage = NULL;
- goto out;
- }
-
- newpage->offset = rounded_offset;
- newpage->inode = ioc_inode;
- pthread_mutex_init (&newpage->page_lock, NULL);
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
+
+ table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO("io-cache", table, out);
+
+ rounded_offset = gf_floor(offset, table->page_size);
+
+ newpage = GF_CALLOC(1, sizeof(*newpage), gf_ioc_mt_ioc_newpage_t);
+ if (newpage == NULL) {
+ goto out;
+ }
+
+ if (!ioc_inode) {
+ GF_FREE(newpage);
+ newpage = NULL;
+ goto out;
+ }
+
+ newpage->offset = rounded_offset;
+ newpage->inode = ioc_inode;
+ pthread_mutex_init(&newpage->page_lock, NULL);
+
+ rbthash_insert(ioc_inode->cache.page_table, newpage, &rounded_offset,
+ sizeof(rounded_offset));
- rbthash_insert (ioc_inode->cache.page_table, newpage, &rounded_offset,
- sizeof (rounded_offset));
-
- list_add_tail (&newpage->page_lru, &ioc_inode->cache.page_lru);
+ list_add_tail(&newpage->page_lru, &ioc_inode->cache.page_lru);
- page = newpage;
+ page = newpage;
- gf_log ("io-cache", GF_LOG_TRACE,
- "returning new page %p", page);
+ gf_msg_trace("io-cache", 0, "returning new page %p", page);
out:
- return page;
+ return page;
}
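
Pages are keyed in the rbthash table by their offset rounded down to a page boundary, so every byte of a request maps to a deterministic page key. A self-contained sketch of that rounding, assuming gf_floor() rounds down to the nearest multiple and the usual 128 KiB page size:

/* Standalone illustration of the rounded-offset page key. */
#include <stdio.h>

int
main(void)
{
    unsigned long long page_size = 131072;   /* assumed 128 KiB default */
    unsigned long long offsets[] = {0, 4096, 131071, 131072, 200000};
    size_t i;

    for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
        /* same arithmetic as gf_floor(offset, page_size) */
        unsigned long long key = (offsets[i] / page_size) * page_size;
        printf("offset %llu -> page key %llu\n", offsets[i], key);
    }
    return 0;
}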
-/*
- * ioc_wait_on_page - pause a frame to wait till the arrival of a page.
- * here we need to handle the case when the frame who calls wait_on_page
- * himself has caused page_fault
+/*
+ * ioc_wait_on_page - pause a frame until the arrival of a page.
+ * here we need to handle the case where the frame that calls wait_on_page
+ * has itself caused the page fault
*
* @page: page to wait on
* @frame: call frame who is waiting on page
*
*/
void
-ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
+__ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size)
{
- ioc_waitq_t *waitq = NULL;
- ioc_local_t *local = frame->local;
-
- waitq = CALLOC (1, sizeof (*waitq));
- if (waitq == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) waiting on page = %p, offset=%"PRId64", "
- "size=%"GF_PRI_SIZET"",
- frame, page, offset, size);
-
- waitq->data = frame;
- waitq->next = page->waitq;
- waitq->pending_offset = offset;
- waitq->pending_size = size;
- page->waitq = waitq;
- /* one frame can wait only once on a given page,
- * local->wait_count is number of pages a frame is waiting on */
- ioc_local_lock (local);
- {
- local->wait_count++;
- }
- ioc_local_unlock (local);
+ ioc_waitq_t *waitq = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("io-cache", frame, out);
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO(frame->this->name, local, out);
+
+ if (page == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_NULL_PAGE_WAIT, NULL);
+ goto out;
+ }
+
+ waitq = GF_CALLOC(1, sizeof(*waitq), gf_ioc_mt_ioc_waitq_t);
+ if (waitq == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ gf_msg_trace(frame->this->name, 0,
+ "frame(%p) waiting on page = %p, offset=%" PRId64
+ ", "
+ "size=%" GF_PRI_SIZET "",
+ frame, page, offset, size);
+
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ waitq->pending_offset = offset;
+ waitq->pending_size = size;
+ page->waitq = waitq;
+ /* one frame can wait only once on a given page,
+ * local->wait_count is number of pages a frame is waiting on */
+ ioc_local_lock(local);
+ {
+ local->wait_count++;
+ }
+ ioc_local_unlock(local);
out:
- return;
+ return;
}
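
The wait queue built here is a simple singly linked push-down list per page: faulting frames are pushed at the head, and ioc_waitq_return() later drains the list and resumes each frame. A minimal stand-alone sketch of that shape, with the glusterfs types replaced by a generic payload:

#include <stdlib.h>

struct waitq {
    struct waitq *next;
    void *data;                 /* stands in for the waiting call frame */
};

static struct waitq *
waitq_push(struct waitq *head, void *frame)
{
    struct waitq *w = calloc(1, sizeof(*w));
    if (w == NULL)
        return head;            /* caller records ENOMEM, as above */
    w->data = frame;
    w->next = head;             /* newest waiter at the head */
    return w;
}

static void
waitq_drain(struct waitq *head, void (*resume)(void *))
{
    struct waitq *next = NULL;

    for (; head; head = next) {
        next = head->next;
        resume(head->data);     /* ioc_frame_return() in the real code */
        free(head);
    }
}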
-
/*
- * ioc_cache_still_valid - see if cached pages ioc_inode are still valid
+ * ioc_cache_still_valid - see if the cached pages of ioc_inode are still valid
* against given stbuf
*
* @ioc_inode:
@@ -282,204 +352,203 @@ out:
* assumes ioc_inode is locked
*/
int8_t
-ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct stat *stbuf)
+ioc_cache_still_valid(ioc_inode_t *ioc_inode, struct iatt *stbuf)
{
- int8_t cache_still_valid = 1;
+ int8_t cache_still_valid = 1;
+
+ GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out);
#if 0
- if (!stbuf || (stbuf->st_mtime != ioc_inode->cache.mtime) ||
- (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
- cache_still_valid = 0;
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
+ (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
+ cache_still_valid = 0;
#else
- if (!stbuf || (stbuf->st_mtime != ioc_inode->cache.mtime)
- || (ST_MTIM_NSEC(stbuf) != ioc_inode->cache.mtime_nsec))
- cache_still_valid = 0;
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
+ (stbuf->ia_mtime_nsec != ioc_inode->cache.mtime_nsec))
+ cache_still_valid = 0;
#endif
#if 0
- /* talk with avati@gluster.com to enable this section */
- if (!ioc_inode->mtime && stbuf) {
- cache_still_valid = 1;
- ioc_inode->mtime = stbuf->st_mtime;
- }
+ /* talk with avati@gluster.com to enable this section */
+ if (!ioc_inode->mtime && stbuf) {
+ cache_still_valid = 1;
+ ioc_inode->mtime = stbuf->ia_mtime;
+ }
#endif
- return cache_still_valid;
+out:
+ return cache_still_valid;
}
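
Validation here is purely mtime-based; the time-based half of revalidation is hinted at by the cache.last_revalidate update in ioc_fault_cbk() further down. A hedged sketch of how a caller might combine the two checks; the cache-timeout value is passed in as a parameter because the corresponding table field is not shown in this hunk:

/* Sketch only; not part of the patch. */
static int
cache_usable(ioc_inode_t *ioc_inode, struct iatt *stbuf, time_t now,
             time_t cache_timeout)
{
    if (!ioc_cache_still_valid(ioc_inode, stbuf))
        return 0;                           /* mtime changed, drop cache */

    if ((now - ioc_inode->cache.last_revalidate) > cache_timeout)
        return 0;                           /* too old, revalidate first */

    return 1;                               /* serve from cache */
}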
-
void
-ioc_waitq_return (ioc_waitq_t *waitq)
+ioc_waitq_return(ioc_waitq_t *waitq)
{
- ioc_waitq_t *trav = NULL;
- ioc_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ioc_waitq_t *trav = NULL;
+ ioc_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ioc_frame_return (frame);
- free (trav);
- }
+ frame = trav->data;
+ ioc_frame_return(frame);
+ GF_FREE(trav);
+ }
}
-
int
-ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- off_t offset = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- int32_t destroy_size = 0;
- size_t page_size = 0;
- ioc_waitq_t *waitq = NULL;
- size_t iobref_page_size = 0;
- char zero_filled = 0;
+ ioc_local_t *local = NULL;
+ off_t offset = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int32_t destroy_size = 0;
+ size_t page_size = 0;
+ ioc_waitq_t *waitq = NULL;
+ size_t iobref_page_size = 0;
+ char zero_filled = 0;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ GF_ASSERT(local);
+
+ offset = local->pending_offset;
+ ioc_inode = local->inode;
+ GF_ASSERT(ioc_inode);
+
+ table = ioc_inode->table;
+ GF_ASSERT(table);
+
+ zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0));
+
+ ioc_inode_lock(ioc_inode);
+ {
+ if (op_ret == -1 ||
+ !(zero_filled || ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_msg_trace(ioc_inode->table->xl->name, 0,
+ "cache for inode(%p) is invalid. flushing "
+ "all pages",
+ ioc_inode);
+ destroy_size = __ioc_inode_flush(ioc_inode);
+ }
- local = frame->local;
- offset = local->pending_offset;
- ioc_inode = local->inode;
- table = ioc_inode->table;
-
- trav_offset = offset;
- payload_size = op_ret;
-
- zero_filled = ((op_ret >=0)
- && (stbuf->st_mtime == 0));
-
- ioc_inode_lock (ioc_inode);
- {
- if (op_ret == -1 ||
- !(zero_filled ||
- ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "cache for inode(%p) is invalid. flushing "
- "all pages", ioc_inode);
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
-
- if ((op_ret >= 0) && !zero_filled) {
- ioc_inode->cache.mtime = stbuf->st_mtime;
- ST_MTIM_NSEC_SET(stbuf, ioc_inode->cache.mtime_nsec);
+ if ((op_ret >= 0) && !zero_filled) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
+
+ ioc_inode->cache.last_revalidate = gf_time();
+
+ if (op_ret < 0) {
+ /* error, readv returned -1 */
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page)
+ waitq = __ioc_page_error(page, op_ret, op_errno);
+ } else {
+ gf_msg_trace(ioc_inode->table->xl->name, 0, "op_ret = %d", op_ret);
+ page = __ioc_page_get(ioc_inode, offset);
+ if (!page) {
+ /* page was flushed */
+ /* some serious bug ? */
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_WASTED_COPY, "offset=%" PRId64, offset,
+ "page-size=%" PRId64, table->page_size, "ioc_inode=%p",
+ ioc_inode, NULL);
+ } else {
+ if (page->vector) {
+ iobref_unref(page->iobref);
+ GF_FREE(page->vector);
+ page->vector = NULL;
+ page->iobref = NULL;
+ }
+
+ /* keep a copy of the page for our cache */
+ page->vector = iov_dup(vector, count);
+ if (page->vector == NULL) {
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page != NULL)
+ waitq = __ioc_page_error(page, -1, ENOMEM);
+ goto unlock;
}
- gettimeofday (&ioc_inode->cache.tv, NULL);
-
- if (op_ret < 0) {
- /* error, readv returned -1 */
- page = ioc_page_get (ioc_inode, offset);
- if (page)
- waitq = ioc_page_error (page, op_ret,
- op_errno);
- } else {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "op_ret = %d", op_ret);
- page = ioc_page_get (ioc_inode, offset);
- if (!page) {
- /* page was flushed */
- /* some serious bug ? */
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] "
- "ioc_inode=%p", offset,
- table->page_size, ioc_inode);
- } else {
- if (page->vector) {
- iobref_unref (page->iobref);
- free (page->vector);
- page->vector = NULL;
- }
-
- /* keep a copy of the page for our cache */
- page->vector = iov_dup (vector, count);
- if (page->vector == NULL) {
- page = ioc_page_get (ioc_inode, offset);
- if (page != NULL)
- waitq = ioc_page_error (page,
- -1,
- ENOMEM);
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
-
- page->count = count;
- if (iobref) {
- page->iobref = iobref_ref (iobref);
- } else {
- /* TODO: we have got a response to
- * our request and no data */
- gf_log (this->name, GF_LOG_CRITICAL,
- "frame>root>rsp_refs is null");
- } /* if(frame->root->rsp_refs) */
-
- /* page->size should indicate exactly how
- * much the readv call to the child
- * translator returned. earlier op_ret
- * from child translator was used, which
- * gave rise to a bug where reads from
- * io-cached volume were resulting in 0
- * byte replies */
- page_size = iov_length(vector, count);
-
- page->size = page_size;
-
- iobref_page_size = iobref_size (page->iobref);
-
- if (page->waitq) {
- /* wake up all the frames waiting on
- * this page, including
- * the frame which triggered fault */
- waitq = ioc_page_wakeup (page);
- } /* if(page->waitq) */
- } /* if(!page)...else */
- } /* if(op_ret < 0)...else */
- } /* ioc_inode locked region end */
+ page->count = count;
+ if (iobref) {
+ page->iobref = iobref_ref(iobref);
+ } else {
+ /* TODO: we have got a response to
+ * our request and no data */
+ gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
+ IO_CACHE_MSG_FRAME_NULL, NULL);
+ } /* if(frame->root->rsp_refs) */
+
+ /* page->size should indicate exactly how
+ * much the readv call to the child
+ * translator returned. earlier op_ret
+ * from child translator was used, which
+ * gave rise to a bug where reads from
+ * io-cached volume were resulting in 0
+ * byte replies */
+ page_size = iov_length(vector, count);
+ page->size = page_size;
+ page->op_errno = op_errno;
+
+ iobref_page_size = iobref_size(page->iobref);
+
+ if (page->waitq) {
+ /* wake up all the frames waiting on
+ * this page, including
+ * the frame which triggered fault */
+ waitq = __ioc_page_wakeup(page, op_errno);
+ } /* if(page->waitq) */
+ } /* if(!page)...else */
+ } /* if(op_ret < 0)...else */
+ } /* ioc_inode locked region end */
unlock:
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_unlock(ioc_inode);
- ioc_waitq_return (waitq);
+ ioc_waitq_return(waitq);
- if (iobref_page_size) {
- ioc_table_lock (table);
- {
- table->cache_used += iobref_page_size;
- }
- ioc_table_unlock (table);
- }
+ if (iobref_page_size) {
+ ioc_table_lock(table);
+ {
+ table->cache_used += iobref_page_size;
+ }
+ ioc_table_unlock(table);
+ }
- if (destroy_size) {
- ioc_table_lock (table);
- {
- table->cache_used -= destroy_size;
- }
- ioc_table_unlock (table);
- }
+ if (destroy_size) {
+ ioc_table_lock(table);
+ {
+ table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock(table);
+ }
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune(ioc_inode->table)) {
+ ioc_prune(ioc_inode->table);
+ }
- gf_log (this->name, GF_LOG_TRACE, "fault frame %p returned", frame);
- pthread_mutex_destroy (&local->local_lock);
+ gf_msg_trace(frame->this->name, 0, "fault frame %p returned", frame);
+ pthread_mutex_destroy(&local->local_lock);
- fd_unref (local->fd);
+ fd_unref(local->fd);
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
/*
* ioc_page_fault -
- *
+ *
* @ioc_inode:
* @frame:
* @fd:
@@ -487,201 +556,216 @@ unlock:
*
*/
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset)
+ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset)
{
- ioc_table_t *table = NULL;
- call_frame_t *fault_frame = NULL;
- ioc_local_t *fault_local = NULL;
- int32_t op_ret = -1, op_errno = -1;
- ioc_waitq_t *waitq = NULL;
- ioc_page_t *page = NULL;
-
- table = ioc_inode->table;
- fault_frame = copy_frame (frame);
- if (fault_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- goto err;
+ ioc_table_t *table = NULL;
+ call_frame_t *fault_frame = NULL;
+ ioc_local_t *fault_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+ ioc_waitq_t *waitq = NULL;
+ ioc_page_t *page = NULL;
+
+ GF_ASSERT(ioc_inode);
+ if (frame == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg("io-cache", GF_LOG_WARNING, EINVAL, IO_CACHE_MSG_PAGE_FAULT,
+ NULL);
+ goto err;
+ }
+
+ table = ioc_inode->table;
+ fault_frame = copy_frame(frame);
+ if (fault_frame == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = frame->local;
+ fault_local = mem_get0(THIS->local_pool);
+ if (fault_local == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ STACK_DESTROY(fault_frame->root);
+ goto err;
+ }
+
+ /* NOTE: because of copy_frame(), the frame of the fop whose fd_ref we
+ * have been using so far won't be valid until we get a reply from the
+ * server. we unref this fd in fault_cbk */
+ fault_local->fd = fd_ref(fd);
+
+ fault_frame->local = fault_local;
+ pthread_mutex_init(&fault_local->local_lock, NULL);
+
+ INIT_LIST_HEAD(&fault_local->fill_list);
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = table->page_size;
+ fault_local->inode = ioc_inode;
+
+ if (local && local->xattr_req)
+ fault_local->xattr_req = dict_ref(local->xattr_req);
+
+ gf_msg_trace(frame->this->name, 0,
+ "stack winding page fault for offset = %" PRId64
+ " with "
+ "frame %p",
+ offset, fault_frame);
+
+ STACK_WIND(fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
+ FIRST_CHILD(fault_frame->this)->fops->readv, fd,
+ table->page_size, offset, 0, fault_local->xattr_req);
+ return;
+
+err:
+ ioc_inode_lock(ioc_inode);
+ {
+ page = __ioc_page_get(ioc_inode, offset);
+ if (page != NULL) {
+ waitq = __ioc_page_error(page, op_ret, op_errno);
}
+ }
+ ioc_inode_unlock(ioc_inode);
- fault_local = CALLOC (1, sizeof (ioc_local_t));
- if (fault_local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- STACK_DESTROY (fault_frame->root);
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
- goto err;
+ if (waitq != NULL) {
+ ioc_waitq_return(waitq);
+ }
+}
+
+int32_t
+__ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size, int32_t op_errno)
+{
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_fill_t *new = NULL;
+ int8_t found = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("io-cache", frame, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(frame->this->name, local, out);
+
+ if (page == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ IO_CACHE_MSG_SERVE_READ_REQUEST, NULL);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ ioc_inode = page->inode;
+
+ gf_msg_trace(frame->this->name, 0,
+ "frame (%p) offset = %" PRId64 " && size = %" GF_PRI_SIZET
+ " "
+ "&& page->size = %" GF_PRI_SIZET " && wait_count = %d",
+ frame, offset, size, page->size, local->wait_count);
+
+ /* immediately move this page to the end of the page_lru list */
+ list_move_tail(&page->page_lru, &ioc_inode->cache.page_lru);
+ /* fill local->pending_size bytes from local->pending_offset */
+ if (local->op_ret != -1) {
+ local->op_errno = op_errno;
+
+ if (page->size == 0) {
+ goto done;
}
- /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
- * are using till now won't be valid till we get reply from server.
- * we unref this fd, in fault_cbk */
- fault_local->fd = fd_ref (fd);
+ if (offset > page->offset)
+ /* offset is offset in file, convert it to offset in
+ * page */
+ src_offset = offset - page->offset;
+ /* FIXME: since offset is the offset within the page, is the
+ * else case valid? */
+ else
+ /* local->pending_offset is in previous page. do not
+ * fill until we have filled all previous pages */
+ dst_offset = page->offset - offset;
+
+ /* we have to copy from offset to either end of this page
+ * or till the requested size */
+ copy_size = min(page->size - src_offset, size - dst_offset);
+
+ if (copy_size < 0) {
+ /* the page holds fewer bytes than requested and the
+ required offset lies beyond the data present in the page */
+ copy_size = src_offset = 0;
+ }
- fault_frame->local = fault_local;
- pthread_mutex_init (&fault_local->local_lock, NULL);
+ gf_msg_trace(page->inode->table->xl->name, 0,
+ "copy_size = %" GF_PRI_SIZET
+ " && src_offset = "
+ "%" PRId64 " && dst_offset = %" PRId64 "",
+ copy_size, src_offset, dst_offset);
- INIT_LIST_HEAD (&fault_local->fill_list);
- fault_local->pending_offset = offset;
- fault_local->pending_size = table->page_size;
- fault_local->inode = ioc_inode;
+ {
+ new = GF_CALLOC(1, sizeof(*new), gf_ioc_mt_ioc_fill_t);
+ if (new == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref(page->iobref);
+ new->count = iov_subset(page->vector, page->count, src_offset,
+ copy_size, &new->vector, 0);
+ if (new->count < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "stack winding page fault for offset = %"PRId64" with "
- "frame %p", offset, fault_frame);
-
- STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
- FIRST_CHILD(fault_frame->this)->fops->readv, fd,
- table->page_size, offset);
- return;
+ iobref_unref(new->iobref);
+ GF_FREE(new);
+ goto out;
+ }
+
+ /* add the ioc_fill to fill_list for this frame */
+ if (list_empty(&local->fill_list)) {
+ /* if list is empty, then this is the first
+ * time we are filling frame, add the
+ * ioc_fill_t to the end of list */
+ list_add_tail(&new->list, &local->fill_list);
+ } else {
+ found = 0;
+ /* list is not empty, we need to look for
+ * where this offset fits in list */
+ list_for_each_entry(fill, &local->fill_list, list)
+ {
+ if (fill->offset > new->offset) {
+ found = 1;
+ break;
+ }
+ }
-err:
- page = ioc_page_get (ioc_inode, offset);
- if (page != NULL) {
- waitq = ioc_page_error (page, op_ret, op_errno);
- if (waitq != NULL) {
- ioc_waitq_return (waitq);
+ if (found) {
+ list_add_tail(&new->list, &fill->list);
+ } else {
+ list_add_tail(&new->list, &local->fill_list);
}
+ }
}
-}
-int32_t
-ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
-{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_fill_t *new = NULL;
- int8_t found = 0;
- int32_t ret = 0;
-
- local = frame->local;
- ioc_inode = page->inode;
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
- "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
- frame, offset, size, page->size, local->wait_count);
-
- /* immediately move this page to the end of the page_lru list */
- list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
- /* fill local->pending_size bytes from local->pending_offset */
- if (local->op_ret != -1 && page->size) {
- if (offset > page->offset)
- /* offset is offset in file, convert it to offset in
- * page */
- src_offset = offset - page->offset;
- /*FIXME: since offset is the offset within page is the
- * else case valid? */
- else
- /* local->pending_offset is in previous page. do not
- * fill until we have filled all previous pages */
- dst_offset = page->offset - offset;
-
- /* we have to copy from offset to either end of this page
- * or till the requested size */
- copy_size = min (page->size - src_offset,
- size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "copy_size = %"GF_PRI_SIZET" && src_offset = "
- "%"PRId64" && dst_offset = %"PRId64"",
- copy_size, src_offset, dst_offset);
-
- {
- new = CALLOC (1, sizeof (*new));
- if (new == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- ret = -1;
- gf_log (page->inode->table->xl->name,
- GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector,
- page->count,
- src_offset,
- src_offset + copy_size,
- NULL);
-
- new->vector = CALLOC (new->count,
- sizeof (struct iovec));
- if (new->vector == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
-
- iobref_unref (new->iobref);
- FREE (new);
-
- ret = -1;
- gf_log (page->inode->table->xl->name,
- GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- new->count = iov_subset (page->vector,
- page->count,
- src_offset,
- src_offset + copy_size,
- new->vector);
-
-
-
- /* add the ioc_fill to fill_list for this frame */
- if (list_empty (&local->fill_list)) {
- /* if list is empty, then this is the first
- * time we are filling frame, add the
- * ioc_fill_t to the end of list */
- list_add_tail (&new->list, &local->fill_list);
- } else {
- found = 0;
- /* list is not empty, we need to look for
- * where this offset fits in list */
- list_for_each_entry (fill, &local->fill_list,
- list) {
- if (fill->offset > new->offset) {
- found = 1;
- break;
- }
- }
-
- if (found) {
- found = 0;
- list_add_tail (&new->list,
- &fill->list);
- } else {
- list_add_tail (&new->list,
- &local->fill_list);
- }
- }
- }
- local->op_ret += copy_size;
- }
+ local->op_ret += copy_size;
+ }
+done:
+ ret = 0;
out:
- return ret;
+ return ret;
}
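
A worked example of the offset arithmetic above, with illustrative numbers and an assumed 128 KiB page size:

/*   request: offset = 135168, size = 65536
 *   page:    page->offset = 131072, page->size = 131072
 *   offset > page->offset  => src_offset = 135168 - 131072 = 4096
 *                             dst_offset = 0
 *   copy_size = min(page->size - src_offset, size - dst_offset)
 *             = min(126976, 65536) = 65536
 * so 64 KiB are served starting at byte 4096 of this cached page. */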
/*
- * ioc_frame_unwind - frame unwinds only from here
+ * ioc_frame_unwind - frame unwinds only from here
*
* @frame: call frame to unwind
*
@@ -690,88 +774,109 @@ out:
*
*/
static void
-ioc_frame_unwind (call_frame_t *frame)
+ioc_frame_unwind(call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL, *next = NULL;
- int32_t count = 0;
- struct iovec *vector = NULL;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- struct stat stbuf = {0,};
- int32_t op_ret = 0, op_errno = 0;
-
- local = frame->local;
- // ioc_local_lock (local);
- frame->local = NULL;
- iobref = iobref_new ();
- if (iobref == NULL) {
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL, *next = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int32_t op_ret = 0, op_errno = 0;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ IO_CACHE_MSG_LOCAL_NULL, NULL);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0) {
+ op_ret = local->op_ret;
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ // ioc_local_lock (local);
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+
+ if (list_empty(&local->fill_list)) {
+ gf_msg_trace(frame->this->name, 0,
+ "frame(%p) has 0 entries in local->fill_list "
+ "(offset = %" PRId64 " && size = %" GF_PRI_SIZET ")",
+ frame, local->offset, local->size);
+ }
+
+ list_for_each_entry(fill, &local->fill_list, list) { count += fill->count; }
+
+ vector = GF_CALLOC(count, sizeof(*vector), gf_ioc_mt_iovec);
+ if (vector == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+
+ list_for_each_entry_safe(fill, next, &local->fill_list, list)
+ {
+ /* TODO: check why this if clause is needed at all. */
+ if ((vector != NULL) && (iobref != NULL)) {
+ memcpy(((char *)vector) + copied, fill->vector,
+ fill->count * sizeof(*vector));
+
+ copied += (fill->count * sizeof(*vector));
+
+ if (iobref_merge(iobref, fill->iobref)) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
+ }
}
- if (list_empty (&local->fill_list)) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) has 0 entries in local->fill_list "
- "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
- frame, local->offset, local->size);
- }
-
- list_for_each_entry (fill, &local->fill_list, list) {
- count += fill->count;
- }
-
- vector = CALLOC (count, sizeof (*vector));
- if (vector == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
-
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
- }
-
- list_for_each_entry_safe (fill, next, &local->fill_list, list) {
- if ((vector != NULL) && (iobref != NULL)) {
- memcpy (((char *)vector) + copied,
- fill->vector,
- fill->count * sizeof (*vector));
-
- copied += (fill->count * sizeof (*vector));
-
- iobref_merge (iobref, fill->iobref);
- }
-
- list_del (&fill->list);
- iobref_unref (fill->iobref);
- free (fill->vector);
- free (fill);
- }
-
- if (op_ret != -1) {
- op_ret = iov_length (vector, count);
- }
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) unwinding with op_ret=%d", frame, op_ret);
-
- // ioc_local_unlock (local);
-
- STACK_UNWIND_STRICT (readv, frame, op_ret, local->op_errno, vector,
- count, &stbuf, iobref);
-
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
-
- if (vector != NULL) {
- free (vector);
- vector = NULL;
- }
-
- pthread_mutex_destroy (&local->local_lock);
- free (local);
-
- return;
+ list_del(&fill->list);
+ iobref_unref(fill->iobref);
+ GF_FREE(fill->vector);
+ GF_FREE(fill);
+ }
+
+ if (op_ret != -1) {
+ op_ret = iov_length(vector, count);
+ }
+
+unwind:
+ gf_msg_trace(frame->this->name, 0, "frame(%p) unwinding with op_ret=%d",
+ frame, op_ret);
+
+ // ioc_local_unlock (local);
+
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, &stbuf,
+ iobref, NULL);
+
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ if (vector != NULL) {
+ GF_FREE(vector);
+ vector = NULL;
+ }
+
+ if (local) {
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+ pthread_mutex_destroy(&local->local_lock);
+ mem_put(local);
+ }
+ return;
}
/*
@@ -781,61 +886,117 @@ ioc_frame_unwind (call_frame_t *frame)
* to be called only when a frame is waiting on an in-transit page
*/
void
-ioc_frame_return (call_frame_t *frame)
+ioc_frame_return(call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- int32_t wait_count = 0;
+ ioc_local_t *local = NULL;
+ int32_t wait_count = 0;
- local = frame->local;
- assert (local->wait_count > 0);
+ GF_ASSERT(frame);
- ioc_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ioc_local_unlock (local);
+ local = frame->local;
+ GF_ASSERT(local->wait_count > 0);
- if (!wait_count) {
- ioc_frame_unwind (frame);
- }
+ ioc_local_lock(local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ioc_local_unlock(local);
- return;
+ if (!wait_count) {
+ ioc_frame_unwind(frame);
+ }
+
+ return;
}
-/*
+/*
* ioc_page_wakeup -
* @page:
*
* to be called only when a frame is waiting on an in-transit page
*/
ioc_waitq_t *
-ioc_page_wakeup (ioc_page_t *page)
+__ioc_page_wakeup(ioc_page_t *page, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int32_t ret = -1;
-
- waitq = page->waitq;
- page->waitq = NULL;
-
- trav = waitq;
- page->ready = 1;
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "page is %p && waitq = %p", page, waitq);
-
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ret = ioc_frame_fill (page, frame, trav->pending_offset,
- trav->pending_size);
- if (ret == -1) {
- break;
- }
- }
-
- return waitq;
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("io-cache", page, out);
+
+ waitq = page->waitq;
+ page->waitq = NULL;
+
+ page->ready = 1;
+
+ gf_msg_trace(page->inode->table->xl->name, 0, "page is %p && waitq = %p",
+ page, waitq);
+
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ret = __ioc_frame_fill(page, frame, trav->pending_offset,
+ trav->pending_size, op_errno);
+ if (ret == -1) {
+ break;
+ }
+ }
+
+ if (page->stale) {
+ __ioc_page_destroy(page);
+ }
+
+out:
+ return waitq;
}
+/*
+ * ioc_page_error -
+ * @page:
+ * @op_ret:
+ * @op_errno:
+ *
+ */
+ioc_waitq_t *
+__ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno)
+{
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int64_t ret = 0;
+ ioc_table_t *table = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("io-cache", page, out);
+
+ waitq = page->waitq;
+ page->waitq = NULL;
+
+ gf_msg_debug(page->inode->table->xl->name, 0,
+ "page error for page = %p & waitq = %p", page, waitq);
+
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+
+ local = frame->local;
+ ioc_local_lock(local);
+ {
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ ioc_local_unlock(local);
+ }
+
+ table = page->inode->table;
+ ret = __ioc_page_destroy(page);
+
+ if (ret != -1) {
+ table->cache_used -= ret;
+ }
+
+out:
+ return waitq;
+}
/*
* ioc_page_error -
@@ -845,41 +1006,22 @@ ioc_page_wakeup (ioc_page_t *page)
*
*/
ioc_waitq_t *
-ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
+ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int64_t ret = 0;
- ioc_table_t *table = NULL;
- ioc_local_t *local = NULL;
-
- waitq = page->waitq;
- page->waitq = NULL;
-
- gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
- "page error for page = %p & waitq = %p", page, waitq);
-
- for (trav = waitq; trav; trav = trav->next) {
-
- frame = trav->data;
-
- local = frame->local;
- ioc_local_lock (local);
- {
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- ioc_local_unlock (local);
- }
-
- table = page->inode->table;
- ret = ioc_page_destroy (page);
-
- if (ret != -1) {
- table->cache_used -= ret;
- }
-
- return waitq;
+ ioc_waitq_t *waitq = NULL;
+ struct ioc_inode *inode = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock(page->inode);
+ {
+ inode = page->inode;
+ waitq = __ioc_page_error(page, op_ret, op_errno);
+ }
+ ioc_inode_unlock(inode);
+
+out:
+ return waitq;
}
diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am
index 38dea3eb7fc..7570cf41ed2 100644
--- a/xlators/performance/io-threads/src/Makefile.am
+++ b/xlators/performance/io-threads/src/Makefile.am
@@ -1,14 +1,16 @@
xlator_LTLIBRARIES = io-threads.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_threads_la_LDFLAGS = -module -avoidversion
+io_threads_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_threads_la_SOURCES = io-threads.c
io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-threads.h
+noinst_HEADERS = io-threads.h iot-mem-types.h io-threads-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/io-threads/src/io-threads-messages.h b/xlators/performance/io-threads/src/io-threads-messages.h
new file mode 100644
index 00000000000..6229c353f96
--- /dev/null
+++ b/xlators/performance/io-threads/src/io-threads-messages.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IO_THREADS_MESSAGES_H_
+#define _IO_THREADS_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but do not delete it. This prevents the reuse of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(IO_THREADS, IO_THREADS_MSG_INIT_FAILED,
+ IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, IO_THREADS_MSG_NO_MEMORY,
+ IO_THREADS_MSG_VOL_MISCONFIGURED, IO_THREADS_MSG_SIZE_NOT_SET,
+ IO_THREADS_MSG_OUT_OF_MEMORY, IO_THREADS_MSG_PTHREAD_INIT_FAILED,
+ IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED);
+
+#define IO_THREADS_MSG_INIT_FAILED_STR "Thread attribute initialization failed"
+#define IO_THREADS_MSG_SIZE_NOT_SET_STR "Using default thread stack size"
+#define IO_THREADS_MSG_NO_MEMORY_STR "Memory accounting init failed"
+#define IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED_STR \
+ "FATAL: iot not configured with exactly one child"
+#define IO_THREADS_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile"
+#define IO_THREADS_MSG_OUT_OF_MEMORY_STR "out of memory"
+#define IO_THREADS_MSG_PTHREAD_INIT_FAILED_STR "init failed"
+#define IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED_STR \
+ "cannot initialize worker threads, exiting init"
+#endif /* _IO_THREADS_MESSAGES_H_ */
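
Each GLFS_MSGID identifier pairs with its *_STR default text at structured-log call sites. A small sketch of such a call, mirroring the gf_smsg() usage visible in the io-cache hunks above (illustrative, not part of the patch):

static void
log_worker_spawn_failure(xlator_t *this, int err)
{
    /* the message ID carries the catalogued text; extra fields go
     * before the terminating NULL */
    gf_smsg(this->name, GF_LOG_ERROR, err,
            IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED, NULL);
}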
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index e99012cc0d6..3d24cc97f4b 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -1,2948 +1,1590 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "io-threads.h"
+#include <signal.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
-#include "locking.h"
-
-typedef void *(*iot_worker_fn)(void*);
-
-void
-iot_stop_worker (iot_worker_t *worker);
-
-void
-iot_stop_workers (iot_worker_t **workers, int start_idx, int count);
-
-void
-_iot_queue (iot_worker_t *worker, iot_request_t *req);
-
-iot_request_t *
-iot_init_request (iot_worker_t *conf, call_stub_t *stub);
-
-int
-iot_startup_workers (iot_worker_t **workers, int start_idx, int count,
- iot_worker_fn workerfunc);
+#include <glusterfs/locking.h>
+#include "io-threads-messages.h"
+#include <glusterfs/timespec.h>
void *
-iot_worker_unordered (void *arg);
-
-void *
-iot_worker_ordered (void *arg);
-
-int
-iot_startup_worker (iot_worker_t *worker, iot_worker_fn workerfunc);
-
-void
-iot_destroy_request (iot_worker_t *worker, iot_request_t * req);
-
-void
-iot_notify_worker (iot_worker_t *worker)
-{
-#ifndef HAVE_SPINLOCK
- pthread_cond_broadcast (&worker->notifier);
-#else
- sem_post (&worker->notifier);
-#endif
-
- return;
+iot_worker(void *arg);
+int
+iot_workers_scale(iot_conf_t *conf);
+int
+__iot_workers_scale(iot_conf_t *conf);
+struct volume_options options[];
+
+#define IOT_FOP(name, frame, this, args...) \
+ do { \
+ call_stub_t *__stub = NULL; \
+ int __ret = -1; \
+ \
+ __stub = fop_##name##_stub(frame, default_##name##_resume, args); \
+ if (!__stub) { \
+ __ret = -ENOMEM; \
+ goto out; \
+ } \
+ \
+ __ret = iot_schedule(frame, this, __stub); \
+ \
+ out: \
+ if (__ret < 0) { \
+ default_##name##_failure_cbk(frame, -__ret); \
+ if (__stub != NULL) { \
+ call_stub_destroy(__stub); \
+ } \
+ } \
+ } while (0)
+
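+
+/* IOT_FOP packages the stub-create, enqueue and unwind-on-failure
+ * boilerplate, so each fop entry point reduces to a single macro
+ * invocation. The sketch below shows the shape of such a handler,
+ * assuming the standard lookup signature; the real handlers appear in
+ * later hunks of this patch and this one is illustrative only.
+ *
+ * int
+ * iot_lookup_sketch(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ *                   dict_t *xattr_req)
+ * {
+ *     IOT_FOP(lookup, frame, this, loc, xattr_req);
+ *     return 0;
+ * }
+ */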
+iot_client_ctx_t *
+iot_get_ctx(xlator_t *this, client_t *client)
+{
+ iot_client_ctx_t *ctx = NULL;
+ iot_client_ctx_t *setted_ctx = NULL;
+ int i;
+
+ if (client_ctx_get(client, this, (void **)&ctx) != 0) {
+ ctx = GF_MALLOC(GF_FOP_PRI_MAX * sizeof(*ctx), gf_iot_mt_client_ctx_t);
+ if (ctx) {
+ for (i = 0; i < GF_FOP_PRI_MAX; ++i) {
+ INIT_LIST_HEAD(&ctx[i].clients);
+ INIT_LIST_HEAD(&ctx[i].reqs);
+ }
+ setted_ctx = client_ctx_set(client, this, ctx);
+ if (ctx != setted_ctx) {
+ GF_FREE(ctx);
+ ctx = setted_ctx;
+ }
+ }
+ }
+
+ return ctx;
}
-int
-iot_notify_wait (iot_worker_t *worker, int idletime)
-{
- struct timeval tv;
- struct timespec ts = {0, };
- int waitres = 0;
-
- gettimeofday (&tv, NULL);
- /* Slightly skew the idle time for threads so that, we dont
- * have all of them rushing to exit at the same time, if
- * they've been idle.
- */
- ts.tv_sec = skew_sec_idle_time (tv.tv_sec + idletime);
-
-#ifndef HAVE_SPINLOCK
- waitres = pthread_cond_timedwait (&worker->notifier, &worker->qlock,
- &ts);
-#else
- UNLOCK (&worker->qlock);
- errno = 0;
- waitres = sem_timedwait (&worker->notifier, &ts);
- LOCK (&worker->qlock);
- if (waitres < 0)
- waitres = errno;
-#endif
-
- return waitres;
-}
-
-void
-iot_notify_init (iot_worker_t *worker)
+call_stub_t *
+__iot_dequeue(iot_conf_t *conf, int *pri)
{
- if (worker == NULL)
- return;
+ call_stub_t *stub = NULL;
+ int i = 0;
+ iot_client_ctx_t *ctx;
- LOCK_INIT (&worker->qlock);
-
-#ifndef HAVE_SPINLOCK
- pthread_cond_init (&worker->notifier, NULL);
-#else
- sem_init (&worker->notifier, 0, 0);
-#endif
-
- return;
-}
-
-/* I know this function modularizes things a bit too much,
- * but it is easier on the eyes to read this than see all that locking,
- * queueing, and thread firing in the same curly block, as was the
- * case before this function.
- */
-int
-iot_request_queue_and_thread_fire (iot_worker_t *worker,
- iot_worker_fn workerfunc, iot_request_t *req)
-{
- int ret = -1;
- LOCK (&worker->qlock);
- {
- if (iot_worker_active (worker)) {
- _iot_queue (worker, req);
- ret = 0;
- }else {
- ret = iot_startup_worker (worker, workerfunc);
- if (ret < 0) {
- goto unlock;
- }
- _iot_queue (worker, req);
- }
+ *pri = -1;
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]) {
+ continue;
}
-unlock:
- UNLOCK (&worker->qlock);
-
- return ret;
-}
-
-
-int
-iot_unordered_request_balancer (iot_conf_t *conf)
-{
- long int rand = 0;
- int idx = 0;
-
- /* Decide which thread will service the request.
- * FIXME: This should change into some form of load-balancing.
- * */
- rand = random ();
-
- /* If scaling is on, we can choose from any thread
- * that has been allocated upto, max_o_threads, but
- * with scaling off, we'll never have threads more
- * than min_o_threads.
- */
- if (iot_unordered_scaling_on (conf))
- idx = (rand % conf->max_u_threads);
- else
- idx = (rand % conf->min_u_threads);
- return idx;
-}
-
-
-int
-iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub)
-{
- int32_t idx = 0;
- iot_worker_t *selected_worker = NULL;
- iot_request_t *req = NULL;
- int ret = -1;
-
- idx = iot_unordered_request_balancer (conf);
- selected_worker = conf->uworkers[idx];
-
- req = iot_init_request (selected_worker, stub);
- if (req == NULL) {
- ret = -ENOMEM;
- goto out;
+ if (list_empty(&conf->clients[i])) {
+ continue;
}
- ret = iot_request_queue_and_thread_fire (selected_worker,
- iot_worker_unordered, req);
- if (ret < 0) {
- iot_destroy_request (selected_worker, req);
+ /* Get the first per-client queue for this priority. */
+ ctx = list_first_entry(&conf->clients[i], iot_client_ctx_t, clients);
+ if (!ctx) {
+ continue;
}
-out:
- return ret;
-}
-
-
-/* Only to be used with ordered requests.
- */
-uint64_t
-iot_create_inode_worker_assoc (iot_conf_t * conf, inode_t * inode)
-{
- long int rand = 0;
- uint64_t idx = 0;
- rand = random ();
- /* If scaling is on, we can choose from any thread
- * that has been allocated upto, max_o_threads, but
- * with scaling off, we'll never have threads more
- * than min_o_threads.
- */
- if (iot_ordered_scaling_on (conf))
- idx = (rand % conf->max_o_threads);
- else
- idx = (rand % conf->min_o_threads);
-
- __inode_ctx_put (inode, conf->this, idx);
-
- return idx;
-}
-
-
-/* Assumes inode lock is held. */
-int32_t
-iot_ordered_request_balancer (iot_conf_t *conf, inode_t *inode, uint64_t *idx)
-{
- int ret = -1;
-
- if (__inode_ctx_get (inode, conf->this, idx) < 0)
- *idx = iot_create_inode_worker_assoc (conf, inode);
- else {
- /* Sanity check to ensure the idx received from the inode
- * context is within bounds. We're a bit optimistic in
- * assuming that if an index is within bounds, it is
- * not corrupted. idx is uint so we dont check for less
- * than 0.
- */
- if ((*idx >= (uint64_t)conf->max_o_threads)) {
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "inode context returned insane thread index %"
- PRIu64, *idx);
- ret = -EINVAL;
- goto out;
- }
+ if (list_empty(&ctx->reqs)) {
+ continue;
}
- ret = 0;
-out:
- return ret;
-}
-
-int
-iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub)
-{
- uint64_t idx = 0;
- iot_worker_t *selected_worker = NULL;
- iot_request_t *req = NULL;
- int balstatus = 0, ret = -1;
-
- if (inode == NULL) {
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "Got NULL inode for ordered request");
- ret = -EINVAL;
- goto out;
+ /* Get the first request on that queue. */
+ stub = list_first_entry(&ctx->reqs, call_stub_t, list);
+ list_del_init(&stub->list);
+ if (list_empty(&ctx->reqs)) {
+ list_del_init(&ctx->clients);
+ } else {
+ list_rotate_left(&conf->clients[i]);
}
- LOCK (&inode->lock);
- {
- balstatus = iot_ordered_request_balancer (conf, inode, &idx);
- if (balstatus < 0) {
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "Insane worker index. Unwinding stack");
- ret = -ECANCELED;
- goto unlock_out;
- }
- /* inode lock once acquired, cannot be left here
- * because other gluster main threads might be
- * contending on it to append a request for this file.
- * So we'll also leave the lock only after we've
- * added the request to the worker queue.
- */
- selected_worker = conf->oworkers[idx];
-
- req = iot_init_request (selected_worker, stub);
- if (req == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR,"out of memory");
- ret = -ENOMEM;
- goto unlock_out;
- }
+ conf->ac_iot_count[i]++;
+ conf->queue_marked[i] = _gf_false;
+ *pri = i;
+ break;
+ }
- ret = iot_request_queue_and_thread_fire (selected_worker,
- iot_worker_ordered,
- req);
- }
-unlock_out:
- UNLOCK (&inode->lock);
-
-out:
- if (ret < 0) {
- if (req != NULL) {
- iot_destroy_request (selected_worker, req);
- }
- }
- return ret;
-}
+ if (!stub)
+ return NULL;
+ conf->queue_size--;
+ conf->queue_sizes[*pri]--;
-int
-iot_lookup_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr,
- struct stat *postparent)
-{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr,
- postparent);
- return 0;
+ return stub;
}
-
-int
-iot_lookup_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+void
+__iot_enqueue(iot_conf_t *conf, call_stub_t *stub, int pri)
{
- STACK_WIND (frame, iot_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- loc, xattr_req);
- return 0;
-}
+ client_t *client = stub->frame->root->client;
+ iot_client_ctx_t *ctx;
+ if (pri < 0 || pri >= GF_FOP_PRI_MAX)
+ pri = GF_FOP_PRI_MAX - 1;
-int
-iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xattr_req);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create lookup stub (out of memory)");
- ret = -ENOMEM;
- goto out;
+ if (client) {
+ ctx = iot_get_ctx(THIS, client);
+ if (ctx) {
+ ctx = &ctx[pri];
}
+ } else {
+ ctx = NULL;
+ }
+ if (!ctx) {
+ ctx = &conf->no_client[pri];
+ }
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- STACK_UNWIND_STRICT (lookup, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
- }
+ if (list_empty(&ctx->reqs)) {
+ list_add_tail(&ctx->clients, &conf->clients[pri]);
+ }
+ list_add_tail(&stub->list, &ctx->reqs);
- return 0;
-}
-
-
-int
-iot_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *preop, struct stat *postop)
-{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
- return 0;
+ conf->queue_size++;
+ GF_ATOMIC_INC(conf->stub_cnt);
+ conf->queue_sizes[pri]++;
}
-
-int
-iot_setattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct stat *stbuf, int32_t valid)
-{
- STACK_WIND (frame, iot_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
-}
-
-
-int
-iot_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct stat *stbuf, int32_t valid)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_setattr_stub (frame, iot_setattr_wrapper, loc, stbuf, valid);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot create setattr stub"
- "(Out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
+void *
+iot_worker(void *data)
+{
+ iot_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+ struct timespec sleep_till = {
+ 0,
+ };
+ int ret = 0;
+ int pri = -1;
+ gf_boolean_t bye = _gf_false;
+
+ conf = data;
+ this = conf->this;
+ THIS = this;
+
+ for (;;) {
+ pthread_mutex_lock(&conf->mutex);
+ {
+ if (pri != -1) {
+ conf->ac_iot_count[pri]--;
+ pri = -1;
+ }
+ while (conf->queue_size == 0) {
+ if (conf->down) {
+ bye = _gf_true; /*Avoid sleep*/
+ break;
}
- STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL);
- }
-
- return 0;
-}
+ clock_gettime(CLOCK_REALTIME_COARSE, &sleep_till);
+ sleep_till.tv_sec += conf->idle_time;
+ conf->sleep_count++;
+ ret = pthread_cond_timedwait(&conf->cond, &conf->mutex,
+ &sleep_till);
+ conf->sleep_count--;
-int
-iot_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *preop, struct stat *postop)
-{
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, preop, postop);
- return 0;
-}
-
-
-int
-iot_fsetattr_wrapper (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct stat *stbuf, int32_t valid)
-{
- STACK_WIND (frame, iot_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid);
- return 0;
-}
-
-
-int
-iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct stat *stbuf, int32_t valid)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsetattr_stub (frame, iot_fsetattr_wrapper, fd, stbuf,
- valid);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsetattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
+ if (conf->down || ret == ETIMEDOUT) {
+ bye = _gf_true;
+ break;
}
- }
- return 0;
-}
-
-
-int
-iot_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-iot_access_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
-{
- STACK_WIND (frame, iot_access_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->access, loc, mask);
- return 0;
-}
-
-
-int
-iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_access_stub (frame, iot_access_wrapper, loc, mask);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create access stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (access, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
+ }
+
+ if (bye) {
+ if (conf->down || conf->curr_count > IOT_MIN_THREADS) {
+ conf->curr_count--;
+ if (conf->curr_count == 0)
+ pthread_cond_broadcast(&conf->cond);
+ gf_msg_debug(conf->this->name, 0,
+ "terminated. "
+ "conf->curr_count=%d",
+ conf->curr_count);
+ } else {
+ bye = _gf_false;
}
- }
- return 0;
-}
-
-
-int
-iot_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct stat *stbuf)
-{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, stbuf);
- return 0;
-}
-
-
-int
-iot_readlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
-{
- STACK_WIND (frame, iot_readlink_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readlink,
- loc, size);
- return 0;
-}
-
-
-int
-iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create readlink stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
+ }
+
+ if (!bye)
+ stub = __iot_dequeue(conf, &pri);
+ }
+ pthread_mutex_unlock(&conf->mutex);
+
+ if (stub) { /* guard against spurious wakeups */
+ if (stub->poison) {
+ gf_log(this->name, GF_LOG_INFO, "Dropping poisoned request %p.",
+ stub);
+ call_stub_destroy(stub);
+ } else {
+ call_resume(stub);
+ }
+ GF_ATOMIC_DEC(conf->stub_cnt);
+ }
+ stub = NULL;
+
+ if (bye)
+ break;
+ }
+
+ return NULL;
+}
+
+int
+do_iot_schedule(iot_conf_t *conf, call_stub_t *stub, int pri)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&conf->mutex);
+ {
+ __iot_enqueue(conf, stub, pri);
+
+ pthread_cond_signal(&conf->cond);
+
+ ret = __iot_workers_scale(conf);
+ }
+ pthread_mutex_unlock(&conf->mutex);
+
+ return ret;
+}
+
+char *
+iot_get_pri_meaning(gf_fop_pri_t pri)
+{
+ char *name = NULL;
+ switch (pri) {
+ case GF_FOP_PRI_HI:
+ name = "fast";
+ break;
+ case GF_FOP_PRI_NORMAL:
+ name = "normal";
+ break;
+ case GF_FOP_PRI_LO:
+ name = "slow";
+ break;
+ case GF_FOP_PRI_LEAST:
+ name = "least";
+ break;
+ case GF_FOP_PRI_MAX:
+ name = "invalid";
+ break;
+ case GF_FOP_PRI_UNSPEC:
+ name = "unspecified";
+ break;
+ }
+ return name;
+}
+
+int
+iot_schedule(call_frame_t *frame, xlator_t *this, call_stub_t *stub)
+{
+ int ret = -1;
+ gf_fop_pri_t pri = GF_FOP_PRI_MAX - 1;
+ iot_conf_t *conf = this->private;
+
+ if ((frame->root->pid < GF_CLIENT_PID_MAX) &&
+ (frame->root->pid != GF_CLIENT_PID_NO_ROOT_SQUASH) &&
+ conf->least_priority) {
+ pri = GF_FOP_PRI_LEAST;
+ goto out;
+ }
+
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ case GF_FOP_STAT:
+ case GF_FOP_FSTAT:
+ case GF_FOP_LOOKUP:
+ case GF_FOP_ACCESS:
+ case GF_FOP_READLINK:
+ case GF_FOP_OPENDIR:
+ case GF_FOP_STATFS:
+ case GF_FOP_READDIR:
+ case GF_FOP_READDIRP:
+ case GF_FOP_GETACTIVELK:
+ case GF_FOP_SETACTIVELK:
+ case GF_FOP_ICREATE:
+ case GF_FOP_NAMELINK:
+ pri = GF_FOP_PRI_HI;
+ break;
+
+ case GF_FOP_CREATE:
+ case GF_FOP_FLUSH:
+ case GF_FOP_LK:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_LEASE:
+ case GF_FOP_UNLINK:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_RENAME:
+ case GF_FOP_LINK:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_GETXATTR:
+ case GF_FOP_FGETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ case GF_FOP_PUT:
+ pri = GF_FOP_PRI_NORMAL;
+ break;
+
+ case GF_FOP_READ:
+ case GF_FOP_WRITE:
+ case GF_FOP_FSYNC:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FSYNCDIR:
+ case GF_FOP_XATTROP:
+ case GF_FOP_FXATTROP:
+ case GF_FOP_RCHECKSUM:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_SEEK:
+ pri = GF_FOP_PRI_LO;
+ break;
+
+ case GF_FOP_FORGET:
+ case GF_FOP_RELEASE:
+ case GF_FOP_RELEASEDIR:
+ case GF_FOP_GETSPEC:
+ break;
+ case GF_FOP_IPC:
+ default:
+ return -EINVAL;
+ }
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ gf_msg_debug(this->name, 0, "%s scheduled as %s priority fop",
+ gf_fop_list[stub->fop], iot_get_pri_meaning(pri));
+ if (this->private)
+ ret = do_iot_schedule(this->private, stub, pri);
+ return ret;
}
-
int
-iot_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+iot_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ IOT_FOP(lookup, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_mknod_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+iot_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, iot_mknod_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mknod, loc, mode, rdev);
- return 0;
+ IOT_FOP(setattr, frame, this, loc, stbuf, valid, xdata);
+ return 0;
}
-
int
-iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_mknod_stub (frame, iot_mknod_wrapper, loc, mode, rdev);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create mknod stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_mkdir_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+iot_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ IOT_FOP(fsetattr, frame, this, fd, stbuf, valid, xdata);
+ return 0;
}
-
int
-iot_mkdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_mkdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mkdir, loc, mode);
- return 0;
+ IOT_FOP(access, frame, this, loc, mask, xdata);
+ return 0;
}
-
int
-iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create mkdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(readlink, frame, this, loc, size, xdata);
+ return 0;
}
-
int
-iot_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *preparent,
- struct stat *postparent)
+iot_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent);
- return 0;
+ IOT_FOP(mknod, frame, this, loc, mode, rdev, umask, xdata);
+ return 0;
}
-
int
-iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, iot_rmdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rmdir, loc);
- return 0;
+ IOT_FOP(mkdir, frame, this, loc, mode, umask, xdata);
+ return 0;
}
-
int
-iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create rmdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(rmdir, frame, this, loc, flags, xdata);
+ return 0;
}
-
int
-iot_symlink_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+iot_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ IOT_FOP(symlink, frame, this, linkname, loc, umask, xdata);
+ return 0;
}
-
int
-iot_symlink_wrapper (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
+iot_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_symlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->symlink, linkname, loc);
- return 0;
-}
-
-
-int
-iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_symlink_stub (frame, iot_symlink_wrapper, linkname, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create symlink stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(rename, frame, this, oldloc, newloc, xdata);
+ return 0;
}
-
int
-iot_rename_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf,
- struct stat *preoldparent, struct stat *postoldparent,
- struct stat *prenewparent, struct stat *postnewparent)
+iot_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- return 0;
+ IOT_FOP(open, frame, this, loc, flags, fd, xdata);
+ return 0;
}
-
int
-iot_rename_wrapper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+iot_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, iot_rename_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rename, oldloc, newloc);
- return 0;
+ IOT_FOP(create, frame, this, loc, flags, mode, umask, fd, xdata);
+ return 0;
}
-
int
-iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+iot_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc);
- if (!stub) {
- gf_log (this->name, GF_LOG_DEBUG, "cannot create rename stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- oldloc->inode, stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int
-iot_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
-{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
-}
-
-
-int
-iot_open_wrapper (call_frame_t * frame, xlator_t * this, loc_t *loc,
- int32_t flags, fd_t * fd, int32_t wbflags)
-{
- STACK_WIND (frame, iot_open_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open, loc, flags, fd, wbflags);
- return 0;
-}
-
-
-int
-iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd, wbflags);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create open call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(put, frame, this, loc, mode, umask, flags, vector, count, offset,
+ iobref, xattr, xdata);
+ return 0;
}
-
int
-iot_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *stbuf, struct stat *preparent,
- struct stat *postparent)
+iot_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent);
- return 0;
-}
-
-
-int
-iot_create_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
-{
- STACK_WIND (frame, iot_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
-}
-
-
-int
-iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_create_stub (frame, iot_create_wrapper, loc, flags, mode,
- fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create \"create\" call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL,
- NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(readv, frame, this, fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+iot_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
-
- return 0;
+ IOT_FOP(flush, frame, this, fd, xdata);
+ return 0;
}
-
int
-iot_readv_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+iot_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
+ IOT_FOP(fsync, frame, this, fd, datasync, xdata);
+ return 0;
}
-
int
-iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+iot_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create readv call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readv, frame, -1, -ret, NULL, -1, NULL,
- NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(writev, frame, this, fd, vector, count, offset, flags, iobref,
+ xdata);
+ return 0;
}
-
int
-iot_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ IOT_FOP(lk, frame, this, fd, cmd, flock, xdata);
+ return 0;
}
-
int
-iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+iot_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame, iot_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ IOT_FOP(stat, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_flush_stub (frame, iot_flush_wrapper, fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create flush_cbk call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (flush, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+iot_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ IOT_FOP(fstat, frame, this, fd, xdata);
+ return 0;
}
-
int
-iot_fsync_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+iot_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_fsync_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
- return 0;
+ IOT_FOP(truncate, frame, this, loc, offset, xdata);
+ return 0;
}
-
int
-iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+iot_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fsync_cbk call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsync, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(ftruncate, frame, this, fd, offset, xdata);
+ return 0;
}
-
int
-iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+iot_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ IOT_FOP(unlink, frame, this, loc, xflag, xdata);
+ return 0;
}
-
int
-iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
+iot_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
- return 0;
+ IOT_FOP(link, frame, this, oldloc, newloc, xdata);
+ return 0;
}
-
int
-iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_writev_stub (frame, iot_writev_wrapper,
- fd, vector, count, offset, iobref);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create writev call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-iot_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *flock)
+iot_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock);
- return 0;
+ IOT_FOP(opendir, frame, this, loc, fd, xdata);
+ return 0;
}
-
int
-iot_lk_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct flock *flock)
+iot_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, flock);
- return 0;
+ IOT_FOP(fsyncdir, frame, this, fd, datasync, xdata);
+ return 0;
}
-
int
-iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *flock)
+iot_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_lk call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (lk, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(statfs, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+iot_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
- return 0;
+ IOT_FOP(setxattr, frame, this, loc, dict, flags, xdata);
+ return 0;
}
-
int
-iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
-}
+ iot_conf_t *conf = NULL;
+ dict_t *depths = NULL;
+ int i = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ conf = this->private;
-int
-iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_stat_stub (frame, iot_stat_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_stat call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- /* File is not open, so we can send it through unordered pool.
+ if (name && strcmp(name, IO_THREADS_QUEUE_SIZE_KEY) == 0) {
+ /*
+ * We explicitly do not want a reference count
+ * for this dict in this translator
*/
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
+ depths = dict_new();
+ if (!depths) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind_special_getxattr;
}
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (stat, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ if (dict_set_int32(depths, (char *)fop_pri_to_string(i),
+ conf->queue_sizes[i]) != 0) {
+ dict_unref(depths);
+ depths = NULL;
+ goto unwind_special_getxattr;
+ }
}
- return 0;
-}
+ unwind_special_getxattr:
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, depths, xdata);
+ if (depths)
+ dict_unref(depths);
+ return 0;
+ }
-int
-iot_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
- return 0;
+ IOT_FOP(getxattr, frame, this, loc, name, xdata);
+ return 0;
}
-
int
-iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+iot_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
-}
-
-
-int
-iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_fstat call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fstat, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(fgetxattr, frame, this, fd, name, xdata);
+ return 0;
}
-
int
-iot_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+iot_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ IOT_FOP(fsetxattr, frame, this, fd, dict, flags, xdata);
+ return 0;
}
-
int
-iot_truncate_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+iot_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- STACK_WIND (frame, iot_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
-}
-
-
-int
-iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- call_stub_t *stub;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_stat call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(removexattr, frame, this, loc, name, xdata);
+ return 0;
}
-
int
-iot_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+iot_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ IOT_FOP(fremovexattr, frame, this, fd, name, xdata);
+ return 0;
}
-
int
-iot_ftruncate_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+iot_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, iot_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
-}
-
-
-int
-iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_ftruncate call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(readdirp, frame, this, fd, size, offset, xdata);
+ return 0;
}
-
int
-iot_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint8_t *file_checksum,
- uint8_t *dir_checksum)
+iot_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_UNWIND_STRICT (checksum, frame, op_ret, op_errno, file_checksum,
- dir_checksum);
- return 0;
+ IOT_FOP(readdir, frame, this, fd, size, offset, xdata);
+ return 0;
}
-
int
-iot_checksum_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags)
+iot_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_WIND (frame, iot_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc, flags);
-
- return 0;
+ IOT_FOP(inodelk, frame, this, volume, loc, cmd, lock, xdata);
+ return 0;
}
-
int
-iot_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_checksum_stub (frame, iot_checksum_wrapper, loc, flags);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_checksum call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (checksum, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *preparent,
- struct stat *postparent)
+iot_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent);
- return 0;
+ IOT_FOP(finodelk, frame, this, volume, fd, cmd, lock, xdata);
+ return 0;
}
-
int
-iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
- STACK_WIND (frame, iot_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
-
- return 0;
+ IOT_FOP(entrylk, frame, this, volume, loc, basename, cmd, type, xdata);
+ return 0;
}
-
int
-iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_unlink call stub"
- "(out of memory)");
- ret = -1;
- goto out;
- }
-
- ret = iot_schedule_unordered((iot_conf_t *)this->private, loc->inode,
- stub);
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
-
- return 0;
+ IOT_FOP(fentrylk, frame, this, volume, fd, basename, cmd, type, xdata);
+ return 0;
}
-
int
-iot_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent, struct stat *postparent)
+iot_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ IOT_FOP(xattrop, frame, this, loc, optype, xattr, xdata);
+ return 0;
}
-
int
-iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new)
+iot_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_WIND (frame, iot_link_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->link, old, new);
-
- return 0;
+ IOT_FOP(fxattrop, frame, this, fd, optype, xattr, xdata);
+ return 0;
}
-
-int
-iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+int32_t
+iot_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create link stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- oldloc->inode, stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL,
- NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(rchecksum, frame, this, fd, offset, len, xdata);
+ return 0;
}
-
int
-iot_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
- return 0;
+ IOT_FOP(fallocate, frame, this, fd, mode, offset, len, xdata);
+ return 0;
}
-
int
-iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+iot_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- STACK_WIND (frame, iot_opendir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->opendir, loc, fd);
- return 0;
+ IOT_FOP(discard, frame, this, fd, offset, len, xdata);
+ return 0;
}
-
int
-iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create opendir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(zerofill, frame, this, fd, offset, len, xdata);
+ return 0;
}
-
int
-iot_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
- return 0;
+ IOT_FOP(seek, frame, this, fd, offset, what, xdata);
+ return 0;
}
-
int
-iot_fsyncdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync)
+iot_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- STACK_WIND (frame, iot_fsyncdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsyncdir, fd, datasync);
- return 0;
+ IOT_FOP(lease, frame, this, loc, lease, xdata);
+ return 0;
}
-
int
-iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+iot_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsyncdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ IOT_FOP(getactivelk, frame, this, loc, xdata);
+ return 0;
}
-
int
-iot_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+iot_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
- return 0;
+ IOT_FOP(setactivelk, frame, this, loc, locklist, xdata);
+ return 0;
}
-
int
-iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+__iot_workers_scale(iot_conf_t *conf)
{
- STACK_WIND (frame, iot_statfs_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->statfs, loc);
- return 0;
-}
-
-
-int
-iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create statfs stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (statfs, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ int scale = 0;
+ int diff = 0;
+ pthread_t thread;
+ int ret = 0;
+ int i = 0;
+ for (i = 0; i < GF_FOP_PRI_MAX; i++)
+ scale += min(conf->queue_sizes[i], conf->ac_iot_limit[i]);
-int
-iot_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-iot_setxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- STACK_WIND (frame, iot_setxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setxattr, loc, dict, flags);
- return 0;
-}
-
+ if (scale < IOT_MIN_THREADS)
+ scale = IOT_MIN_THREADS;
-int
-iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_setxattr_stub (frame, iot_setxattr_wrapper, loc, dict,
- flags);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create setxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ if (scale > conf->max_count)
+ scale = conf->max_count;
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ if (conf->curr_count < scale) {
+ diff = scale - conf->curr_count;
+ }
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (setxattr, frame, -1, -ret);
+ while (diff) {
+ diff--;
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ ret = gf_thread_create(&thread, &conf->w_attr, iot_worker, conf,
+ "iotwr%03hx", conf->curr_count & 0x3ff);
+ if (ret == 0) {
+ pthread_detach(thread);
+ conf->curr_count++;
+ gf_msg_debug(conf->this->name, 0,
+ "scaled threads to %d (queue_size=%d/%d)",
+ conf->curr_count, conf->queue_size, scale);
+ } else {
+ break;
}
- return 0;
-}
-
+ }
-int
-iot_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
- return 0;
+ return diff;
}
-
int
-iot_getxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+iot_workers_scale(iot_conf_t *conf)
{
- STACK_WIND (frame, iot_getxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->getxattr, loc, name);
- return 0;
-}
+ int ret = -1;
+ if (conf == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
-int
-iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create getxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ pthread_mutex_lock(&conf->mutex);
+ {
+ ret = __iot_workers_scale(conf);
+ }
+ pthread_mutex_unlock(&conf->mutex);
out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
- return 0;
+ return ret;
}
-
int
-iot_fgetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+set_stack_size(iot_conf_t *conf)
{
- STACK_WIND (frame, iot_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- return 0;
-}
-
+ int err = 0;
+ size_t stacksize = IOT_THREAD_STACK_SIZE;
+ xlator_t *this = NULL;
-int
-iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fgetxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ this = THIS;
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, -ret, NULL);
+ err = pthread_attr_init(&conf->w_attr);
+ if (err != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, err, IO_THREADS_MSG_INIT_FAILED,
+ NULL);
+ return err;
+ }
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
+ err = pthread_attr_setstacksize(&conf->w_attr, stacksize);
+ if (err == EINVAL) {
+ err = pthread_attr_getstacksize(&conf->w_attr, &stacksize);
+ if (!err) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET,
+ "size=%zd", stacksize, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_SIZE_NOT_SET,
+ NULL);
+ err = 0;
}
- return 0;
-}
+ }
-
-int
-iot_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
- return 0;
+ conf->stack_size = stacksize;
+ return err;
}
-
-int
-iot_fsetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+int32_t
+mem_acct_init(xlator_t *this)
{
- STACK_WIND (frame, iot_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags);
- return 0;
-}
+ int ret = -1;
+ if (!this)
+ return ret;
-int
-iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fsetxattr_stub (frame, iot_fsetxattr_wrapper, fd, dict,
- flags);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fsetxattr stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ ret = xlator_mem_acct_init(this, gf_iot_mt_end + 1);
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, -ret);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_NO_MEMORY,
+ NULL);
+ return ret;
+ }
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ return ret;
}
-
int
-iot_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+iot_priv_dump(xlator_t *this)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
- return 0;
-}
+ iot_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
-
-int
-iot_removexattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- STACK_WIND (frame, iot_removexattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->removexattr, loc, name);
+ if (!this)
return 0;
-}
-
-
-int
-iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_removexattr_stub (frame, iot_removexattr_wrapper, loc,
- name);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,"cannot get removexattr fop"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (removexattr, frame, -1, -ret);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
+ conf = this->private;
+ if (!conf)
return 0;
-}
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
-int
-iot_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
- return 0;
-}
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count);
+ gf_proc_dump_write("current_threads_count", "%d", conf->curr_count);
+ gf_proc_dump_write("sleep_count", "%d", conf->sleep_count);
+ gf_proc_dump_write("idle_time", "%d", conf->idle_time);
+ gf_proc_dump_write("stack_size", "%zd", conf->stack_size);
+ gf_proc_dump_write("max_high_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_HI]);
+ gf_proc_dump_write("max_normal_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL]);
+ gf_proc_dump_write("max_low_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_LO]);
+ gf_proc_dump_write("max_least_priority_threads", "%d",
+ conf->ac_iot_limit[GF_FOP_PRI_LEAST]);
+ gf_proc_dump_write("current_high_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_HI]);
+ gf_proc_dump_write("current_normal_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_NORMAL]);
+ gf_proc_dump_write("current_low_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_LO]);
+ gf_proc_dump_write("current_least_priority_threads", "%d",
+ conf->ac_iot_count[GF_FOP_PRI_LEAST]);
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ if (!conf->queue_sizes[i])
+ continue;
+ snprintf(key, sizeof(key), "%s_priority_queue_length",
+ iot_get_pri_meaning(i));
+ gf_proc_dump_write(key, "%d", conf->queue_sizes[i]);
+ }
-int
-iot_readdirp_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
-{
- STACK_WIND (frame, iot_readdirp_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdirp, fd, size, offset);
- return 0;
-}
-
-
-int
-iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readdirp_stub (frame, iot_readdirp_wrapper, fd, size,
- offset);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
+ return 0;
}
+/*
+ * We use a decay model to keep track of increments and make sure we're not
+ * spawning new threads too often. Each increment adds a large value to a
+ * counter, and that
+ * counter keeps ticking back down to zero over a fairly long period. For
+ * example, let's use ONE_WEEK=604800 seconds, and we want to detect when we
+ * have N=3 increments during that time. Thus, our threshold is
+ * (N-1)*ONE_WEEK. To see how it works, look at three examples.
+ *
+ * (a) Two events close together, then one more almost a week later. The
+ * first two events push our counter to 2*ONE_WEEK plus a bit. At the third
+ * event, we decay down to ONE_WEEK plus a bit and then add ONE_WEEK for the
+ * new event, exceeding our threshold.
+ *
+ * (b) One event, then two more almost a week later. At the time of the
+ * second and third events, the counter is already non-zero, so when we add
+ * 2*ONE_WEEK we exceed again.
+ *
+ * (c) Three events, spaced three days apart. At the time of the second
+ * event, we decay down to approximately ONE_WEEK*4/7 and then add another
+ * ONE_WEEK. At the third event, we decay again down to ONE_WEEK*8/7 and add
+ * another ONE_WEEK, so boom.
+ *
+ * Note that in all three cases if that last event came a day later our counter
+ * would have decayed a bit more and we would *not* exceed our threshold. It's
+ * not exactly the same as a precise "three in one week" limit, but it's very
+ * close and it allows the same kind of tweaking while requiring only constant
+ * space - no arrays of variable length N to allocate or maintain. All we need
+ * (for each queue) is the value plus the time of the last update.
+ */
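As a minimal sketch of the decay arithmetic described above (not part of the patch itself), the whole scheme reduces to one small helper. The name decay_check() and its standalone constants are hypothetical, chosen only to mirror THRESH_SECONDS and THRESH_LIMIT defined a few lines below.

#include <stdint.h>
#include <time.h>

#define ONE_WEEK 604800u               /* same value as THRESH_SECONDS */
#define LIMIT (2u * ONE_WEEK)          /* (THRESH_EVENTS - 1) * ONE_WEEK */

/* Hypothetical helper: decay the counter, add one event, report overflow. */
static int
decay_check(uint32_t *value, time_t *last, time_t now)
{
    time_t delta = now - *last;

    if ((time_t)*value <= delta)
        *value = 0;                    /* fully decayed since the last event */
    else
        *value -= (uint32_t)delta;

    *value += ONE_WEEK;                /* weight of the new event */
    *last = now;

    return (*value >= LIMIT);          /* roughly "three events in a week" */
}

With events spaced exactly one week apart the counter hovers at ONE_WEEK and never trips; only clustered patterns like examples (a)-(c) push it past the two-week limit.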
-int
-iot_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
- return 0;
-}
-
+typedef struct {
+ time_t update_time;
+ uint32_t value;
+} threshold_t;
+/*
+ * Variables so that I can hack these for testing.
+ * TBD: make these tunable?
+ */
+static uint32_t THRESH_SECONDS = 604800;
+static uint32_t THRESH_EVENTS = 3;
+static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */
-int
-iot_readdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+static void
+iot_apply_event(xlator_t *this, threshold_t *thresh)
{
- STACK_WIND (frame, iot_readdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdir, fd, size, offset);
- return 0;
-}
+ time_t delta, now = gf_time();
+ /* Refresh for manual testing/debugging. It's cheap. */
+ THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1);
-int
-iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset);
- if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (readdir, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
+ if (thresh->value && thresh->update_time) {
+ delta = now - thresh->update_time;
+ /* Be careful about underflow. */
+ if (thresh->value <= delta) {
+ thresh->value = 0;
+ } else {
+ thresh->value -= delta;
+ }
+ }
+
+ thresh->value += THRESH_SECONDS;
+ if (thresh->value >= THRESH_LIMIT) {
+ gf_log(this->name, GF_LOG_EMERG, "watchdog firing too often");
+ /*
+ * The default action for SIGTRAP is to dump core, but the fact
+ * that it's distinct from other signals we use means that
+ * there are other possibilities as well (e.g. drop into gdb or
+ * invoke a special handler).
+ */
+ kill(getpid(), SIGTRAP);
+ }
+
+ thresh->update_time = now;
+}
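The SIGTRAP comment above notes that a process could install its own handler instead of taking the default core dump. A minimal, hypothetical sketch of that alternative (the handler name and message are illustrative, not part of this patch):

#include <signal.h>
#include <unistd.h>

/* Hypothetical SIGTRAP handler: log and keep running instead of dumping core. */
static void
iot_trap_handler(int sig)
{
    static const char msg[] = "io-threads watchdog tripped\n";

    (void)sig;
    /* Only async-signal-safe calls are allowed here, hence write(2). */
    (void)write(STDERR_FILENO, msg, sizeof(msg) - 1);
}

/* Installed once at init time, e.g. signal(SIGTRAP, iot_trap_handler); */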
+
+static void *
+iot_watchdog(void *arg)
+{
+ xlator_t *this = arg;
+ iot_conf_t *priv = this->private;
+ int i;
+ int bad_times[GF_FOP_PRI_MAX] = {
+ 0,
+ };
+ threshold_t thresholds[GF_FOP_PRI_MAX] = {{
+ 0,
+ }};
+
+ for (;;) {
+ sleep(max(priv->watchdog_secs / 5, 1));
+ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ pthread_mutex_lock(&priv->mutex);
+ for (i = 0; i < GF_FOP_PRI_MAX; ++i) {
+ if (priv->queue_marked[i]) {
+ if (++bad_times[i] >= 5) {
+ gf_log(this->name, GF_LOG_WARNING, "queue %d stalled", i);
+ iot_apply_event(this, &thresholds[i]);
+ /*
+ * We might not get here if the event
+ * put us over our threshold.
+ */
+ ++(priv->ac_iot_limit[i]);
+ bad_times[i] = 0;
}
+ } else {
+ bad_times[i] = 0;
+ }
+ priv->queue_marked[i] = (priv->queue_sizes[i] > 0);
}
- return 0;
-}
-
+ pthread_mutex_unlock(&priv->mutex);
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
-int
-iot_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr);
- return 0;
+ /* NOTREACHED */
+ return NULL;
}
-
-int
-iot_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+static void
+start_iot_watchdog(xlator_t *this)
{
- STACK_WIND (frame, iot_xattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr);
- return 0;
-}
-
-
-int
-iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_xattrop_stub (frame, iot_xattrop_wrapper, loc, optype,
- xattr);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create xattrop stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (xattrop, frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ iot_conf_t *priv = this->private;
+ int ret;
+ if (priv->watchdog_running) {
+ return;
+ }
-int
-iot_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, xattr);
- return 0;
+ ret = pthread_create(&priv->watchdog_thread, NULL, iot_watchdog, this);
+ if (ret == 0) {
+ priv->watchdog_running = _gf_true;
+ } else {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_create(iot_watchdog) failed");
+ }
}
-int
-iot_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+static void
+stop_iot_watchdog(xlator_t *this)
{
- STACK_WIND (frame, iot_fxattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr);
- return 0;
-}
-
-int
-iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_fxattrop_stub (frame, iot_fxattrop_wrapper, fd, optype,
- xattr);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fxattrop stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
+ iot_conf_t *priv = this->private;
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND_STRICT (fxattrop, frame, -1, -ret, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
+ if (!priv->watchdog_running) {
+ return;
+ }
+ if (pthread_cancel(priv->watchdog_thread) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_cancel(iot_watchdog) failed");
+ }
-/* Must be called with worker lock held */
-void
-_iot_queue (iot_worker_t *worker, iot_request_t *req)
-{
- list_add_tail (&req->list, &worker->rqlist);
+ if (pthread_join(priv->watchdog_thread, NULL) != 0) {
+ gf_log(this->name, GF_LOG_WARNING, "pthread_join(iot_watchdog) failed");
+ }
- /* dq_cond */
- worker->queue_size++;
- iot_notify_worker(worker);
+ /* Failure probably means it's already dead. */
+ priv->watchdog_running = _gf_false;
}
-
-iot_request_t *
-iot_init_request (iot_worker_t *worker, call_stub_t *stub)
+int
+reconfigure(xlator_t *this, dict_t *options)
{
- iot_request_t *req = NULL;
+ iot_conf_t *conf = NULL;
+ int ret = -1;
- req = mem_get (worker->req_pool);
- if (req == NULL) {
- goto out;
- }
+ conf = this->private;
+ if (!conf)
+ goto out;
- req->stub = stub;
-out:
- return req;
-}
+ GF_OPTION_RECONF("thread-count", conf->max_count, options, int32, out);
+ GF_OPTION_RECONF("high-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_HI],
+ options, int32, out);
-void
-iot_destroy_request (iot_worker_t *worker, iot_request_t * req)
-{
- if ((req == NULL) || (worker == NULL))
- return;
+ GF_OPTION_RECONF("normal-prio-threads",
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL], options, int32,
+ out);
- mem_put (worker->req_pool, req);
-}
+ GF_OPTION_RECONF("low-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LO],
+ options, int32, out);
+ GF_OPTION_RECONF("least-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LEAST],
+ options, int32, out);
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_can_ordered_exit (iot_worker_t * worker)
-{
- gf_boolean_t allow_exit = _gf_false;
- iot_conf_t *conf = NULL;
+ GF_OPTION_RECONF("enable-least-priority", conf->least_priority, options,
+ bool, out);
- conf = worker->conf;
- /* We dont want this thread to exit if its index is
- * below the min thread count.
- */
- if (worker->thread_idx >= conf->min_o_threads)
- allow_exit = _gf_true;
+ GF_OPTION_RECONF("cleanup-disconnected-reqs",
+ conf->cleanup_disconnected_reqs, options, bool, out);
- return allow_exit;
-}
+ GF_OPTION_RECONF("watchdog-secs", conf->watchdog_secs, options, int32, out);
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_ordered_exit (int cond_waitres, iot_worker_t *worker)
-{
- gf_boolean_t allow_exit = _gf_false;
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- allow_exit = _gf_true;
- } else if (cond_waitres == ETIMEDOUT) {
- allow_exit = iot_can_ordered_exit (worker);
- }
-
- if (allow_exit) {
- worker->state = IOT_STATE_DEAD;
- worker->thread = 0;
- }
+ if (conf->watchdog_secs > 0) {
+ start_iot_watchdog(this);
+ } else {
+ stop_iot_watchdog(this);
+ }
- return allow_exit;
+ ret = 0;
+out:
+ return ret;
}
-
int
-iot_ordered_request_wait (iot_worker_t * worker)
+init(xlator_t *this)
{
- int waitres = 0;
- int retstat = 0;
+ iot_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- retstat = -1;
- goto out;
- }
+ if (!this->children || this->children->next) {
+ gf_smsg("io-threads", GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);
+ goto out;
+ }
- waitres = iot_notify_wait (worker, worker->conf->o_idle_time);
- if (iot_ordered_exit (waitres, worker)) {
- retstat = -1;
- }
+ if (!this->parents) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, IO_THREADS_MSG_VOL_MISCONFIGURED,
+ NULL);
+ }
-out:
- return retstat;
-}
+ conf = (void *)GF_CALLOC(1, sizeof(*conf), gf_iot_mt_iot_conf_t);
+ if (conf == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_THREADS_MSG_OUT_OF_MEMORY,
+ NULL);
+ goto out;
+ }
+ if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_PTHREAD_INIT_FAILED,
+ "pthread_cond_init ret=%d", ret, NULL);
+ goto out;
+ }
+ conf->cond_inited = _gf_true;
-call_stub_t *
-iot_dequeue_ordered (iot_worker_t *worker)
-{
- call_stub_t *stub = NULL;
- iot_request_t *req = NULL;
- int waitstat = 0;
+ if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, IO_THREADS_MSG_PTHREAD_INIT_FAILED,
+ "pthread_mutex_init ret=%d", ret, NULL);
+ goto out;
+ }
+ conf->mutex_inited = _gf_true;
- LOCK (&worker->qlock);
- {
- while (!worker->queue_size) {
- waitstat = 0;
- waitstat = iot_ordered_request_wait (worker);
- /* We must've timed out and are now required to
- * exit.
- */
- if (waitstat == -1)
- goto out;
- }
+ ret = set_stack_size(conf);
- list_for_each_entry (req, &worker->rqlist, list)
- break;
- list_del (&req->list);
- stub = req->stub;
+ if (ret != 0)
+ goto out;
- worker->queue_size--;
- }
-out:
- UNLOCK (&worker->qlock);
- iot_destroy_request (worker, req);
+ ret = -1;
- return stub;
-}
+ GF_OPTION_INIT("thread-count", conf->max_count, int32, out);
+ GF_OPTION_INIT("high-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_HI],
+ int32, out);
-void *
-iot_worker_ordered (void *arg)
-{
- iot_worker_t *worker = arg;
- call_stub_t *stub = NULL;
+ GF_OPTION_INIT("normal-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_NORMAL],
+ int32, out);
- while (1) {
+ GF_OPTION_INIT("low-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LO], int32,
+ out);
- stub = iot_dequeue_ordered (worker);
- /* If stub is NULL, we must've timed out waiting for a
- * request and have now been allowed to exit.
- */
- if (!stub)
- break;
- call_resume (stub);
- }
+ GF_OPTION_INIT("least-prio-threads", conf->ac_iot_limit[GF_FOP_PRI_LEAST],
+ int32, out);
- return NULL;
-}
+ GF_OPTION_INIT("idle-time", conf->idle_time, int32, out);
+ GF_OPTION_INIT("enable-least-priority", conf->least_priority, bool, out);
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_can_unordered_exit (iot_worker_t * worker)
-{
- gf_boolean_t allow_exit = _gf_false;
- iot_conf_t *conf = NULL;
+ GF_OPTION_INIT("cleanup-disconnected-reqs", conf->cleanup_disconnected_reqs,
+ bool, out);
- conf = worker->conf;
- /* We dont want this thread to exit if its index is
- * below the min thread count.
- */
- if (worker->thread_idx >= conf->min_u_threads)
- allow_exit = _gf_true;
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
- return allow_exit;
-}
+ conf->this = this;
+ GF_ATOMIC_INIT(conf->stub_cnt, 0);
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ INIT_LIST_HEAD(&conf->clients[i]);
+ INIT_LIST_HEAD(&conf->no_client[i].clients);
+ INIT_LIST_HEAD(&conf->no_client[i].reqs);
+ }
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_unordered_exit (int cond_waitres, iot_worker_t *worker)
-{
- gf_boolean_t allow_exit = _gf_false;
-
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- allow_exit = _gf_true;
- } else if (cond_waitres == ETIMEDOUT) {
- allow_exit = iot_can_unordered_exit (worker);
- }
+ if (!this->pass_through) {
+ ret = iot_workers_scale(conf);
- if (allow_exit) {
- worker->state = IOT_STATE_DEAD;
- worker->thread = 0;
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ IO_THREADS_MSG_WORKER_THREAD_INIT_FAILED, NULL);
+ goto out;
}
+ }
- return allow_exit;
-}
-
-
-int
-iot_unordered_request_wait (iot_worker_t * worker)
-{
- int waitres = 0;
- int retstat = 0;
-
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- retstat = -1;
- goto out;
- }
+ this->private = conf;
- waitres = iot_notify_wait (worker, worker->conf->u_idle_time);
- if (iot_unordered_exit (waitres, worker)) {
- retstat = -1;
- }
+ conf->watchdog_secs = 0;
+ GF_OPTION_INIT("watchdog-secs", conf->watchdog_secs, int32, out);
+ if (conf->watchdog_secs > 0) {
+ start_iot_watchdog(this);
+ }
+ ret = 0;
out:
- return retstat;
-}
-
-
-call_stub_t *
-iot_dequeue_unordered (iot_worker_t *worker)
-{
- call_stub_t *stub= NULL;
- iot_request_t *req = NULL;
- int waitstat = 0;
-
- LOCK (&worker->qlock);
- {
- while (!worker->queue_size) {
- waitstat = 0;
- waitstat = iot_unordered_request_wait (worker);
- /* If -1, request wait must've timed
- * out.
- */
- if (waitstat == -1)
- goto out;
+ if (ret)
+ GF_FREE(conf);
+
+ return ret;
+}
+
+static void
+iot_exit_threads(iot_conf_t *conf)
+{
+ pthread_mutex_lock(&conf->mutex);
+ {
+ conf->down = _gf_true;
+ /* Let all the threads know that xl is going down */
+ pthread_cond_broadcast(&conf->cond);
+ while (conf->curr_count) /*Wait for threads to exit*/
+ pthread_cond_wait(&conf->cond, &conf->mutex);
+ }
+ pthread_mutex_unlock(&conf->mutex);
+}
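
[Annotation, not part of the patch] iot_exit_threads() is a simple shutdown handshake: it sets conf->down under the mutex, broadcasts so idle workers wake up and notice the flag, then blocks until the last worker has decremented conf->curr_count and signalled back. A generic sketch of the same handshake with invented names:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int shutting_down;
static int nworkers;

static void
teardown(void)
{
    pthread_mutex_lock(&lock);
    shutting_down = 1;
    pthread_cond_broadcast(&cond);       /* wake every idle worker */
    while (nworkers > 0)
        pthread_cond_wait(&cond, &lock); /* workers signal as they exit */
    pthread_mutex_unlock(&lock);
}

static void
worker_exit(void)
{
    pthread_mutex_lock(&lock);
    if (--nworkers == 0)
        pthread_cond_signal(&cond);      /* last one out wakes the teardown */
    pthread_mutex_unlock(&lock);
}
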
+
+int
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ iot_conf_t *conf = this->private;
+ xlator_t *victim = data;
+ uint64_t stub_cnt = 0;
+ struct timespec sleep_till = {
+ 0,
+ };
+
+ if (GF_EVENT_PARENT_DOWN == event) {
+ if (victim->cleanup_starting) {
+ /* Wait for queued stubs to drain before notifying PARENT_DOWN */
+ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt);
+ if (stub_cnt) {
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+ pthread_mutex_lock(&conf->mutex);
+ {
+ while (stub_cnt) {
+ (void)pthread_cond_timedwait(&conf->cond, &conf->mutex,
+ &sleep_till);
+ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt);
+ }
}
+ pthread_mutex_unlock(&conf->mutex);
+ }
- list_for_each_entry (req, &worker->rqlist, list)
- break;
- list_del (&req->list);
- stub = req->stub;
-
- worker->queue_size--;
- }
-out:
- UNLOCK (&worker->qlock);
- iot_destroy_request (worker, req);
-
- return stub;
-}
-
-
-void *
-iot_worker_unordered (void *arg)
-{
- iot_worker_t *worker = arg;
- call_stub_t *stub = NULL;
-
- while (1) {
-
- stub = iot_dequeue_unordered (worker);
- /* If no request was received, we must've timed out,
- * and can exit. */
- if (!stub)
- break;
-
- call_resume (stub);
- }
-
- return NULL;
-}
-
-
-void
-deallocate_worker_array (iot_worker_t **workers)
-{
- FREE (workers);
-}
-
-void
-deallocate_workers (iot_worker_t **workers,
- int start_alloc_idx, int count)
-{
- int i;
- int end_count;
-
- end_count = count + start_alloc_idx;
- for (i = start_alloc_idx; (i < end_count); i++) {
- if (workers[i] != NULL) {
- mem_pool_destroy (workers[i]->req_pool);
- FREE (workers[i]);
- workers[i] = NULL;
- }
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
+ } else {
+ iot_exit_threads(conf);
}
-
-}
-
-
-iot_worker_t **
-allocate_worker_array (int count)
-{
- iot_worker_t **warr = NULL;
-
- warr = CALLOC (count, sizeof (iot_worker_t *));
+ }
- return warr;
-}
-
-
-iot_worker_t *
-allocate_worker (iot_conf_t * conf)
-{
- iot_worker_t *wrk = NULL;
-
- wrk = CALLOC (1, sizeof (iot_worker_t));
- if (wrk == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- goto out;
+ if (GF_EVENT_CHILD_DOWN == event) {
+ if (victim->cleanup_starting) {
+ iot_exit_threads(conf);
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name);
}
+ }
- wrk->req_pool = mem_pool_new (iot_request_t, IOT_REQUEST_MEMPOOL_SIZE);
- if (wrk->req_pool == NULL)
- goto free_wrk;
-
- INIT_LIST_HEAD (&wrk->rqlist);
- wrk->conf = conf;
- iot_notify_init (wrk);
- wrk->state = IOT_STATE_DEAD;
-
-out:
- return wrk;
+ default_notify(this, event, data);
-free_wrk:
- FREE (wrk);
- return NULL;
+ return 0;
}
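
[Annotation, not part of the patch] On PARENT_DOWN during brick cleanup, notify() waits for the queued stubs to drain before passing the event down, using a pthread_cond_timedwait() with a one-second absolute deadline so the wait can never block indefinitely on a missed signal (once the deadline has passed the loop effectively polls the counter, exactly as above). A compact sketch of that bounded-wait idiom with generic names:

#include <pthread.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static unsigned long pending; /* decremented and signalled by workers */

static void
wait_for_drain(void)
{
    struct timespec deadline;

    clock_gettime(CLOCK_REALTIME, &deadline);
    deadline.tv_sec += 1; /* absolute deadline, one second out */

    pthread_mutex_lock(&lock);
    while (pending)
        (void)pthread_cond_timedwait(&cond, &lock, &deadline);
    pthread_mutex_unlock(&lock);
}
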
-
-int
-allocate_workers (iot_conf_t *conf, iot_worker_t **workers, int start_alloc_idx,
- int count)
-{
- int i;
- int end_count, ret = -1;
-
- end_count = count + start_alloc_idx;
- for (i = start_alloc_idx; i < end_count; i++) {
- workers[i] = allocate_worker (conf);
- if (workers[i] == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- workers[i]->thread_idx = i;
- }
- ret = 0;
-
-out:
- return ret;
-}
-
-
void
-iot_stop_worker (iot_worker_t *worker)
+fini(xlator_t *this)
{
- LOCK (&worker->qlock);
- {
- worker->state = IOT_STATE_EXIT_REQUEST;
- }
- UNLOCK (&worker->qlock);
+ iot_conf_t *conf = this->private;
- iot_notify_worker (worker);
- pthread_join (worker->thread, NULL);
-}
+ if (!conf)
+ return;
+ if (conf->mutex_inited && conf->cond_inited)
+ iot_exit_threads(conf);
-void
-iot_stop_workers (iot_worker_t **workers, int start_idx, int count)
-{
- int i = 0;
- int end_idx = 0;
+ if (conf->cond_inited)
+ pthread_cond_destroy(&conf->cond);
- end_idx = start_idx + count;
- for (i = start_idx; i < end_idx; i++) {
- if (workers[i] != NULL) {
- iot_stop_worker (workers[i]);
- }
- }
-}
+ if (conf->mutex_inited)
+ pthread_mutex_destroy(&conf->mutex);
+ stop_iot_watchdog(this);
-int
-iot_startup_worker (iot_worker_t *worker, iot_worker_fn workerfunc)
-{
- int ret = -1;
- ret = pthread_create (&worker->thread, &worker->conf->w_attr,
- workerfunc, worker);
- if (ret != 0) {
- gf_log (worker->conf->this->name, GF_LOG_ERROR,
- "cannot start worker (%s)", strerror (errno));
- ret = -ret;
- } else {
- worker->state = IOT_STATE_ACTIVE;
- }
+ GF_FREE(conf);
- return ret;
+ this->private = NULL;
+ return;
}
-
int
-iot_startup_workers (iot_worker_t **workers, int start_idx, int count,
- iot_worker_fn workerfunc)
-{
- int i = 0;
- int end_idx = 0;
- int ret = -1;
-
- end_idx = start_idx + count;
- for (i = start_idx; i < end_idx; i++) {
- ret = iot_startup_worker (workers[i], workerfunc);
- if (ret < 0) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-void
-set_stack_size (iot_conf_t *conf)
+iot_client_destroy(xlator_t *this, client_t *client)
{
- int err = 0;
- size_t stacksize = IOT_THREAD_STACK_SIZE;
+ void *tmp = NULL;
- pthread_attr_init (&conf->w_attr);
- err = pthread_attr_setstacksize (&conf->w_attr, stacksize);
- if (err == EINVAL) {
- gf_log (conf->this->name, GF_LOG_WARNING,
- "Using default thread stack size");
- }
-}
+ if (client_ctx_del(client, this, &tmp) == 0) {
+ GF_FREE(tmp);
+ }
-
-void
-iot_cleanup_workers (iot_conf_t *conf)
-{
- if (conf->uworkers != NULL) {
- iot_stop_workers (conf->uworkers, 0,
- conf->max_u_threads);
-
- deallocate_workers (conf->uworkers, 0,
- conf->max_u_threads);
-
- deallocate_worker_array (conf->uworkers);
- }
-
- if (conf->oworkers != NULL) {
- iot_stop_workers (conf->oworkers, 0,
- conf->max_o_threads);
-
- deallocate_workers (conf->oworkers, 0,
- conf->max_o_threads);
-
- deallocate_worker_array (conf->oworkers);
- }
+ return 0;
}
-
-int
-workers_init (iot_conf_t *conf)
+static int
+iot_disconnect_cbk(xlator_t *this, client_t *client)
{
- int ret = -1;
+ int i;
+ call_stub_t *curr;
+ call_stub_t *next;
+ iot_conf_t *conf = this->private;
+ iot_client_ctx_t *ctx;
- if (conf == NULL) {
- ret = -EINVAL;
- goto err;
- }
+ if (!conf || !conf->cleanup_disconnected_reqs) {
+ goto out;
+ }
- /* Initialize un-ordered workers */
- conf->uworkers = allocate_worker_array (conf->max_u_threads);
- if (conf->uworkers == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- ret = -ENOMEM;
- goto err;
- }
-
- ret = allocate_workers (conf, conf->uworkers, 0,
- conf->max_u_threads);
- if (ret < 0) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- /* Initialize ordered workers */
- conf->oworkers = allocate_worker_array (conf->max_o_threads);
- if (conf->oworkers == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- ret = -ENOMEM;
- goto err;
- }
-
- ret = allocate_workers (conf, conf->oworkers, 0,
- conf->max_o_threads);
- if (ret < 0) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- set_stack_size (conf);
- ret = iot_startup_workers (conf->oworkers, 0, conf->min_o_threads,
- iot_worker_ordered);
- if (ret == -1) {
- /* logged inside iot_startup_workers */
- goto err;
- }
-
- ret = iot_startup_workers (conf->uworkers, 0, conf->min_u_threads,
- iot_worker_unordered);
- if (ret == -1) {
- /* logged inside iot_startup_workers */
- goto err;
- }
-
- return 0;
-
-err:
- if (conf != NULL) {
- iot_cleanup_workers (conf);
- }
-
- return ret;
-}
-
-
-int
-init (xlator_t *this)
-{
- iot_conf_t *conf = NULL;
- dict_t *options = this->options;
- int thread_count = IOT_DEFAULT_THREADS;
- gf_boolean_t autoscaling = IOT_SCALING_OFF;
- char *scalestr = NULL;
- int min_threads, max_threads, ret = -1;
-
- if (!this->children || this->children->next) {
- gf_log ("io-threads", GF_LOG_ERROR,
- "FATAL: iot not configured with exactly one child");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = (void *) CALLOC (1, sizeof (*conf));
- if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- if ((dict_get_str (options, "autoscaling", &scalestr)) == 0) {
- if ((gf_string2boolean (scalestr, &autoscaling)) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'autoscaling' option must be"
- " boolean");
- goto out;
- }
- }
-
- if (dict_get (options, "thread-count")) {
- thread_count = data_to_int32 (dict_get (options,
- "thread-count"));
- if (scalestr != NULL)
- gf_log (this->name, GF_LOG_WARNING,
- "'thread-count' is specified with "
- "'autoscaling' on. Ignoring"
- "'thread-count' option.");
- if (thread_count < 2)
- thread_count = IOT_MIN_THREADS;
- }
-
- min_threads = IOT_DEFAULT_THREADS;
- max_threads = IOT_MAX_THREADS;
- if (dict_get (options, "min-threads"))
- min_threads = data_to_int32 (dict_get (options,
- "min-threads"));
-
- if (dict_get (options, "max-threads"))
- max_threads = data_to_int32 (dict_get (options,
- "max-threads"));
-
- if (min_threads > max_threads) {
- gf_log (this->name, GF_LOG_ERROR, " min-threads must be less "
- "than max-threads");
- goto out;
- }
-
- /* If autoscaling is off, then adjust the min and max
- * threads according to thread-count.
- * This is based on the assumption that despite autoscaling
- * being off, we still want to have separate pools for data
- * and meta-data threads.
- */
- if (!autoscaling)
- max_threads = min_threads = thread_count;
-
- /* If user specifies an odd number of threads, increase it by
- * one. The reason for having an even number of threads is
- * explained later.
- */
- if (max_threads % 2)
- max_threads++;
-
- if(min_threads % 2)
- min_threads++;
-
- /* If the user wants to have only a single thread for
- * some strange reason, make sure we set this count to
- * 2. Explained later.
- */
- if (min_threads < IOT_MIN_THREADS)
- min_threads = IOT_MIN_THREADS;
-
- /* Again, have atleast two. Read on. */
- if (max_threads < IOT_MIN_THREADS)
- max_threads = IOT_MIN_THREADS;
-
- /* This is why we need atleast two threads.
- * We're dividing the specified thread pool into
- * 2 halves, equally between ordered and unordered
- * pools.
- */
-
- /* Init params for un-ordered workers. */
- pthread_mutex_init (&conf->utlock, NULL);
- conf->max_u_threads = max_threads / 2;
- conf->min_u_threads = min_threads / 2;
- conf->u_idle_time = IOT_DEFAULT_IDLE;
- conf->u_scaling = autoscaling;
-
- /* Init params for ordered workers. */
- pthread_mutex_init (&conf->otlock, NULL);
- conf->max_o_threads = max_threads / 2;
- conf->min_o_threads = min_threads / 2;
- conf->o_idle_time = IOT_DEFAULT_IDLE;
- conf->o_scaling = autoscaling;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "io-threads: Autoscaling: %s, "
- "min_threads: %d, max_threads: %d",
- (autoscaling) ? "on":"off", min_threads, max_threads);
-
- conf->this = this;
- ret = workers_init (conf);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot initialize worker threads, exiting init");
- FREE (conf);
- goto out;
+ pthread_mutex_lock(&conf->mutex);
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+ ctx = &conf->no_client[i];
+ list_for_each_entry_safe(curr, next, &ctx->reqs, list)
+ {
+ if (curr->frame->root->client != client) {
+ continue;
+ }
+ gf_log(this->name, GF_LOG_INFO,
+ "poisoning %s fop at %p for client %s",
+ gf_fop_list[curr->fop], curr, client->client_uid);
+ curr->poison = _gf_true;
}
+ }
+ pthread_mutex_unlock(&conf->mutex);
- this->private = conf;
- ret = 0;
out:
- return ret;
+ return 0;
}
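
[Annotation, not part of the patch] iot_disconnect_cbk() walks the queued stubs of every priority bucket and "poisons" the ones whose originating client has disconnected, so the worker that eventually dequeues them can drop them instead of doing work nobody will collect. The same idea expressed with a plain linked list and invented names (not GlusterFS API):

struct queued_req {
    struct queued_req *next;
    const void *owner; /* opaque client handle */
    int poisoned;
};

static void
poison_reqs_of(struct queued_req *head, const void *gone_client)
{
    struct queued_req *r;

    for (r = head; r != NULL; r = r->next) {
        if (r->owner == gone_client)
            r->poisoned = 1; /* worker discards it on dequeue */
    }
}
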
-
-void
-fini (xlator_t *this)
-{
- iot_conf_t *conf = this->private;
-
- FREE (conf);
-
- this->private = NULL;
- return;
-}
-
-/*
- * O - Goes to ordered threadpool.
- * U - Goes to un-ordered threadpool.
- * V - Variable, depends on whether the file is open.
- * If it is, then goes to ordered, otherwise to
- * un-ordered.
- */
-struct xlator_fops fops = {
- .open = iot_open, /* U */
- .create = iot_create, /* U */
- .readv = iot_readv, /* O */
- .writev = iot_writev, /* O */
- .flush = iot_flush, /* O */
- .fsync = iot_fsync, /* O */
- .lk = iot_lk, /* O */
- .stat = iot_stat, /* V */
- .fstat = iot_fstat, /* O */
- .truncate = iot_truncate, /* V */
- .ftruncate = iot_ftruncate, /* O */
- .checksum = iot_checksum, /* U */
- .unlink = iot_unlink, /* U */
- .lookup = iot_lookup, /* U */
- .setattr = iot_setattr, /* U */
- .fsetattr = iot_fsetattr, /* O */
- .access = iot_access, /* U */
- .readlink = iot_readlink, /* U */
- .mknod = iot_mknod, /* U */
- .mkdir = iot_mkdir, /* U */
- .rmdir = iot_rmdir, /* U */
- .symlink = iot_symlink, /* U */
- .rename = iot_rename, /* U */
- .link = iot_link, /* U */
- .opendir = iot_opendir, /* U */
- .fsyncdir = iot_fsyncdir, /* O */
- .statfs = iot_statfs, /* U */
- .setxattr = iot_setxattr, /* U */
- .getxattr = iot_getxattr, /* U */
- .fgetxattr = iot_fgetxattr, /* O */
- .fsetxattr = iot_fsetxattr, /* O */
- .removexattr = iot_removexattr, /* U */
- .readdir = iot_readdir, /* O */
- .readdirp = iot_readdirp, /* O */
- .xattrop = iot_xattrop, /* U */
- .fxattrop = iot_fxattrop, /* O */
+struct xlator_dumpops dumpops = {
+ .priv = iot_priv_dump,
};
-struct xlator_mops mops = {
+struct xlator_fops fops = {
+ .open = iot_open,
+ .create = iot_create,
+ .readv = iot_readv,
+ .writev = iot_writev,
+ .flush = iot_flush,
+ .fsync = iot_fsync,
+ .lk = iot_lk,
+ .stat = iot_stat,
+ .fstat = iot_fstat,
+ .truncate = iot_truncate,
+ .ftruncate = iot_ftruncate,
+ .unlink = iot_unlink,
+ .lookup = iot_lookup,
+ .setattr = iot_setattr,
+ .fsetattr = iot_fsetattr,
+ .access = iot_access,
+ .readlink = iot_readlink,
+ .mknod = iot_mknod,
+ .mkdir = iot_mkdir,
+ .rmdir = iot_rmdir,
+ .symlink = iot_symlink,
+ .rename = iot_rename,
+ .link = iot_link,
+ .opendir = iot_opendir,
+ .fsyncdir = iot_fsyncdir,
+ .statfs = iot_statfs,
+ .setxattr = iot_setxattr,
+ .getxattr = iot_getxattr,
+ .fgetxattr = iot_fgetxattr,
+ .fsetxattr = iot_fsetxattr,
+ .removexattr = iot_removexattr,
+ .fremovexattr = iot_fremovexattr,
+ .readdir = iot_readdir,
+ .readdirp = iot_readdirp,
+ .inodelk = iot_inodelk,
+ .finodelk = iot_finodelk,
+ .entrylk = iot_entrylk,
+ .fentrylk = iot_fentrylk,
+ .xattrop = iot_xattrop,
+ .fxattrop = iot_fxattrop,
+ .rchecksum = iot_rchecksum,
+ .fallocate = iot_fallocate,
+ .discard = iot_discard,
+ .zerofill = iot_zerofill,
+ .seek = iot_seek,
+ .lease = iot_lease,
+ .getactivelk = iot_getactivelk,
+ .setactivelk = iot_setactivelk,
+ .put = iot_put,
};
struct xlator_cbks cbks = {
+ .client_destroy = iot_client_destroy,
+ .client_disconnect = iot_disconnect_cbk,
};
struct volume_options options[] = {
- { .key = {"thread-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS
- },
- { .key = {"autoscaling"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"min-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .description = "Minimum number of threads must be greater than or "
- "equal to 2. If the specified value is less than 2 "
- "it is adjusted upwards to 2. This is a requirement"
- " for the current model of threading in io-threads."
- },
- { .key = {"max-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .description = "Maximum number of threads is advisory only so the "
- "user specified value will be used."
- },
- { .key = {NULL} },
+ {.key = {"thread-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ /*.option = "thread-count"*/
+ .description = "Number of threads in IO threads translator which "
+ "perform concurrent IO operations"
+
+ },
+ {.key = {"high-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform high priority IO operations at a given time"
+
+ },
+ {.key = {"normal-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform normal priority IO operations at a given time"
+
+ },
+ {.key = {"low-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform low priority IO operations at a given time"
+
+ },
+ {.key = {"least-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Max number of threads in IO threads translator which "
+ "perform least priority IO operations at a given time"},
+ {.key = {"enable-least-priority"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = SITE_H_ENABLE_LEAST_PRIORITY,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Enable/Disable least priority"},
+ {
+ .key = {"idle-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 0x7fffffff,
+ .default_value = "120",
+ },
+ {.key = {"watchdog-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = 0,
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-threads"},
+ .description = "Number of seconds a queue must be stalled before "
+ "starting an 'emergency' thread."},
+ {.key = {"cleanup-disconnected-reqs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-threads"},
+ .description = "'Poison' queued requests when a client disconnects"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-threads"},
+ .description = "Enable/Disable io threads translator"},
+ {
+ .key = {NULL},
+ },
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-threads",
+ .category = GF_MAINTAINED,
};
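
[Annotation, not part of the patch] The options declared above map directly onto volfile keys. An illustrative, hand-written stanza loading this translator with a couple of those options might look like the following; the volume and subvolume names are hypothetical:

volume iot-example
    type performance/io-threads
    option thread-count 32
    option watchdog-secs 30
    subvolumes posix-example
end-volume
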
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index 3843791ed8e..f54d2f4912d 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -1,183 +1,86 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IOT_H
#define __IOT_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "compat-errno.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include <stdlib.h>
-#include "locking.h"
-#include "compat.h"
-#ifndef GF_SOLARIS_HOST_OS
+#include <glusterfs/locking.h>
+#include "iot-mem-types.h"
#include <semaphore.h>
-#endif
-
-#define min(a,b) ((a)<(b)?(a):(b))
-#define max(a,b) ((a)>(b)?(a):(b))
+#include <glusterfs/statedump.h>
struct iot_conf;
-struct iot_worker;
-struct iot_request;
-
-struct iot_request {
- struct list_head list; /* Attaches this request to the list of
- requests.
- */
- call_stub_t *stub;
-};
-typedef enum {
- IOT_STATE_ACTIVE,
- IOT_STATE_EXIT_REQUEST,
- IOT_STATE_DEAD
-}iot_state_t;
-#define iot_worker_active(wrk) ((wrk)->state == IOT_STATE_ACTIVE)
-
-#define MAX_IDLE_SKEW 4 /* In secs */
-#define skew_sec_idle_time(sec) ((sec) + (random () % MAX_IDLE_SKEW))
-#define IOT_DEFAULT_IDLE 180 /* In secs. */
-
-#define IOT_MIN_THREADS 2
-#define IOT_DEFAULT_THREADS 16
-#define IOT_MAX_THREADS 64
-
-#define IOT_SCALING_OFF _gf_false
-#define IOT_SCALING_ON _gf_true
-#define iot_ordered_scaling_on(conf) ((conf)->o_scaling == IOT_SCALING_ON)
-#define iot_unordered_scaling_on(conf) ((conf)->u_scaling == IOT_SCALING_ON)
-
-#define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024))
-
-/* This signifies the max number of outstanding request we're expecting
- * at a point for every worker thread.
- * For an idea of the memory foot-print, consider at most 16 Bytes per
- * iot_request_t on a 64-bit system with another 16 bytes per chunk in the
- * header. For 64 slots in the pool, we'll use up 2 KiB, with 64 threads this
- * goes up to 128 KiB.
- *
- * Note that this size defines the size of the per-worker mem pool. The
- * advantage is that, we're not only reducing the rate of small iot_request_t
- * allocations from the heap but also reducing the contention on the libc heap
- * by having a mem pool, though small, for each worker.
- */
-#define IOT_REQUEST_MEMPOOL_SIZE 64
-
-struct iot_worker {
- struct list_head rqlist; /* List of requests assigned to me. */
- struct iot_conf *conf;
-#ifndef HAVE_SPINLOCK
- pthread_cond_t notifier;
-#else
- sem_t notifier;
-#endif
- int64_t q,dq;
- gf_lock_t qlock;
- int32_t queue_size;
- pthread_t thread;
- iot_state_t state; /* What state is the thread in. */
- int thread_idx; /* Thread's index into the worker
- array. Since this will be thread
- local data, for ensuring that
- number of threads dont fall below
- a minimum, we just dont allow
- threads with specific indices to
- exit. Helps us in eliminating one
- place where otherwise a lock
- would have been required to update
- centralized state inside conf.
- */
- struct mem_pool *req_pool; /* iot_request_t's come from here. */
-};
+#define MAX_IDLE_SKEW 4 /* In secs */
+#define skew_sec_idle_time(sec) ((sec) + (random() % MAX_IDLE_SKEW))
+#define IOT_DEFAULT_IDLE 120 /* In secs. */
+
+#define IOT_MIN_THREADS 1
+#define IOT_DEFAULT_THREADS 16
+#define IOT_MAX_THREADS 64
+
+#define IOT_THREAD_STACK_SIZE ((size_t)(256 * 1024))
+
+typedef struct {
+ struct list_head clients;
+ struct list_head reqs;
+} iot_client_ctx_t;
struct iot_conf {
- int32_t thread_count;
- struct iot_worker **workers;
-
- xlator_t *this;
- /* Config state for ordered threads. */
- pthread_mutex_t otlock; /* Used to sync any state that needs
- to be changed by the ordered
- threads.
- */
-
- int max_o_threads; /* Max. number of ordered threads */
- int min_o_threads; /* Min. number of ordered threads.
- Ordered thread count never falls
- below this threshold.
- */
-
- int o_idle_time; /* in Secs. The idle time after
- which an ordered thread exits.
- */
- gf_boolean_t o_scaling; /* Set to IOT_SCALING_OFF if user
- does not want thread scaling on
- ordered threads. If scaling is
- off, io-threads maintains at
- least min_o_threads number of
- threads and never lets any thread
- exit.
- */
- struct iot_worker **oworkers; /* Ordered thread pool. */
-
-
- /* Config state for unordered threads */
- pthread_mutex_t utlock; /* Used for scaling un-ordered
- threads. */
- struct iot_worker **uworkers; /* Un-ordered thread pool. */
- int max_u_threads; /* Number of unordered threads will
- not be higher than this. */
- int min_u_threads; /* Number of unordered threads
- should not fall below this value.
- */
- int u_idle_time; /* If an unordered thread does not
- get a request for this amount of
- secs, it should try to die.
- */
- gf_boolean_t u_scaling; /* Set to IOT_SCALING_OFF if user
- does not want thread scaling on
- unordered threads. If scaling is
- off, io-threads maintains at
- least min_u_threads number of
- threads and never lets any thread
- exit.
- */
-
- pthread_attr_t w_attr; /* Used to reduce the stack size of
- the pthread worker down from the
- default of 8MiB.
- */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ int32_t max_count; /* configured maximum */
+ int32_t curr_count; /* actual number of threads running */
+ int32_t sleep_count;
+
+ int32_t idle_time; /* in seconds */
+
+ struct list_head clients[GF_FOP_PRI_MAX];
+ /*
+ * It turns out that there are several ways a frame can get to us
+ * without having an associated client (server_first_lookup was the
+ * first one I hit). Instead of trying to update all such callers,
+ * we use this to queue them.
+ */
+ iot_client_ctx_t no_client[GF_FOP_PRI_MAX];
+
+ int32_t ac_iot_limit[GF_FOP_PRI_MAX];
+ int32_t ac_iot_count[GF_FOP_PRI_MAX];
+ int queue_sizes[GF_FOP_PRI_MAX];
+ int32_t queue_size;
+ gf_atomic_t stub_cnt;
+ pthread_attr_t w_attr;
+ gf_boolean_t least_priority; /*Enable/Disable least-priority */
+
+ xlator_t *this;
+ size_t stack_size;
+ gf_boolean_t down; /*PARENT_DOWN event is notified*/
+ gf_boolean_t mutex_inited;
+ gf_boolean_t cond_inited;
+
+ int32_t watchdog_secs;
+ gf_boolean_t watchdog_running;
+ pthread_t watchdog_thread;
+ gf_boolean_t queue_marked[GF_FOP_PRI_MAX];
+ gf_boolean_t cleanup_disconnected_reqs;
};
typedef struct iot_conf iot_conf_t;
-typedef struct iot_worker iot_worker_t;
-typedef struct iot_request iot_request_t;
#endif /* __IOT_H */
diff --git a/xlators/performance/io-threads/src/iot-mem-types.h b/xlators/performance/io-threads/src/iot-mem-types.h
new file mode 100644
index 00000000000..29565f34dd4
--- /dev/null
+++ b/xlators/performance/io-threads/src/iot-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __IOT_MEM_TYPES_H__
+#define __IOT_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_iot_mem_types_ {
+ gf_iot_mt_iot_conf_t = gf_common_mt_end + 1,
+ gf_iot_mt_client_ctx_t,
+ gf_iot_mt_end
+};
+#endif
diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/performance/md-cache/Makefile.am
index af437a64d6d..af437a64d6d 100644
--- a/xlators/performance/stat-prefetch/Makefile.am
+++ b/xlators/performance/md-cache/Makefile.am
diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am
new file mode 100644
index 00000000000..447ff0f30f0
--- /dev/null
+++ b/xlators/performance/md-cache/src/Makefile.am
@@ -0,0 +1,29 @@
+xlator_LTLIBRARIES = md-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+md_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+md_cache_la_SOURCES = md-cache.c
+md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = md-cache-mem-types.h md-cache-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+
+stat-prefetch-compat:
+ mkdir -p $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+ rm -rf $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+ ln -s ./md-cache.so $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+
+
+install-exec-local: stat-prefetch-compat
+
+uninstall-local:
+ rm -f $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h
new file mode 100644
index 00000000000..47a07005717
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MDC_MEM_TYPES_H__
+#define __MDC_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_mdc_mem_types_ {
+ gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1,
+ gf_mdc_mt_md_cache_t,
+ gf_mdc_mt_mdc_conf_t,
+ gf_mdc_mt_mdc_ipc,
+ gf_mdc_mt_end
+};
+#endif
diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h
new file mode 100644
index 00000000000..f367bad1991
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-messages.h
@@ -0,0 +1,29 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MD_CACHE_MESSAGES_H_
+#define _MD_CACHE_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(MD_CACHE, MD_CACHE_MSG_NO_MEMORY, MD_CACHE_MSG_DISCARD_UPDATE,
+ MD_CACHE_MSG_CACHE_UPDATE, MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ MD_CACHE_MSG_NO_XATTR_CACHE);
+
+#endif /* _MD_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
new file mode 100644
index 00000000000..a405be51f02
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -0,0 +1,4020 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include "md-cache-mem-types.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/glusterfs-acl.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/upcall-utils.h>
+#include <assert.h>
+#include <sys/time.h>
+#include "md-cache-messages.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/atomic.h>
+
+/* TODO:
+ - cache symlink() link names and nuke symlink-cache
+ - send proper postbuf in setattr_cbk even when op_ret = -1
+*/
+
+struct mdc_statfs_cache {
+ pthread_mutex_t lock;
+ time_t last_refreshed; /* (time_t)-1 if not yet initialized. */
+ struct statvfs buf;
+};
+
+struct mdc_statistics {
+ gf_atomic_t stat_hit; /* No. of times lookup/stat was served from
+ mdc */
+
+ gf_atomic_t stat_miss; /* No. of times valid stat wasn't present in
+ mdc */
+
+ gf_atomic_t xattr_hit; /* No. of times getxattr was served from mdc,
+ Note: this doesn't count the xattr served
+ from lookup */
+
+ gf_atomic_t xattr_miss; /* No. of times xattr req was WIND from mdc */
+ gf_atomic_t negative_lookup; /* No. of negative lookups */
+ gf_atomic_t nameless_lookup; /* No. of negative lookups that were sent
+ to bricks */
+
+ gf_atomic_t stat_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t xattr_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t need_lookup; /* No. of lookups issued, because other
+ xlators requested for explicit lookup */
+};
+
+struct mdc_conf {
+ uint32_t timeout;
+ gf_boolean_t cache_posix_acl;
+ gf_boolean_t cache_glusterfs_acl;
+ gf_boolean_t cache_selinux;
+ gf_boolean_t cache_capability;
+ gf_boolean_t cache_ima;
+ gf_boolean_t force_readdirp;
+ gf_boolean_t cache_swift_metadata;
+ gf_boolean_t cache_samba_metadata;
+ gf_boolean_t mdc_invalidation;
+ gf_boolean_t global_invalidation;
+
+ time_t last_child_down;
+ gf_lock_t lock;
+ struct mdc_statistics mdc_counter;
+ gf_boolean_t cache_statfs;
+ struct mdc_statfs_cache statfs_cache;
+ char *mdc_xattr_str;
+ gf_atomic_int32_t generation;
+};
+
+struct mdc_local;
+typedef struct mdc_local mdc_local_t;
+
+#define MDC_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ mdc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ mdc_local_wipe(__xl, __local); \
+ } while (0)
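
[Annotation, not part of the patch] MDC_STACK_UNWIND detaches frame->local before unwinding, so the frame can be recycled by the caller without racing against destruction of the local, which is wiped only after the unwind. A hypothetical callback showing the intended usage (the function name is illustrative; the signature is the standard stat callback):

int
example_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                 int32_t op_ret, int32_t op_errno, struct iatt *buf,
                 dict_t *xdata)
{
    /* unwind first; the macro wipes the detached local afterwards */
    MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
    return 0;
}
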
+
+struct md_cache {
+ ia_prot_t md_prot;
+ uint32_t md_nlink;
+ uint32_t md_uid;
+ uint32_t md_gid;
+ uint32_t md_atime_nsec;
+ uint32_t md_mtime_nsec;
+ uint32_t md_ctime_nsec;
+ int64_t md_atime;
+ int64_t md_mtime;
+ int64_t md_ctime;
+ uint64_t md_rdev;
+ uint64_t md_size;
+ uint64_t md_blocks;
+ uint64_t generation;
+ dict_t *xattr;
+ char *linkname;
+ time_t ia_time;
+ time_t xa_time;
+ gf_boolean_t need_lookup;
+ gf_boolean_t valid;
+ gf_boolean_t gen_rollover;
+ gf_boolean_t invalidation_rollover;
+ gf_lock_t lock;
+};
+
+struct mdc_local {
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ char *linkname;
+ char *key;
+ dict_t *xattr;
+ uint64_t incident_time;
+ bool update_cache;
+};
+
+int
+__mdc_inode_ctx_get(xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+ uint64_t mdc_int = 0;
+
+ ret = __inode_ctx_get(inode, this, &mdc_int);
+ mdc = (void *)(long)(mdc_int);
+ if (ret == 0 && mdc_p)
+ *mdc_p = mdc;
+
+ return ret;
+}
+
+int
+mdc_inode_ctx_get(xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+ int ret = -1;
+
+ if (!inode)
+ goto out;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_get(this, inode, mdc_p);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return ret;
+}
+
+uint64_t
+__mdc_inc_generation(xlator_t *this, struct md_cache *mdc)
+{
+ uint64_t gen = 0, rollover;
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+
+ gen = GF_ATOMIC_INC(conf->generation);
+ if (gen == 0) {
+ mdc->gen_rollover = !mdc->gen_rollover;
+ gen = GF_ATOMIC_INC(conf->generation);
+ mdc->ia_time = 0;
+ mdc->generation = 0;
+ }
+
+ rollover = mdc->gen_rollover;
+ gen |= (rollover << 32);
+ return gen;
+}
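
[Annotation, not part of the patch] The value returned above packs two fields into one 64-bit word: the low 32 bits carry the monotonically increasing counter and bit 32 carries the per-inode rollover flag, which is how mdc_inode_iatt_set_validate() later splits an incident_time apart again. Shown in isolation with illustrative values:

uint64_t counter = 41;  /* low 32 bits: increasing generation */
uint64_t rollover = 1;  /* toggles whenever the counter wraps */

uint64_t packed = counter | (rollover << 32); /* pack, as above */
uint32_t roll = packed >> 32;                 /* unpack, as the validator does */
uint32_t ctr = packed & 0xffffffff;
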
+
+uint64_t
+mdc_inc_generation(xlator_t *this, inode_t *inode)
+{
+ struct mdc_conf *conf = NULL;
+ uint64_t gen = 0;
+ struct md_cache *mdc = NULL;
+
+ conf = this->private;
+
+ mdc_inode_ctx_get(this, inode, &mdc);
+
+ if (mdc) {
+ LOCK(&mdc->lock);
+ {
+ gen = __mdc_inc_generation(this, mdc);
+ }
+ UNLOCK(&mdc->lock);
+ } else {
+ gen = GF_ATOMIC_INC(conf->generation);
+ if (gen == 0) {
+ gen = GF_ATOMIC_INC(conf->generation);
+ }
+ }
+
+ return gen;
+}
+
+uint64_t
+mdc_get_generation(xlator_t *this, inode_t *inode)
+{
+ struct mdc_conf *conf = NULL;
+ uint64_t gen = 0;
+ struct md_cache *mdc = NULL;
+
+ conf = this->private;
+
+ mdc_inode_ctx_get(this, inode, &mdc);
+
+ if (mdc) {
+ LOCK(&mdc->lock);
+ {
+ gen = mdc->generation;
+ }
+ UNLOCK(&mdc->lock);
+ } else
+ gen = GF_ATOMIC_GET(conf->generation);
+
+ return gen;
+}
+
+int
+__mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+ int ret = 0;
+ uint64_t mdc_int = 0;
+
+ mdc_int = (long)mdc;
+ ret = __inode_ctx_set(inode, this, &mdc_int);
+
+ return ret;
+}
+
+int
+mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+ int ret;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_set(this, inode, mdc);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+mdc_local_t *
+mdc_local_get(call_frame_t *frame, inode_t *inode)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (local)
+ goto out;
+
+ local = GF_CALLOC(sizeof(*local), 1, gf_mdc_mt_mdc_local_t);
+ if (!local)
+ goto out;
+
+ local->incident_time = mdc_get_generation(frame->this, inode);
+ frame->local = local;
+out:
+ return local;
+}
+
+void
+mdc_local_wipe(xlator_t *this, mdc_local_t *local)
+{
+ if (!local)
+ return;
+
+ loc_wipe(&local->loc);
+
+ loc_wipe(&local->loc2);
+
+ if (local->fd)
+ fd_unref(local->fd);
+
+ GF_FREE(local->linkname);
+
+ GF_FREE(local->key);
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+
+ GF_FREE(local);
+ return;
+}
+
+int
+mdc_inode_wipe(xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ uint64_t mdc_int = 0;
+ struct md_cache *mdc = NULL;
+
+ ret = inode_ctx_del(inode, this, &mdc_int);
+ if (ret != 0)
+ goto out;
+
+ mdc = (void *)(long)mdc_int;
+
+ if (mdc->xattr)
+ dict_unref(mdc->xattr);
+
+ GF_FREE(mdc->linkname);
+
+ GF_FREE(mdc);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+struct md_cache *
+mdc_inode_prep(xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_get(this, inode, &mdc);
+ if (ret == 0)
+ goto unlock;
+
+ mdc = GF_CALLOC(sizeof(*mdc), 1, gf_mdc_mt_md_cache_t);
+ if (!mdc) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ goto unlock;
+ }
+
+ LOCK_INIT(&mdc->lock);
+
+ ret = __mdc_inode_ctx_set(this, inode, mdc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ GF_FREE(mdc);
+ mdc = NULL;
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ return mdc;
+}
+
+/* Cache is valid if:
+ * - It was not cached before a brick went down. The brick-down case is
+ *   handled by invalidating the whole cache whenever any brick goes down.
+ * - The cache time has not expired.
+ */
+static gf_boolean_t
+__is_cache_valid(xlator_t *this, time_t mdc_time)
+{
+ gf_boolean_t ret = _gf_true;
+ struct mdc_conf *conf = NULL;
+ uint32_t timeout = 0;
+ time_t last_child_down = 0;
+
+ conf = this->private;
+
+    /* conf->lock is deliberately not taken here, so that multi-threaded
+     * IO doesn't contend on a global lock. The lock is taken while the
+     * variable is updated, so at least the writes stay intact. The read
+     * of last_child_down may return a stale value, but only for a very
+     * short period of time.
+     */
+ last_child_down = conf->last_child_down;
+ timeout = conf->timeout;
+
+ if ((mdc_time == 0) ||
+ ((last_child_down != 0) && (mdc_time < last_child_down))) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ if (gf_time() >= (mdc_time + timeout)) {
+ ret = _gf_false;
+ }
+
+out:
+ return ret;
+}
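
[Annotation, not part of the patch] Restated with plain types, the freshness test above says an entry is stale if it was never stamped, if it was stamped before the most recent child-down event, or if it has outlived the configured timeout. A self-contained equivalent with invented names:

#include <time.h>

static int
entry_is_fresh(time_t stamped_at, time_t last_down, time_t ttl)
{
    if (stamped_at == 0)
        return 0;                  /* never cached */
    if (last_down != 0 && stamped_at < last_down)
        return 0;                  /* cached before a brick went down */
    return time(NULL) < stamped_at + ttl;
}
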
+
+static gf_boolean_t
+is_md_cache_iatt_valid(xlator_t *this, struct md_cache *mdc)
+{
+ gf_boolean_t ret = _gf_true;
+
+ LOCK(&mdc->lock);
+ {
+ if (mdc->valid == _gf_false) {
+ ret = mdc->valid;
+ } else {
+ ret = __is_cache_valid(this, mdc->ia_time);
+ if (ret == _gf_false) {
+ mdc->ia_time = 0;
+ mdc->generation = 0;
+ }
+ }
+ }
+ UNLOCK(&mdc->lock);
+
+ return ret;
+}
+
+static gf_boolean_t
+is_md_cache_xatt_valid(xlator_t *this, struct md_cache *mdc)
+{
+ gf_boolean_t ret = _gf_true;
+
+ LOCK(&mdc->lock);
+ {
+ ret = __is_cache_valid(this, mdc->xa_time);
+ if (ret == _gf_false)
+ mdc->xa_time = 0;
+ }
+ UNLOCK(&mdc->lock);
+
+ return ret;
+}
+
+void
+mdc_from_iatt(struct md_cache *mdc, struct iatt *iatt)
+{
+ mdc->md_prot = iatt->ia_prot;
+ mdc->md_nlink = iatt->ia_nlink;
+ mdc->md_uid = iatt->ia_uid;
+ mdc->md_gid = iatt->ia_gid;
+ mdc->md_atime = iatt->ia_atime;
+ mdc->md_atime_nsec = iatt->ia_atime_nsec;
+ mdc->md_mtime = iatt->ia_mtime;
+ mdc->md_mtime_nsec = iatt->ia_mtime_nsec;
+ mdc->md_ctime = iatt->ia_ctime;
+ mdc->md_ctime_nsec = iatt->ia_ctime_nsec;
+ mdc->md_rdev = iatt->ia_rdev;
+ mdc->md_size = iatt->ia_size;
+ mdc->md_blocks = iatt->ia_blocks;
+}
+
+void
+mdc_to_iatt(struct md_cache *mdc, struct iatt *iatt)
+{
+ iatt->ia_prot = mdc->md_prot;
+ iatt->ia_nlink = mdc->md_nlink;
+ iatt->ia_uid = mdc->md_uid;
+ iatt->ia_gid = mdc->md_gid;
+ iatt->ia_atime = mdc->md_atime;
+ iatt->ia_atime_nsec = mdc->md_atime_nsec;
+ iatt->ia_mtime = mdc->md_mtime;
+ iatt->ia_mtime_nsec = mdc->md_mtime_nsec;
+ iatt->ia_ctime = mdc->md_ctime;
+ iatt->ia_ctime_nsec = mdc->md_ctime_nsec;
+ iatt->ia_rdev = mdc->md_rdev;
+ iatt->ia_size = mdc->md_size;
+ iatt->ia_blocks = mdc->md_blocks;
+}
+
+int
+mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+ struct iatt *iatt, gf_boolean_t update_time,
+ uint64_t incident_time)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+ uint32_t rollover = 0;
+ uint64_t gen = 0;
+ gf_boolean_t update_xa_time = _gf_false;
+ struct mdc_conf *conf = this->private;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc) {
+ ret = -1;
+ goto out;
+ }
+
+ rollover = incident_time >> 32;
+ incident_time = (incident_time & 0xffffffff);
+
+ LOCK(&mdc->lock);
+ {
+ if (!iatt || !iatt->ia_ctime) {
+ gf_msg_callingfn("md-cache", GF_LOG_TRACE, 0, 0,
+ "invalidating iatt(NULL)"
+ "(%s)",
+ uuid_utoa(inode->gfid));
+ mdc->ia_time = 0;
+ mdc->valid = 0;
+
+ gen = __mdc_inc_generation(this, mdc);
+ mdc->generation = (gen & 0xffffffff);
+ goto unlock;
+ }
+
+        /* There can be a race in invalidation where invalidations issued
+         * in the order A, B reach md-cache in the order B, A. Hence, make
+         * sure invalidation A is discarded if it arrives after B. The
+         * ctime of a file always moves forward, unlike atime and mtime
+         * (which the user can set to any date), and ctime is also updated
+         * whenever atime/mtime changes, so compare ctime only.
+         */
+ if (mdc->md_ctime > iatt->ia_ctime) {
+ gf_msg_callingfn(this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request (%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
+ if ((mdc->md_ctime == iatt->ia_ctime) &&
+ (mdc->md_ctime_nsec > iatt->ia_ctime_nsec)) {
+ gf_msg_callingfn(this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request(ctime_nsec) (%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
+
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (iatt->ia_ctime != mdc->md_ctime) ||
+ (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) {
+ if (conf->global_invalidation &&
+ (!prebuf || (prebuf->ia_mtime != mdc->md_mtime) ||
+ (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_ctime_nsec != mdc->md_ctime_nsec))) {
+ if (IA_ISREG(inode->ia_type)) {
+ gf_msg("md-cache", GF_LOG_TRACE, 0,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "prebuf doesn't match the value we have cached,"
+ " invalidate the inode(%s)",
+ uuid_utoa(inode->gfid));
+
+ inode_invalidate(inode);
+ }
+ } else {
+ update_xa_time = _gf_true;
+ }
+ }
+
+ if ((mdc->gen_rollover == rollover) &&
+ (incident_time >= mdc->generation)) {
+ mdc_from_iatt(mdc, iatt);
+ mdc->valid = _gf_true;
+ if (update_time) {
+ mdc->ia_time = gf_time();
+ if (mdc->xa_time && update_xa_time)
+ mdc->xa_time = mdc->ia_time;
+ }
+
+ gf_msg_callingfn(
+ "md-cache", GF_LOG_TRACE, 0, MD_CACHE_MSG_CACHE_UPDATE,
+ "Updated iatt(%s)"
+ " time:%lld generation=%lld",
+ uuid_utoa(iatt->ia_gfid), (unsigned long long)mdc->ia_time,
+ (unsigned long long)mdc->generation);
+ } else {
+ gf_msg_callingfn("md-cache", GF_LOG_TRACE, 0, 0,
+ "not updating cache (%s)"
+ "mdc-rollover=%u rollover=%u "
+ "mdc-generation=%llu "
+ "mdc-ia_time=%llu incident_time=%llu ",
+ uuid_utoa(iatt->ia_gfid), mdc->gen_rollover,
+ rollover, (unsigned long long)mdc->generation,
+ (unsigned long long)mdc->ia_time,
+ (unsigned long long)incident_time);
+ }
+ }
+unlock:
+ UNLOCK(&mdc->lock);
+
+out:
+ return ret;
+}
+
+int
+mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt,
+ uint64_t incident_time)
+{
+ return mdc_inode_iatt_set_validate(this, inode, NULL, iatt, _gf_true,
+ incident_time);
+}
+
+int
+mdc_inode_iatt_get(xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0) {
+ gf_msg_trace("md-cache", 0, "mdc_inode_ctx_get failed (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ if (!is_md_cache_iatt_valid(this, mdc)) {
+ gf_msg_trace("md-cache", 0, "iatt cache not valid for (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ LOCK(&mdc->lock);
+ {
+ mdc_to_iatt(mdc, iatt);
+ }
+ UNLOCK(&mdc->lock);
+
+ gf_uuid_copy(iatt->ia_gfid, inode->gfid);
+ iatt->ia_ino = gfid_to_ino(inode->gfid);
+ iatt->ia_dev = 42;
+ iatt->ia_type = inode->ia_type;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+struct updatedict {
+ dict_t *dict;
+ int ret;
+};
+
+static int
+is_mdc_key_satisfied(xlator_t *this, const char *key)
+{
+ int ret = 0;
+ char *pattern = NULL;
+ struct mdc_conf *conf = this->private;
+ char *mdc_xattr_str = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+
+ if (!key)
+ goto out;
+
+    /* conf->mdc_xattr_str is never freed, hence it is safe to use it
+     * outside the lock */
+ tmp1 = conf->mdc_xattr_str;
+ if (!tmp1)
+ goto out;
+
+ mdc_xattr_str = gf_strdup(tmp1);
+ if (!mdc_xattr_str)
+ goto out;
+
+ pattern = strtok_r(mdc_xattr_str, ",", &tmp);
+ while (pattern) {
+ gf_strTrim(&pattern);
+ if (fnmatch(pattern, key, 0) == 0) {
+ ret = 1;
+ break;
+ } else {
+ gf_msg_trace("md-cache", 0,
+ "xattr key %s doesn't satisfy "
+ "caching requirements",
+ key);
+ }
+ pattern = strtok_r(NULL, ",", &tmp);
+ }
+ GF_FREE(mdc_xattr_str);
+out:
+ return ret;
+}
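
[Annotation, not part of the patch] is_mdc_key_satisfied() duplicates the configured comma-separated pattern string, walks it with strtok_r() and accepts the key on the first fnmatch() hit. A stand-alone sketch of the same matching scheme (the fixed-size buffer and names are illustrative):

#include <fnmatch.h>
#include <string.h>

static int
key_matches(const char *patterns_csv, const char *key)
{
    char buf[512];
    char *save = NULL;
    char *pat;

    strncpy(buf, patterns_csv, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = '\0';

    for (pat = strtok_r(buf, ",", &save); pat != NULL;
         pat = strtok_r(NULL, ",", &save)) {
        if (fnmatch(pat, key, 0) == 0)
            return 1;
    }
    return 0;
}
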
+
+static int
+updatefn(dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct updatedict *u = data;
+
+ if (is_mdc_key_satisfied(THIS, key)) {
+ if (!u->dict) {
+ u->dict = dict_new();
+ if (!u->dict) {
+ u->ret = -1;
+ return -1;
+ }
+ }
+
+ if (dict_set(u->dict, key, value) < 0) {
+ u->ret = -1;
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
+mdc_dict_update(dict_t **tgt, dict_t *src)
+{
+ struct updatedict u = {
+ .dict = *tgt,
+ .ret = 0,
+ };
+
+ dict_foreach(src, updatefn, &u);
+
+ if (*tgt)
+ return u.ret;
+
+ if ((u.ret < 0) && u.dict) {
+ dict_unref(u.dict);
+ return u.ret;
+ }
+
+ *tgt = u.dict;
+
+ return u.ret;
+}
+
+int
+mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+ dict_t *newdict = NULL;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!dict) {
+ gf_msg_trace("md-cache", 0,
+ "mdc_inode_xatt_set failed (%s) "
+ "dict NULL",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ LOCK(&mdc->lock);
+ {
+ if (mdc->xattr) {
+ gf_msg_trace("md-cache", 0,
+ "deleting the old xattr "
+ "cache (%s)",
+ uuid_utoa(inode->gfid));
+ dict_unref(mdc->xattr);
+ mdc->xattr = NULL;
+ }
+
+ ret = mdc_dict_update(&newdict, dict);
+ if (ret < 0) {
+ UNLOCK(&mdc->lock);
+ goto out;
+ }
+
+ if (newdict)
+ mdc->xattr = newdict;
+
+ mdc->xa_time = gf_time();
+ gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld",
+ uuid_utoa(inode->gfid), (long long)mdc->xa_time);
+ }
+ UNLOCK(&mdc->lock);
+ ret = 0;
+out:
+ return ret;
+}
+
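+/* Merge the cache-worthy keys of 'dict' into the existing xattr cache
+ * without refreshing xa_time. */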
+int
+mdc_inode_xatt_update(xlator_t *this, inode_t *inode, dict_t *dict)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!dict)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ ret = mdc_dict_update(&mdc->xattr, dict);
+ if (ret < 0) {
+ UNLOCK(&mdc->lock);
+ goto out;
+ }
+ }
+ UNLOCK(&mdc->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+mdc_inode_xatt_unset(xlator_t *this, inode_t *inode, char *name)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!name || !mdc->xattr)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ dict_del(mdc->xattr, name);
+ }
+ UNLOCK(&mdc->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+mdc_inode_xatt_get(xlator_t *this, inode_t *inode, dict_t **dict)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0) {
+ gf_msg_trace("md-cache", 0, "mdc_inode_ctx_get failed (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ if (!is_md_cache_xatt_valid(this, mdc)) {
+ gf_msg_trace("md-cache", 0, "xattr cache not valid for (%s)",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ LOCK(&mdc->lock);
+ {
+ ret = 0;
+        /* A missing xattr dict only means no keys were present, i.e.
+           it acts as a negative cache for the "loaded" keys.
+        */
+ if (!mdc->xattr) {
+ gf_msg_trace("md-cache", 0, "xattr not present (%s)",
+ uuid_utoa(inode->gfid));
+ goto unlock;
+ }
+
+ if (dict)
+ *dict = dict_ref(mdc->xattr);
+ }
+unlock:
+ UNLOCK(&mdc->lock);
+
+out:
+ return ret;
+}
+
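+/* Return the need_lookup flag and clear it under the lock; a true return
+ * tells the caller to bypass the cache for this lookup. */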
+gf_boolean_t
+mdc_inode_reset_need_lookup(xlator_t *this, inode_t *inode)
+{
+ struct md_cache *mdc = NULL;
+ gf_boolean_t need = _gf_false;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ need = mdc->need_lookup;
+ mdc->need_lookup = _gf_false;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return need;
+}
+
+void
+mdc_inode_set_need_lookup(xlator_t *this, inode_t *inode, gf_boolean_t need)
+{
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ mdc->need_lookup = need;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return;
+}
+
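+/* Drop the cached iatt and bump the generation so that older in-flight
+ * replies are not cached over this invalidation. */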
+void
+mdc_inode_iatt_invalidate(xlator_t *this, inode_t *inode)
+{
+ struct md_cache *mdc = NULL;
+ uint32_t gen = 0;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
+
+ gen = mdc_inc_generation(this, inode) & 0xffffffff;
+
+ LOCK(&mdc->lock);
+ {
+ mdc->ia_time = 0;
+ mdc->valid = _gf_false;
+ mdc->generation = gen;
+ }
+ UNLOCK(&mdc->lock);
+
+out:
+ return;
+}
+
+int
+mdc_inode_xatt_invalidate(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+ goto out;
+
+ LOCK(&mdc->lock);
+ {
+ mdc->xa_time = 0;
+ }
+    UNLOCK(&mdc->lock);
+
+    ret = 0;
+
+out:
+ return ret;
+}
+
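+/* Find the inode for iatt->ia_gfid in the top-level inode table and
+ * refresh its cached iatt. */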
+static int
+mdc_update_gfid_stat(xlator_t *this, struct iatt *iatt)
+{
+ int ret = 0;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, iatt->ia_gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+ ret = mdc_inode_iatt_set_validate(this, inode, NULL, iatt, _gf_true,
+ mdc_inc_generation(this, inode));
+out:
+ return ret;
+}
+
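+/* Add every configured cacheable-xattr pattern as a key in the request
+ * dict so that replies carry those xattrs; returns true only if all keys
+ * were added. */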
+static bool
+mdc_load_reqs(xlator_t *this, dict_t *dict)
+{
+ struct mdc_conf *conf = this->private;
+ char *pattern = NULL;
+ char *mdc_xattr_str = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ int ret = 0;
+ bool loaded = false;
+
+ tmp1 = conf->mdc_xattr_str;
+ if (!tmp1)
+ goto out;
+
+ mdc_xattr_str = gf_strdup(tmp1);
+ if (!mdc_xattr_str)
+ goto out;
+
+ pattern = strtok_r(mdc_xattr_str, ",", &tmp);
+ while (pattern) {
+ gf_strTrim(&pattern);
+ ret = dict_set_int8(dict, pattern, 0);
+ if (ret) {
+ conf->mdc_xattr_str = NULL;
+ gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for xattrs, dict_set failed");
+ goto out;
+ }
+ pattern = strtok_r(NULL, ",", &tmp);
+ }
+
+ loaded = true;
+
+out:
+ GF_FREE(mdc_xattr_str);
+
+ return loaded;
+}
+
+struct checkpair {
+ int ret;
+ dict_t *rsp;
+};
+
+static int
+checkfn(dict_t *this, char *key, data_t *value, void *data)
+{
+ struct checkpair *pair = data;
+
+ if (!is_mdc_key_satisfied(THIS, key))
+ pair->ret = 0;
+
+ return 0;
+}
+
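+/* Return 1 only if every key requested in 'req' is a cacheable xattr,
+ * i.e. the cached xattr dict can satisfy the whole request. */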
+int
+mdc_xattr_satisfied(xlator_t *this, dict_t *req, dict_t *rsp)
+{
+ struct checkpair pair = {
+ .ret = 1,
+ .rsp = rsp,
+ };
+
+ dict_foreach(req, checkfn, &pair);
+
+ return pair.ret;
+}
+
+static void
+mdc_cache_statfs(xlator_t *this, struct statvfs *buf)
+{
+ struct mdc_conf *conf = this->private;
+
+ pthread_mutex_lock(&conf->statfs_cache.lock);
+ {
+ memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs));
+ conf->statfs_cache.last_refreshed = gf_time();
+ }
+ pthread_mutex_unlock(&conf->statfs_cache.lock);
+}
+
+int
+mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
+{
+ struct mdc_conf *conf = this->private;
+ uint32_t cache_age = 0;
+ int ret = 0;
+
+ if (!buf || !conf) {
+ ret = -1;
+ goto err;
+ }
+
+ *buf = NULL;
+
+ pthread_mutex_lock(&conf->statfs_cache.lock);
+ {
+ /* Skip if the cache is not initialized. */
+ if (conf->statfs_cache.last_refreshed == (time_t)-1) {
+ ret = -1;
+ goto unlock;
+ }
+
+ cache_age = (gf_time() - conf->statfs_cache.last_refreshed);
+
+ gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs",
+ cache_age);
+ if (cache_age > conf->timeout) {
+ /* Expire the cache. */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Cache age %u secs exceeded timeout %u secs", cache_age,
+ conf->timeout);
+ ret = -1;
+ goto unlock;
+ }
+
+ *buf = &conf->statfs_cache.buf;
+ }
+unlock:
+ pthread_mutex_unlock(&conf->statfs_cache.lock);
+err:
+ return ret;
+}
+
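+/* Take (or create) an xdata dict for the wind, load the cacheable-xattr
+ * keys into it and record in 'local' whether the reply may be cached.
+ * The caller must unref the returned dict. */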
+static dict_t *
+mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata)
+{
+ if (xdata != NULL) {
+ dict_ref(xdata);
+ }
+
+ if (local == NULL) {
+ return xdata;
+ }
+
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ local->update_cache = false;
+
+ return NULL;
+ }
+ }
+
+ local->update_cache = mdc_load_reqs(this, xdata);
+
+ return xdata;
+}
+
+int
+mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ struct mdc_conf *conf = this->private;
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ }
+
+ goto out;
+ }
+
+ if (conf && conf->cache_statfs) {
+ mdc_cache_statfs(this, buf);
+ }
+
+out:
+ MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int
+mdc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = 0, op_ret = 0, op_errno = 0;
+ struct statvfs *buf = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if (!conf) {
+ goto uncached;
+ }
+
+ if (!conf->cache_statfs) {
+ goto uncached;
+ }
+
+ ret = mdc_load_statfs_info_from_cache(this, &buf);
+ if (ret == 0 && buf) {
+ op_ret = 0;
+ op_errno = 0;
+ goto out;
+ }
+
+uncached:
+ STACK_WIND(frame, mdc_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+
+out:
+ MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
+{
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if (op_errno == ENOENT)
+ GF_ATOMIC_INC(conf->mdc_counter.negative_lookup);
+
+        if (op_errno == ESTALE) {
+            /* If op_errno is ENOENT, fuse-bridge will unlink the dentry
+             * itself, so only ESTALE needs to be handled here.
+             */
+ if (local->loc.parent)
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ else
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, dict);
+ }
+ }
+out:
+ MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict,
+ postparent);
+ return 0;
+}
+
+int
+mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if (!inode_is_linked(loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ if (mdc_inode_reset_need_lookup(this, loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.need_lookup);
+ goto uncached;
+ }
+
+ ret = mdc_inode_iatt_get(this, loc->inode, &stbuf);
+ if (ret != 0) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ if (xdata) {
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr_rsp);
+ if (ret != 0) {
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ goto uncached;
+ }
+
+ if (!mdc_xattr_satisfied(this, xdata, xattr_rsp)) {
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ goto uncached;
+ }
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(lookup, frame, 0, 0, loc->inode, &stbuf, xattr_rsp,
+ &postparent);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+
+uncached:
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+int
+mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ }
+
+ goto out;
+ }
+
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ }
+
+out:
+ MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int
+mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local)
+ goto uncached;
+
+ loc_copy(&local->loc, loc);
+
+ if (!inode_is_linked(loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ ret = mdc_inode_iatt_get(this, loc->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(stat, frame, 0, 0, &stbuf, xdata);
+
+ return 0;
+
+uncached:
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+int
+mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ }
+
+ goto out;
+ }
+
+ mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ }
+
+out:
+ MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int
+mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto uncached;
+
+ local->fd = __fd_ref(fd);
+
+ ret = mdc_inode_iatt_get(this, fd->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(fstat, frame, 0, 0, &stbuf, xdata);
+
+ return 0;
+
+uncached:
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+int
+mdc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ local->loc.inode = inode_ref(loc->inode);
+ }
+
+ STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+int
+mdc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int
+mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+int
+mdc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
+
+ STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
+}
+
+int
+mdc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
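+/* Cache the given iatt without a prebuf consistency check; thin wrapper
+ * around mdc_inode_iatt_set_validate(). */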
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
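+    /* Fill in identity fields from the inode itself: gfid, inode number
+     * derived from the gfid, a fixed dummy device id and the file type. */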
+ mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
+
+ STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+}
+
+int
+mdc_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+        /* If errno is ESTALE, the parent is not present, which implies the
+         * child is not present either. Also, man 2 unlink states that unlink
+         * can return ENOENT if a component in the pathname does not exist or
+         * is a dangling symbolic link. So invalidate both the parent and the
+         * child for both errnos.
+         */
+
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
+ }
+
+out:
+ MDC_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int
+mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
+
+ STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int
+mdc_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+        /* If errno is ESTALE, the parent is not present, which implies the
+         * child is not present either. Also, man 2 rmdir states that rmdir
+         * can return ENOENT if a directory component in the pathname does
+         * not exist or is a dangling symbolic link. So invalidate both the
+         * parent and the child for both errnos.
+         */
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+out:
+ MDC_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int
+mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
+
+ STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata);
+ return 0;
+}
+
+int
+mdc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ char *name;
+
+ name = gf_strdup(linkname);
+ if (name == NULL) {
+ goto wind;
+ }
+ local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ GF_FREE(name);
+ goto wind;
+ }
+
+ loc_copy(&local->loc, loc);
+ local->linkname = name;
+
+wind:
+ STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+ return 0;
+}
+
+int
+mdc_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc2.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postoldparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ /* TODO: fix dht_rename() not to return linkfile
+ attributes before setting attributes here
+ */
+
+ mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
+ }
+
+ if (local->loc2.parent) {
+ mdc_inode_iatt_set(this, local->loc2.parent, postnewparent,
+ local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int
+mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, oldloc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ }
+
+ STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+}
+
+int
+mdc_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ mdc_inode_iatt_invalidate(this, local->loc2.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time);
+ }
+
+ if (local->loc2.parent) {
+ mdc_inode_iatt_set(this, local->loc2.parent, postparent,
+ local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, oldloc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ }
+
+ STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+}
+
+int
+mdc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+ mdc_inode_iatt_invalidate(this, local->loc.parent);
+ }
+
+ goto out;
+ }
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set(this, local->loc.parent, postparent,
+ local->incident_time);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set(this, inode, buf, local->incident_time);
+ }
+out:
+ MDC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xdata);
+ }
+
+ STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+static int
+mdc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+        if ((op_errno == ESTALE) || (op_errno == ENOENT))
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ if (local->fd->flags & O_TRUNC) {
+ /* O_TRUNC modifies file size. Hence invalidate the
+ * cache entry to fetch latest attributes. */
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ }
+
+out:
+ MDC_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+static int
+mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ if (!fd || !IA_ISREG(fd->inode->ia_type) || !(fd->flags & O_TRUNC)) {
+ goto out;
+ }
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+out:
+ STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int
+mdc_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret < 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set(this, local->fd->inode, stbuf, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+int
+mdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int
+mdc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == -1) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
+
+int
+mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+ mdc_inode_xatt_update(this, local->loc.inode, xdata);
+
+out:
+ MDC_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int valid, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ dict_t *xattr_alloc = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ goto wind;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->loc.inode);
+ }
+ }
+
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_posix_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->loc.inode);
+ }
+ }
+
+wind:
+ STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+
+ if (xattr_alloc)
+ dict_unref(xattr_alloc);
+ return 0;
+}
+
+int
+mdc_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+ mdc_inode_xatt_update(this, local->fd->inode, xdata);
+
+out:
+ MDC_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int valid, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ dict_t *xattr_alloc = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local == NULL) {
+ goto wind;
+ }
+
+ local->fd = __fd_ref(fd);
+
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->fd->inode);
+ }
+ }
+
+ if ((valid & GF_SET_ATTR_MODE) && conf->cache_posix_acl) {
+ if (!xdata)
+ xdata = xattr_alloc = dict_new();
+ if (xdata) {
+ ret = dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0);
+ if (!ret)
+ ret = dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ mdc_inode_xatt_invalidate(this, local->fd->inode);
+ }
+ }
+
+wind:
+ STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+
+ if (xattr_alloc)
+ dict_unref(xattr_alloc);
+ return 0;
+}
+
+int
+mdc_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
+}
+
+int
+mdc_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
+
+ mdc_inode_xatt_update(this, local->loc.inode, local->xattr);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ local->xattr = dict_ref(xattr);
+ }
+
+ STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
+
+ return 0;
+}
+
+int
+mdc_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_xatt_update(this, local->fd->inode, local->xattr);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+
+out:
+ MDC_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ local->xattr = dict_ref(xattr);
+ }
+
+ STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata);
+
+ return 0;
+}
+
+int
+mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret < 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
+
+ if (dict_get(xattr, "glusterfs.skip-cache")) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
+ "Skipping xattr update due to empty value");
+ goto out;
+ }
+
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ }
+
+out:
+ MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+
+int
+mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+{
+ int ret;
+ int op_errno = ENODATA;
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+ gf_boolean_t key_satisfied = _gf_false;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ goto uncached;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if (!is_mdc_key_satisfied(this, key)) {
+ goto uncached;
+ }
+ key_satisfied = _gf_true;
+
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ if (!xattr || !dict_get(xattr, (char *)key)) {
+ ret = -1;
+ op_errno = ENODATA;
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+ MDC_STACK_UNWIND(getxattr, frame, ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+uncached:
+ if (key_satisfied) {
+ xdata = mdc_prepare_request(this, local, xdata);
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+
+ if (key_satisfied && (xdata != NULL)) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+int
+mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret < 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ if (dict_get(xattr, "glusterfs.skip-cache")) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
+ "Skipping xattr update due to empty value");
+ goto out;
+ }
+
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ }
+
+out:
+ MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+
+int
+mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ int ret;
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ int op_errno = ENODATA;
+ struct mdc_conf *conf = this->private;
+ gf_boolean_t key_satisfied = _gf_true;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto uncached;
+
+ local->fd = __fd_ref(fd);
+
+ if (!is_mdc_key_satisfied(this, key)) {
+ key_satisfied = _gf_false;
+ goto uncached;
+ }
+
+ ret = mdc_inode_xatt_get(this, fd->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ if (!xattr || !dict_get(xattr, (char *)key)) {
+ ret = -1;
+ op_errno = ENODATA;
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+ MDC_STACK_UNWIND(fgetxattr, frame, ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+uncached:
+ if (key_satisfied) {
+ xdata = mdc_prepare_request(this, local, xdata);
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+
+ if (key_satisfied && (xdata != NULL)) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+int
+mdc_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+ goto out;
+ }
+
+ if (local->key)
+ mdc_inode_xatt_unset(this, local->loc.inode, local->key);
+ else
+ mdc_inode_xatt_invalidate(this, local->loc.inode);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+out:
+ MDC_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ int op_errno = ENODATA;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+ char *name2;
+
+ name2 = gf_strdup(name);
+ if (name2 == NULL) {
+ goto uncached;
+ }
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local == NULL) {
+ GF_FREE(name2);
+ goto uncached;
+ }
+
+ loc_copy(&local->loc, loc);
+ local->key = name2;
+
+ if (!is_mdc_key_satisfied(this, name))
+ goto uncached;
+
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+
+ if (!xattr || !dict_get(xattr, (char *)name)) {
+ ret = -1;
+ op_errno = ENODATA;
+
+ MDC_STACK_UNWIND(removexattr, frame, ret, op_errno, xdata);
+ } else {
+ STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ }
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+uncached:
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
+
+int
+mdc_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ if (local->key)
+ mdc_inode_xatt_unset(this, local->fd->inode, local->key);
+ else
+ mdc_inode_xatt_invalidate(this, local->fd->inode);
+
+ ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+ if (ret >= 0) {
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, &prestat, &poststat,
+ _gf_true, local->incident_time);
+ }
+
+ if (ret < 0)
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+
+out:
+ MDC_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ int op_errno = ENODATA;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct mdc_conf *conf = this->private;
+ char *name2;
+
+ name2 = gf_strdup(name);
+ if (name2 == NULL) {
+ goto uncached;
+ }
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local == NULL) {
+ GF_FREE(name2);
+ goto uncached;
+ }
+
+ local->fd = __fd_ref(fd);
+ local->key = name2;
+
+ if (!is_mdc_key_satisfied(this, name))
+ goto uncached;
+
+ ret = mdc_inode_xatt_get(this, fd->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+
+ if (!xattr || !dict_get(xattr, (char *)name)) {
+ ret = -1;
+ op_errno = ENODATA;
+
+ MDC_STACK_UNWIND(fremovexattr, frame, ret, op_errno, xdata);
+ } else {
+ STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ }
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+uncached:
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+mdc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (local != NULL) {
+ loc_copy(&local->loc, loc);
+ }
+
+    /* Tell readdir-ahead to include these keys in xdata when it
+     * internally issues readdirp() in its opendir_cbk. */
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
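+/* Cache the iatt (and, when requested, the xattrs) of every entry returned
+ * by readdirp so that subsequent lookups and stats can be served from the
+ * cache. */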
+int
+mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto unwind;
+
+ if (op_ret <= 0) {
+ if ((op_ret == -1) && ((op_errno == ENOENT) || (op_errno == ESTALE)))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto unwind;
+ }
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode)
+ continue;
+ mdc_inode_iatt_set(this, entry->inode, &entry->d_stat,
+ local->incident_time);
+ if (local->update_cache) {
+ mdc_inode_xatt_set(this, entry->inode, entry->dict);
+ }
+ }
+
+unwind:
+ MDC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int
+mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto out;
+
+ local->fd = __fd_ref(fd);
+
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+out:
+ MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+int
+mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+out:
+ MDC_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int
+mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto unwind;
+
+ local->fd = __fd_ref(fd);
+
+ if (!conf->force_readdirp) {
+ STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+ return 0;
+ }
+
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+unwind:
+ MDC_STACK_UNWIND(readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+int
+mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int
+mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
+}
+
+int
+mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret != 0) {
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+ goto out;
+ }
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf,
+ _gf_true, local->incident_time);
+
+out:
+ MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (local != NULL) {
+ local->fd = __fd_ref(fd);
+ }
+
+ STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int32_t
+mdc_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ENOENT) || (op_errno == ESTALE))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, buf, xdata);
+ return 0;
+}
+
+int32_t
+mdc_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local)
+ goto unwind;
+
+ loc_copy(&local->loc, loc);
+
+ STACK_WIND(frame, mdc_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
+
+unwind:
+ MDC_STACK_UNWIND(readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+mdc_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->fd->inode);
+
+out:
+ MDC_STACK_UNWIND(fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, fd->inode);
+ if (!local)
+ goto unwind;
+
+ local->fd = __fd_ref(fd);
+
+ STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+ return 0;
+
+unwind:
+ MDC_STACK_UNWIND(fsyncdir, frame, -1, ENOMEM, NULL);
+ return 0;
+}
+
+int32_t
+mdc_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (op_ret == 0)
+ goto out;
+
+ if ((op_errno == ESTALE) || (op_errno == ENOENT))
+ mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+mdc_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local)
+ goto unwind;
+
+ loc_copy(&local->loc, loc);
+
+ STACK_WIND(frame, mdc_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
+
+unwind:
+ MDC_STACK_UNWIND(access, frame, -1, ENOMEM, NULL);
+ return 0;
+}
+
+int
+mdc_priv_dump(xlator_t *this)
+{
+ struct mdc_conf *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ conf = this->private;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("stat_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_hit));
+ gf_proc_dump_write("stat_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_miss));
+ gf_proc_dump_write("xattr_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_hit));
+ gf_proc_dump_write("xattr_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_miss));
+ gf_proc_dump_write("nameless_lookup_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.nameless_lookup));
+ gf_proc_dump_write("negative_lookup_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.negative_lookup));
+ gf_proc_dump_write("stat_invalidations_received", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_invals));
+ gf_proc_dump_write("xattr_invalidations_received", "%" PRId64,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_invals));
+
+ return 0;
+}
+
+static int32_t
+mdc_dump_metrics(xlator_t *this, int fd)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ dprintf(fd, "%s.stat_cache_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_hit));
+ dprintf(fd, "%s.stat_cache_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.stat_miss));
+ dprintf(fd, "%s.xattr_cache_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_hit));
+ dprintf(fd, "%s.xattr_cache_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.xattr_miss));
+ dprintf(fd, "%s.nameless_lookup_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.nameless_lookup));
+ dprintf(fd, "%s.negative_lookup_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->mdc_counter.negative_lookup));
+ dprintf(fd, "%s.stat_cache_invalidations_received %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->mdc_counter.stat_invals));
+ dprintf(fd, "%s.xattr_cache_invalidations_received %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->mdc_counter.xattr_invals));
+out:
+ return 0;
+}
+
+int
+mdc_forget(xlator_t *this, inode_t *inode)
+{
+ mdc_inode_wipe(this, inode);
+
+ return 0;
+}
+
+int
+is_strpfx(const char *str1, const char *str2)
+{
+    /* Is one of the strings a prefix of the other? */
+ int i;
+
+ for (i = 0; str1[i] == str2[i]; i++) {
+ if (!str1[i] || !str2[i])
+ break;
+ }
+
+ return !(str1[i] && str2[i]);
+}
+
+static int
+mdc_key_unload_all(struct mdc_conf *conf)
+{
+ conf->mdc_xattr_str = NULL;
+
+ return 0;
+}
+
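+/* Build the comma-separated list of xattr patterns to cache from the
+ * individual cache-* options plus the caller-supplied list in 'tmp_str'. */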
+int
+mdc_xattr_list_populate(struct mdc_conf *conf, char *tmp_str)
+{
+ char *mdc_xattr_str = NULL;
+ size_t max_size = 0;
+ int ret = 0;
+
+ max_size = SLEN(
+ "security.capability,security.selinux,security."
+ "ima," POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR
+ "," GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT
+ ","
+ "user.swift.metadata,user.DOSATTRIB,user.DosStream.*"
+ ",user.org.netatalk.Metadata,security.NTACL,"
+ "user.org.netatalk.ResourceFork") +
+ strlen(tmp_str) + 5; /*Some buffer bytes*/
+
+ mdc_xattr_str = GF_MALLOC(max_size, gf_common_mt_char);
+ GF_CHECK_ALLOC(mdc_xattr_str, ret, out);
+ mdc_xattr_str[0] = '\0';
+
+ if (conf->cache_capability)
+ strcat(mdc_xattr_str, "security.capability,");
+
+ if (conf->cache_selinux)
+ strcat(mdc_xattr_str, "security.selinux,");
+
+ if (conf->cache_ima)
+ strcat(mdc_xattr_str, "security.ima,");
+
+ if (conf->cache_posix_acl)
+ strcat(mdc_xattr_str,
+ POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR ",");
+
+ if (conf->cache_glusterfs_acl)
+ strcat(mdc_xattr_str, GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT ",");
+
+ if (conf->cache_swift_metadata)
+ strcat(mdc_xattr_str, "user.swift.metadata,");
+
+ if (conf->cache_samba_metadata)
+ strcat(mdc_xattr_str,
+ "user.DOSATTRIB,user.DosStream.*,"
+ "user.org.netatalk.Metadata,user.org.netatalk."
+ "ResourceFork,security.NTACL,");
+
+ strcat(mdc_xattr_str, tmp_str);
+
+ LOCK(&conf->lock);
+ {
+ /* The old string is intentionally not freed; freeing it would
+ * require is_mdc_key_satisfied, which is called by every fop,
+ * to take this lock, leading to lock contention.
+ */
+ conf->mdc_xattr_str = mdc_xattr_str;
+ }
+ UNLOCK(&conf->lock);
+
+out:
+ return ret;
+}
+
+struct set {
+ inode_t *inode;
+ xlator_t *this;
+};
+
+static int
+mdc_inval_xatt(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ struct set *tmp1 = NULL;
+ int ret = 0;
+
+ tmp1 = (struct set *)tmp;
+ ret = mdc_inode_xatt_unset(tmp1->this, tmp1->inode, k);
+ return ret;
+}
+
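+/* Handles GF_UPCALL_CACHE_INVALIDATION notifications: dentry-change events
+ * refresh the parent's (and, on rename, the old parent's) cached stat;
+ * UP_EXPLICIT_LOOKUP marks the inode as needing a fresh lookup;
+ * nlink/rename/forget/inval-attr flags drop both the iatt and xattr caches;
+ * iatt-update flags refresh the cached stat (skipping updates older than the
+ * current one); and UP_XATTR/UP_XATTR_RM update or drop the cached xattrs.
+ */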
+static int
+mdc_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ struct set tmp = {
+ 0,
+ };
+ inode_table_t *itable = NULL;
+ struct mdc_conf *conf = this->private;
+ uint64_t gen = 0;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if (up_ci->flags & UP_PARENT_DENTRY_FLAGS) {
+ mdc_update_gfid_stat(this, &up_ci->p_stat);
+ if (up_ci->flags & UP_RENAME_FLAGS)
+ mdc_update_gfid_stat(this, &up_ci->oldp_stat);
+ }
+
+ if (up_ci->flags & UP_EXPLICIT_LOOKUP) {
+ mdc_inode_set_need_lookup(this, inode, _gf_true);
+ goto out;
+ }
+
+ if (up_ci->flags &
+ (UP_NLINK | UP_RENAME_FLAGS | UP_FORGET | UP_INVAL_ATTR)) {
+ mdc_inode_iatt_invalidate(this, inode);
+ mdc_inode_xatt_invalidate(this, inode);
+ GF_ATOMIC_INC(conf->mdc_counter.stat_invals);
+ goto out;
+ }
+
+ if (up_ci->flags & IATT_UPDATE_FLAGS) {
+ gen = mdc_inc_generation(this, inode);
+ ret = mdc_inode_iatt_set_validate(this, inode, NULL, &up_ci->stat,
+ _gf_false, gen);
+ /* One of the scenarios where ret < 0 is when this invalidation
+ * is older than the current stat; in that case, do not update
+ * the xattrs either.
+ */
+ if (ret < 0)
+ goto out;
+ GF_ATOMIC_INC(conf->mdc_counter.stat_invals);
+ }
+
+ if (up_ci->flags & UP_XATTR) {
+ if (up_ci->dict)
+ ret = mdc_inode_xatt_update(this, inode, up_ci->dict);
+ else
+ ret = mdc_inode_xatt_invalidate(this, inode);
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_invals);
+ } else if (up_ci->flags & UP_XATTR_RM) {
+ tmp.inode = inode;
+ tmp.this = this;
+ ret = dict_foreach(up_ci->dict, mdc_inval_xatt, &tmp);
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_invals);
+ }
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ return ret;
+}
+
+struct mdc_ipc {
+ xlator_t *this;
+ dict_t *xattr;
+};
+
+static int
+mdc_send_xattrs_cbk(int ret, call_frame_t *frame, void *data)
+{
+ struct mdc_ipc *tmp = data;
+
+ if (ret < 0) {
+ mdc_key_unload_all(THIS->private);
+ gf_msg("md-cache", GF_LOG_INFO, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for all xattrs, as registering for "
+ "xattr cache invalidation failed");
+ }
+ STACK_DESTROY(frame->root);
+ dict_unref(tmp->xattr);
+ GF_FREE(tmp);
+
+ return 0;
+}
+
+static int
+mdc_send_xattrs(void *data)
+{
+ int ret = 0;
+ struct mdc_ipc *tmp = data;
+
+ ret = syncop_ipc(FIRST_CHILD(tmp->this), GF_IPC_TARGET_UPCALL, tmp->xattr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret < 0) {
+ gf_msg(tmp->this->name, GF_LOG_WARNING, errno,
+ MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ "Registering the list "
+ "of xattrs that needs invalidaton, with upcall, failed");
+ }
+
+ return ret;
+}
+
+static int
+mdc_register_xattr_inval(xlator_t *this)
+{
+ dict_t *xattr = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+ call_frame_t *frame = NULL;
+ struct mdc_ipc *data = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ if (!conf->mdc_invalidation) {
+ UNLOCK(&conf->lock);
+ goto out;
+ }
+ }
+ UNLOCK(&conf->lock);
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (!mdc_load_reqs(this, xattr)) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to populate cache entries");
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to create the frame");
+ ret = -1;
+ goto out;
+ }
+
+ data = GF_CALLOC(1, sizeof(struct mdc_ipc), gf_mdc_mt_mdc_ipc);
+ if (!data) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to allocate memory");
+ ret = -1;
+ goto out;
+ }
+
+ data->this = this;
+ data->xattr = xattr;
+ ret = synctask_new(this->ctx->env, mdc_send_xattrs, mdc_send_xattrs_cbk,
+ frame, data);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ "Registering the list "
+ "of xattrs that needs invalidaton, with upcall, failed");
+ }
+
+out:
+ if (ret < 0) {
+ mdc_key_unload_all(conf);
+ if (xattr)
+ dict_unref(xattr);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ GF_FREE(data);
+ gf_msg(this->name, GF_LOG_INFO, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for all xattrs, as registering for "
+ "xattr cache invalidation failed");
+ }
+
+ return ret;
+}
+
+int
+mdc_reconfigure(xlator_t *this, dict_t *options)
+{
+ struct mdc_conf *conf = NULL;
+ int timeout = 0, ret = 0;
+ char *tmp_str = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("md-cache-timeout", timeout, options, int32, out);
+
+ GF_OPTION_RECONF("cache-selinux", conf->cache_selinux, options, bool, out);
+
+ GF_OPTION_RECONF("cache-capability-xattrs", conf->cache_capability, options,
+ bool, out);
+
+ GF_OPTION_RECONF("cache-ima-xattrs", conf->cache_ima, options, bool, out);
+
+ GF_OPTION_RECONF("cache-posix-acl", conf->cache_posix_acl, options, bool,
+ out);
+
+ GF_OPTION_RECONF("cache-glusterfs-acl", conf->cache_glusterfs_acl, options,
+ bool, out);
+
+ GF_OPTION_RECONF("cache-swift-metadata", conf->cache_swift_metadata,
+ options, bool, out);
+
+ GF_OPTION_RECONF("cache-samba-metadata", conf->cache_samba_metadata,
+ options, bool, out);
+
+ GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool,
+ out);
+
+ GF_OPTION_RECONF("cache-invalidation", conf->mdc_invalidation, options,
+ bool, out);
+
+ GF_OPTION_RECONF("global-cache-invalidation", conf->global_invalidation,
+ options, bool, out);
+
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+ GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out);
+
+ GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out);
+
+ ret = mdc_xattr_list_populate(conf, tmp_str);
+ if (ret < 0)
+ goto out;
+
+ /* If timeout is greater than 60s (the maximum default before cache
+ * invalidation support was added), then the cache invalidation feature
+ * for md-cache needs to be enabled; if it is not, cap the timeout at
+ * the previous maximum of 60s.
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
+ ret = mdc_register_xattr_inval(this);
+out:
+ return ret;
+}
+
+int32_t
+mdc_mem_acct_init(xlator_t *this)
+{
+ return xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
+}
+
+int
+mdc_init(xlator_t *this)
+{
+ struct mdc_conf *conf = NULL;
+ uint32_t timeout = 0;
+ char *tmp_str = NULL;
+
+ conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t);
+ if (!conf) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ return -1;
+ }
+
+ LOCK_INIT(&conf->lock);
+
+ GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out);
+
+ GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out);
+
+ GF_OPTION_INIT("cache-capability-xattrs", conf->cache_capability, bool,
+ out);
+
+ GF_OPTION_INIT("cache-ima-xattrs", conf->cache_ima, bool, out);
+
+ GF_OPTION_INIT("cache-posix-acl", conf->cache_posix_acl, bool, out);
+
+ GF_OPTION_INIT("cache-glusterfs-acl", conf->cache_glusterfs_acl, bool, out);
+
+ GF_OPTION_INIT("cache-swift-metadata", conf->cache_swift_metadata, bool,
+ out);
+
+ GF_OPTION_INIT("cache-samba-metadata", conf->cache_samba_metadata, bool,
+ out);
+
+ GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+
+ GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out);
+
+ GF_OPTION_INIT("global-cache-invalidation", conf->global_invalidation, bool,
+ out);
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+ pthread_mutex_init(&conf->statfs_cache.lock, NULL);
+ GF_OPTION_INIT("md-cache-statfs", conf->cache_statfs, bool, out);
+
+ GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out);
+ mdc_xattr_list_populate(conf, tmp_str);
+
+ conf->last_child_down = gf_time();
+ conf->statfs_cache.last_refreshed = (time_t)-1;
+
+ /* initialize gf_atomic_t counters */
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_hit, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_miss, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.negative_lookup, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.nameless_lookup, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_invals, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_invals, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.need_lookup, 0);
+ GF_ATOMIC_INIT(conf->generation, 0);
+
+ /* If timeout is greater than 60s (the maximum default before cache
+ * invalidation support was added), then the cache invalidation feature
+ * for md-cache needs to be enabled; if it is not, cap the timeout at
+ * the previous maximum of 60s.
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
+out:
+ this->private = conf;
+
+ return 0;
+}
+
+void
+mdc_update_child_down_time(xlator_t *this, time_t now)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ conf->last_child_down = now;
+ }
+ UNLOCK(&conf->lock);
+}
+
+int
+mdc_notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ mdc_update_child_down_time(this, gf_time());
+ break;
+ case GF_EVENT_UPCALL:
+ if (conf->mdc_invalidation)
+ ret = mdc_invalidate(this, data);
+ break;
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ ret = mdc_register_xattr_inval(this);
+ break;
+ default:
+ break;
+ }
+
+ if (default_notify(this, event, data) != 0)
+ ret = -1;
+
+ return ret;
+}
+
+void
+mdc_fini(xlator_t *this)
+{
+ GF_FREE(this->private);
+}
+
+struct xlator_fops mdc_fops = {
+ .lookup = mdc_lookup,
+ .stat = mdc_stat,
+ .fstat = mdc_fstat,
+ .truncate = mdc_truncate,
+ .ftruncate = mdc_ftruncate,
+ .mknod = mdc_mknod,
+ .mkdir = mdc_mkdir,
+ .unlink = mdc_unlink,
+ .rmdir = mdc_rmdir,
+ .symlink = mdc_symlink,
+ .rename = mdc_rename,
+ .link = mdc_link,
+ .create = mdc_create,
+ .open = mdc_open,
+ .readv = mdc_readv,
+ .writev = mdc_writev,
+ .setattr = mdc_setattr,
+ .fsetattr = mdc_fsetattr,
+ .fsync = mdc_fsync,
+ .setxattr = mdc_setxattr,
+ .fsetxattr = mdc_fsetxattr,
+ .getxattr = mdc_getxattr,
+ .fgetxattr = mdc_fgetxattr,
+ .removexattr = mdc_removexattr,
+ .fremovexattr = mdc_fremovexattr,
+ .opendir = mdc_opendir,
+ .readdirp = mdc_readdirp,
+ .readdir = mdc_readdir,
+ .fallocate = mdc_fallocate,
+ .discard = mdc_discard,
+ .zerofill = mdc_zerofill,
+ .statfs = mdc_statfs,
+ .readlink = mdc_readlink,
+ .fsyncdir = mdc_fsyncdir,
+ .access = mdc_access,
+};
+
+struct xlator_cbks mdc_cbks = {
+ .forget = mdc_forget,
+};
+
+struct xlator_dumpops mdc_dumpops = {
+ .priv = mdc_priv_dump,
+};
+
+struct volume_options mdc_options[] = {
+ {
+ .key = {"md-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable md-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"cache-selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache selinux xattr(security.selinux) on client side",
+ },
+ {
+ .key = {"cache-capability-xattrs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache capability xattr(security.capability) on "
+ "client side",
+ },
+ {
+ .key = {"cache-ima-xattrs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache Linux integrity subsystem xattr(security.ima) "
+ "on client side",
+ },
+ {
+ .key = {"cache-swift-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache swift metadata (user.swift.metadata xattr)",
+ },
+ {
+ .key = {"cache-samba-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL,"
+ " org.netatalk.Metadata, org.netatalk.ResourceFork, "
+ "and user.DosStream. xattrs)",
+ },
+ {
+ .key = {"cache-posix-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache posix ACL xattrs (system.posix_acl_access, "
+ "system.posix_acl_default) on client side",
+ },
+ {
+ .key = {"cache-glusterfs-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache virtual glusterfs ACL xattrs "
+ "(glusterfs.posix.acl, glusterfs.posix.default_acl) "
+ "on client side",
+ },
+ {
+ .key = {"md-cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 600,
+ .default_value = SITE_H_MD_CACHE_TIMEOUT,
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Time period after which cache has to be refreshed",
+ },
+ {
+ .key = {"force-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Convert all readdir requests to readdirplus to "
+ "collect stat info on each entry.",
+ },
+ {
+ .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+ },
+ {
+ .key = {"global-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description =
+ "When \"on\", purges all read caches in kernel and glusterfs stack "
+ "whenever a stat change is detected. Stat changes can be detected "
+ "while processing responses to file operations (fop) or through "
+ "upcall notifications. Since purging caches can be an expensive "
+ "operation, it's advised to have this option \"on\" only when a "
+ "file "
+ "can be accessed from multiple different Glusterfs mounts and "
+ "caches across these different mounts are required to be coherent. "
+ "If a file is not accessed across different mounts "
+ "(simple example is having only one mount for a volume), its "
+ "advised to keep "
+ "this option \"off\" as all file modifications go through caches "
+ "keeping them "
+ "coherent. This option overrides value of "
+ "performance.cache-invalidation.",
+ },
+ {
+ .key = {"md-cache-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache statfs information of filesystem on the client",
+ },
+ {
+ .key = {"xattr-cache-list"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "A comma separated list of xattrs that shall be "
+ "cached by md-cache. The only wildcard allowed is '*'",
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"md-cache"},
+ .description = "Enable/Disable md cache translator"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = mdc_init,
+ .fini = mdc_fini,
+ .notify = mdc_notify,
+ .reconfigure = mdc_reconfigure,
+ .mem_acct_init = mdc_mem_acct_init,
+ .dump_metrics = mdc_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &mdc_dumpops,
+ .fops = &mdc_fops,
+ .cbks = &mdc_cbks,
+ .options = mdc_options,
+ .identifier = "md-cache",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/performance/symlink-cache/Makefile.am b/xlators/performance/nl-cache/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/xlators/performance/symlink-cache/Makefile.am
+++ b/xlators/performance/nl-cache/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/performance/nl-cache/src/Makefile.am b/xlators/performance/nl-cache/src/Makefile.am
new file mode 100644
index 00000000000..c44ce871627
--- /dev/null
+++ b/xlators/performance/nl-cache/src/Makefile.am
@@ -0,0 +1,12 @@
+xlator_LTLIBRARIES = nl-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+nl_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+nl_cache_la_SOURCES = nl-cache.c nl-cache-helper.c
+nl_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+noinst_HEADERS = nl-cache.h nl-cache-mem-types.h nl-cache-messages.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/timer-wheel
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+CLEANFILES =
diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c
new file mode 100644
index 00000000000..29b99b5b8ea
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache-helper.c
@@ -0,0 +1,1201 @@
+/*
+ * Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "nl-cache.h"
+#include "timer-wheel.h"
+#include <glusterfs/statedump.h>
+
+/* Caching guidelines:
+ * This xlator serves negative lookups (ENOENT lookups) from the cache,
+ * thereby making create faster.
+ * What is cached?
+ * A negative lookup cache is stored for each directory, and has 2 kinds
+ * of entries:
+ * - Negative entries: Populated only when lookup/stat returns ENOENT.
+ * Fuse mostly sends only one lookup before create, hence negative entry
+ * cache is almost useless. But for SMB access, multiple lookups/stats
+ * are sent before creating the file. Hence the negative entry cache.
+ * It can exist even when the positive entry cache is invalid. It also
+ * has the entries that were deleted from this directory.
+ * Freed on receiving an upcall (with the dentry change flag) or when
+ * the cache timeout expires.
+ *
+ * - Positive entries: Populated as a part of readdirp, and as a part of
+ * mkdir followed by creates inside that directory. Lookups and other
+ * fops do not populate the positive entries (as the list can grow long
+ * and adds little value).
+ * Freed on receiving an upcall (with the dentry change flag) or when
+ * the cache timeout expires.
+ *
+ * Data structures to store cache?
+ * The cache of any directory is stored in the inode_ctx of the directory.
+ * Negative entries are stored as list of strings.
+ * Search - O(n)
+ * Add - O(1)
+ * Delete - O(n) - as it has to be searched before deleting
+ * Positive entries are stored as a list, each list node has a pointer
+ * to the inode of the positive entry or the name of the entry.
+ * Since the client side inode table will already have inodes for
+ * positive entries, we just take a ref of that inode and store it as
+ * the positive entry cache. In cases like hardlinks and readdirp where
+ * inode is NULL, we store the names.
+ * Name Search - O(n)
+ * Inode Search - O(1) - Actually complexity of inode_find()
+ * Name/inode Add - O(1)
+ * Name Delete - O(n)
+ * Inode Delete - O(1)
+ *
+ * Locking order:
+ *
+ * TODO:
+ * - Fill Positive entries on readdir/p, after which in lookup_cbk check if the
+ * name is in PE and replace it with inode.
+ * - fini, PARENT_DOWN, disable caching
+ * - Virtual setxattr to dump the inode_ctx, to ease debugging
+ * - Handle dht_nuke xattr: clear all cache
+ * - Special handling for .meta and .trashcan?
+ */
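+/* Rough shape of the cached entries described above (a sketch based on how
+ * they are used in this file; the actual definitions live in nl-cache.h):
+ *
+ *   nlc_ne_t: { list_head list; char *name; }                 - negative entry
+ *   nlc_pe_t: { list_head list; inode_t *inode; char *name; } - positive entry
+ *
+ * Both are chained off the per-directory nlc_ctx_t stored in the directory's
+ * inode_ctx.
+ */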
+
+int
+__nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx);
+int
+__nlc_add_to_lru(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx);
+void
+nlc_remove_from_lru(xlator_t *this, inode_t *inode);
+void
+__nlc_inode_ctx_timer_delete(xlator_t *this, nlc_ctx_t *nlc_ctx);
+gf_boolean_t
+__nlc_search_ne(nlc_ctx_t *nlc_ctx, const char *name);
+void
+__nlc_free_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe);
+void
+__nlc_free_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne);
+
+static int32_t
+nlc_get_cache_timeout(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ /* The cache timeout is generally not changed once set,
+ * hence it is read without taking the lock */
+ return conf->cache_timeout;
+}
+
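+/* The cache of a directory is valid only if it was populated
+ * (cache_time != 0) at or after the time the last child-down event was
+ * recorded (last_child_down <= cache_time); any later child-down implicitly
+ * invalidates it.
+ */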
+static gf_boolean_t
+__nlc_is_cache_valid(xlator_t *this, nlc_ctx_t *nlc_ctx)
+{
+ nlc_conf_t *conf = NULL;
+ time_t last_val_time;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO(this->name, nlc_ctx, out);
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ last_val_time = conf->last_child_down;
+ }
+ UNLOCK(&conf->lock);
+
+ if ((last_val_time <= nlc_ctx->cache_time) && (nlc_ctx->cache_time != 0))
+ ret = _gf_true;
+out:
+ return ret;
+}
+
+void
+nlc_update_child_down_time(xlator_t *this, time_t now)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ conf->last_child_down = now;
+ }
+ UNLOCK(&conf->lock);
+
+ return;
+}
+
+void
+nlc_disable_cache(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ conf->disable_cache = _gf_true;
+ }
+ UNLOCK(&conf->lock);
+
+ return;
+}
+
+static int
+__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+{
+ int ret = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+ uint64_t nlc_ctx_int = 0;
+ uint64_t nlc_pe_int = 0;
+
+ ret = __inode_ctx_get2(inode, this, &nlc_ctx_int, &nlc_pe_int);
+ if (ret == 0 && nlc_ctx_p) {
+ nlc_ctx = (void *)(long)(nlc_ctx_int);
+ *nlc_ctx_p = nlc_ctx;
+ }
+ return ret;
+}
+
+static int
+nlc_inode_ctx_set(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx,
+ nlc_pe_t *nlc_pe_p)
+{
+ uint64_t ctx1, ctx2;
+ int ret = -1;
+
+ ctx1 = (uint64_t)(uintptr_t)nlc_ctx;
+ ctx2 = (uint64_t)(uintptr_t)nlc_pe_p;
+
+ /* The caller may choose to set only one of the ctxs, hence check
+ * whether ctx1/ctx2 is non-zero and only then pass its address. If we
+ * blindly passed the addresses of both ctxs, it could reset the ctx
+ * for which the caller had sent NULL (intended as "leave untouched"). */
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set2(inode, this, ctx1 ? &ctx1 : 0, ctx2 ? &ctx2 : 0);
+ }
+ UNLOCK(&inode->lock);
+ return ret;
+}
+
+static void
+nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+{
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0,
+ "inode ctx get failed for "
+ "inode:%p",
+ inode);
+ }
+ UNLOCK(&inode->lock);
+
+ return;
+}
+
+static void
+__nlc_inode_clear_entries(xlator_t *this, nlc_ctx_t *nlc_ctx)
+{
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp1 = NULL;
+
+ if (!nlc_ctx)
+ goto out;
+
+ if (IS_PE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ __nlc_free_pe(this, nlc_ctx, pe);
+ }
+
+ if (IS_NE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(ne, tmp1, &nlc_ctx->ne, list)
+ {
+ __nlc_free_ne(this, nlc_ctx, ne);
+ }
+
+ nlc_ctx->cache_time = 0;
+ nlc_ctx->state = 0;
+ GF_ASSERT(nlc_ctx->cache_size == sizeof(*nlc_ctx));
+ GF_ASSERT(nlc_ctx->refd_inodes == 0);
+out:
+ return;
+}
+
+static void
+nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
+{
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ /* The cache/nlc_ctx can be invalid for 2 reasons:
+ * - Because of a child-down/timer expiry, cache is
+ * invalid but the nlc_ctx is not yet cleaned up.
+ * - nlc_ctx is cleaned up, because of invalidations
+ * or lru prune etc.*/
+
+ /* If the cache is present but invalid, clear the cache and
+ * reset the timer. */
+ __nlc_inode_clear_entries(this, nlc_ctx);
+
+ /* If timer is present, then it is already part of lru as well
+ * Hence reset the timer and return.*/
+ if (nlc_ctx->timer) {
+ gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer,
+ conf->cache_timeout);
+ nlc_ctx->cache_time = gf_time();
+ goto unlock;
+ }
+
+ /* If the timer was NULL, the nlc_ctx is already cleaned up,
+ * and we need to start the timer and add it to the lru, so that it
+ * is ready to cache entries afresh */
+ ret = __nlc_inode_ctx_timer_start(this, inode, nlc_ctx);
+ if (ret < 0)
+ goto unlock;
+
+ ret = __nlc_add_to_lru(this, inode, nlc_ctx);
+ if (ret < 0) {
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+static nlc_ctx_t *
+nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p)
+{
+ uint64_t ctx;
+ int ret = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&inode->lock);
+ {
+ ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (nlc_ctx)
+ goto unlock;
+
+ nlc_ctx = GF_CALLOC(sizeof(*nlc_ctx), 1, gf_nlc_mt_nlc_ctx_t);
+ if (!nlc_ctx)
+ goto unlock;
+
+ LOCK_INIT(&nlc_ctx->lock);
+ INIT_LIST_HEAD(&nlc_ctx->pe);
+ INIT_LIST_HEAD(&nlc_ctx->ne);
+
+ ret = __nlc_inode_ctx_timer_start(this, inode, nlc_ctx);
+ if (ret < 0)
+ goto unlock;
+
+ ret = __nlc_add_to_lru(this, inode, nlc_ctx);
+ if (ret < 0) {
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+ goto unlock;
+ }
+
+ ctx = (uint64_t)(uintptr_t)nlc_ctx;
+ ret = __inode_ctx_set2(inode, this, &ctx, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, NLC_MSG_NO_MEMORY,
+ "inode ctx set failed");
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+ nlc_remove_from_lru(this, inode);
+ goto unlock;
+ }
+
+ /*TODO: also sizeof (gf_tw_timer_list) + nlc_timer_data_t ?*/
+ nlc_ctx->cache_size = sizeof(*nlc_ctx);
+ GF_ATOMIC_ADD(conf->current_cache_size, nlc_ctx->cache_size);
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (ret == 0 && nlc_ctx_p) {
+ *nlc_ctx_p = nlc_ctx;
+ nlc_init_invalid_ctx(this, inode, nlc_ctx);
+ }
+
+ if (ret < 0 && nlc_ctx) {
+ LOCK_DESTROY(&nlc_ctx->lock);
+ GF_FREE(nlc_ctx);
+ nlc_ctx = NULL;
+ goto out;
+ }
+
+out:
+ return nlc_ctx;
+}
+
+nlc_local_t *
+nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, loc_t *loc2)
+{
+ nlc_local_t *local = NULL;
+
+ local = GF_CALLOC(sizeof(*local), 1, gf_nlc_mt_nlc_local_t);
+ if (!local)
+ goto out;
+
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (loc2)
+ loc_copy(&local->loc2, loc2);
+
+ local->fop = fop;
+ frame->local = local;
+out:
+ return local;
+}
+
+void
+nlc_local_wipe(xlator_t *this, nlc_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe(&local->loc);
+
+ loc_wipe(&local->loc2);
+
+ GF_FREE(local);
+out:
+ return;
+}
+
+static void
+__nlc_set_dir_state(nlc_ctx_t *nlc_ctx, uint64_t new_state)
+{
+ nlc_ctx->state |= new_state;
+
+ return;
+}
+
+void
+nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ __nlc_set_dir_state(nlc_ctx, state);
+ }
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+static void
+nlc_cache_timeout_handler(struct gf_tw_timer_list *timer, void *data,
+ unsigned long calltime)
+{
+ nlc_timer_data_t *tmp = data;
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ /* Taking nlc_ctx->lock here would lead to a deadlock, hence the
+ * cache is marked invalid outside of the lock instead of clearing
+ * it. Since cache_time is assigned outside of the lock, the value
+ * can be stale for a short time; this may result in a false
+ * negative, which is better than a deadlock */
+ nlc_ctx->cache_time = 0;
+out:
+ return;
+}
+
+void
+__nlc_inode_ctx_timer_delete(xlator_t *this, nlc_ctx_t *nlc_ctx)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (nlc_ctx->timer)
+ gf_tw_del_timer(conf->timer_wheel, nlc_ctx->timer);
+
+ if (nlc_ctx->timer_data) {
+ inode_unref(nlc_ctx->timer_data->inode);
+ GF_FREE(nlc_ctx->timer_data);
+ nlc_ctx->timer_data = NULL;
+ }
+
+ GF_FREE(nlc_ctx->timer);
+ nlc_ctx->timer = NULL;
+
+ return;
+}
+
+int
+__nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
+{
+ struct gf_tw_timer_list *timer = NULL;
+ nlc_timer_data_t *tmp = NULL;
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+ /* We are taking inode_table->lock within inode->lock, as the only
+ * other caller that takes inode->lock within inode_table->lock, and
+ * could therefore deadlock, is inode_table_destroy. Hopefully, no
+ * fop can be in flight when inode_table_destroy is being called. */
+ tmp = GF_CALLOC(1, sizeof(*tmp), gf_nlc_mt_nlc_timer_data_t);
+ if (!tmp)
+ goto out;
+ tmp->inode = inode_ref(inode);
+ tmp->this = this;
+
+ timer = GF_CALLOC(1, sizeof(*timer), gf_common_mt_tw_timer_list);
+ if (!timer)
+ goto out;
+
+ INIT_LIST_HEAD(&timer->entry);
+ timer->expires = nlc_get_cache_timeout(this);
+ timer->function = nlc_cache_timeout_handler;
+ timer->data = tmp;
+ nlc_ctx->timer = timer;
+ nlc_ctx->timer_data = tmp;
+ gf_tw_add_timer(conf->timer_wheel, timer);
+
+ nlc_ctx->cache_time = gf_time();
+ gf_msg_trace(this->name, 0,
+ "Registering timer:%p, inode:%p, "
+ "gfid:%s",
+ timer, inode, uuid_utoa(inode->gfid));
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ if (tmp && tmp->inode)
+ inode_unref(tmp->inode);
+ GF_FREE(tmp);
+ GF_FREE(timer);
+ }
+
+ return ret;
+}
+
+int
+__nlc_add_to_lru(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
+{
+ nlc_lru_node_t *lru_ino = NULL;
+ uint64_t nlc_pe_int = 0;
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+ lru_ino = GF_CALLOC(1, sizeof(*lru_ino), gf_nlc_mt_nlc_lru_node);
+ if (!lru_ino)
+ goto out;
+
+ INIT_LIST_HEAD(&lru_ino->list);
+ lru_ino->inode = inode_ref(inode);
+ LOCK(&conf->lock);
+ {
+ list_add_tail(&lru_ino->list, &conf->lru);
+ }
+ UNLOCK(&conf->lock);
+
+ nlc_ctx->refd_inodes = 0;
+ ret = __inode_ctx_get2(inode, this, NULL, &nlc_pe_int);
+ if (nlc_pe_int == 0)
+ GF_ATOMIC_ADD(conf->refd_inodes, 1);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+void
+nlc_remove_from_lru(xlator_t *this, inode_t *inode)
+{
+ nlc_lru_node_t *lru_node = NULL;
+ nlc_lru_node_t *tmp = NULL;
+ nlc_lru_node_t *tmp1 = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ list_for_each_entry_safe(lru_node, tmp, &conf->lru, list)
+ {
+ if (inode == lru_node->inode) {
+ list_del(&lru_node->list);
+ tmp1 = lru_node;
+ break;
+ }
+ }
+ }
+ UNLOCK(&conf->lock);
+
+ if (tmp1) {
+ inode_unref(tmp1->inode);
+ GF_FREE(tmp1);
+ }
+
+ return;
+}
+
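+/* Prunes at most one directory from the LRU list, and clears its cache, when
+ * either the referenced-inode count or the total cache size exceeds the
+ * configured limits.
+ */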
+void
+nlc_lru_prune(xlator_t *this, inode_t *inode)
+{
+ nlc_lru_node_t *lru_node = NULL;
+ nlc_lru_node_t *prune_node = NULL;
+ nlc_lru_node_t *tmp = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ LOCK(&conf->lock);
+ {
+ if ((GF_ATOMIC_GET(conf->refd_inodes) < conf->inode_limit) &&
+ (GF_ATOMIC_GET(conf->current_cache_size) < conf->cache_size))
+ goto unlock;
+
+ list_for_each_entry_safe(lru_node, tmp, &conf->lru, list)
+ {
+ list_del(&lru_node->list);
+ prune_node = lru_node;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&conf->lock);
+
+ if (prune_node) {
+ nlc_inode_clear_cache(this, prune_node->inode, NLC_LRU_PRUNE);
+ inode_unref(prune_node->inode);
+ GF_FREE(prune_node);
+ }
+ return;
+}
+
+void
+nlc_clear_all_cache(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+ struct list_head clear_list;
+ nlc_lru_node_t *prune_node = NULL;
+ nlc_lru_node_t *tmp = NULL;
+
+ conf = this->private;
+
+ INIT_LIST_HEAD(&clear_list);
+
+ LOCK(&conf->lock);
+ {
+ list_replace_init(&conf->lru, &clear_list);
+ }
+ UNLOCK(&conf->lock);
+
+ list_for_each_entry_safe(prune_node, tmp, &clear_list, list)
+ {
+ list_del(&prune_node->list);
+ nlc_inode_clear_cache(this, prune_node->inode, NLC_LRU_PRUNE);
+ inode_unref(prune_node->inode);
+ GF_FREE(prune_node);
+ }
+
+ return;
+}
+
+void
+__nlc_free_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe)
+{
+ uint64_t pe_int = 0;
+ nlc_conf_t *conf = NULL;
+ uint64_t nlc_ctx_int = 0;
+
+ conf = this->private;
+
+ if (pe->inode) {
+ inode_ctx_reset1(pe->inode, this, &pe_int);
+ inode_ctx_get2(pe->inode, this, &nlc_ctx_int, NULL);
+ inode_unref(pe->inode);
+ }
+ list_del(&pe->list);
+
+ nlc_ctx->cache_size -= sizeof(*pe) + sizeof(pe->name);
+ GF_ATOMIC_SUB(conf->current_cache_size, (sizeof(*pe) + sizeof(pe->name)));
+
+ nlc_ctx->refd_inodes -= 1;
+ if (nlc_ctx_int == 0)
+ GF_ATOMIC_SUB(conf->refd_inodes, 1);
+
+ GF_FREE(pe->name);
+ GF_FREE(pe);
+
+ return;
+}
+
+void
+__nlc_free_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ list_del(&ne->list);
+ GF_FREE(ne->name);
+ GF_FREE(ne);
+
+ nlc_ctx->cache_size -= sizeof(*ne) + sizeof(ne->name);
+ GF_ATOMIC_SUB(conf->current_cache_size, (sizeof(*ne) + sizeof(ne->name)));
+
+ return;
+}
+
+void
+nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ __nlc_inode_ctx_timer_delete(this, nlc_ctx);
+
+ __nlc_inode_clear_entries(this, nlc_ctx);
+ }
+ UNLOCK(&nlc_ctx->lock);
+
+ if (reason != NLC_LRU_PRUNE)
+ nlc_remove_from_lru(this, inode);
+
+out:
+ return;
+}
+
+static void
+__nlc_del_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, inode_t *entry_ino,
+ const char *name, gf_boolean_t multilink)
+{
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+ gf_boolean_t found = _gf_false;
+ uint64_t pe_int = 0;
+
+ if (!IS_PE_VALID(nlc_ctx->state))
+ goto out;
+
+ if (!entry_ino)
+ goto name_search;
+
+ /* If there are hardlinks, search names first, followed by inodes */
+ if (multilink) {
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = _gf_true;
+ goto out;
+ }
+ }
+ inode_ctx_reset1(entry_ino, this, &pe_int);
+ if (pe_int) {
+ pe = (void *)(long)(pe_int);
+ found = _gf_true;
+ goto out;
+ }
+ goto out;
+ }
+
+ inode_ctx_reset1(entry_ino, this, &pe_int);
+ if (pe_int) {
+ pe = (void *)(long)(pe_int);
+ found = _gf_true;
+ goto out;
+ }
+
+name_search:
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = _gf_true;
+ break;
+ /* TODO: can there be duplicates? */
+ }
+ }
+
+out:
+ if (found)
+ __nlc_free_pe(this, nlc_ctx, pe);
+
+ return;
+}
+
+static void
+__nlc_del_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, const char *name)
+{
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp = NULL;
+
+ if (!IS_NE_VALID(nlc_ctx->state))
+ goto out;
+
+ list_for_each_entry_safe(ne, tmp, &nlc_ctx->ne, list)
+ {
+ if (strcmp(ne->name, name) == 0) {
+ __nlc_free_ne(this, nlc_ctx, ne);
+ break;
+ }
+ }
+out:
+ return;
+}
+
+static void
+__nlc_add_pe(xlator_t *this, nlc_ctx_t *nlc_ctx, inode_t *entry_ino,
+ const char *name)
+{
+ nlc_pe_t *pe = NULL;
+ int ret = -1;
+ nlc_conf_t *conf = NULL;
+ uint64_t nlc_ctx_int = 0;
+
+ conf = this->private;
+
+ /* TODO: There can be no duplicate entries, as entries are added only
+ during create. In case duplicate entries arise, search the PE list:
+ found = __nlc_search (entries, name, _gf_false);
+ A bit vector could be used for a simpler search than a sequential one */
+
+ pe = GF_CALLOC(sizeof(*pe), 1, gf_nlc_mt_nlc_pe_t);
+ if (!pe)
+ goto out;
+
+ if (entry_ino) {
+ pe->inode = inode_ref(entry_ino);
+ nlc_inode_ctx_set(this, entry_ino, NULL, pe);
+ } else if (name) {
+ pe->name = gf_strdup(name);
+ if (!pe->name)
+ goto out;
+ }
+
+ list_add(&pe->list, &nlc_ctx->pe);
+
+ nlc_ctx->cache_size += sizeof(*pe) + sizeof(pe->name);
+ GF_ATOMIC_ADD(conf->current_cache_size, (sizeof(*pe) + sizeof(pe->name)));
+
+ nlc_ctx->refd_inodes += 1;
+ inode_ctx_get2(entry_ino, this, &nlc_ctx_int, NULL);
+ if (nlc_ctx_int == 0)
+ GF_ATOMIC_ADD(conf->refd_inodes, 1);
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(pe);
+
+ return;
+}
+
+static void
+__nlc_add_ne(xlator_t *this, nlc_ctx_t *nlc_ctx, const char *name)
+{
+ nlc_ne_t *ne = NULL;
+ int ret = -1;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ /* TODO: search the NE list before adding, to avoid duplicate entries:
+ found = __nlc_search (entries, name, _gf_false);
+ A bit vector could be used for a faster search than a sequential one */
+
+ ne = GF_CALLOC(sizeof(*ne), 1, gf_nlc_mt_nlc_ne_t);
+ if (!ne)
+ goto out;
+
+ ne->name = gf_strdup(name);
+ if (!ne->name)
+ goto out;
+
+ list_add(&ne->list, &nlc_ctx->ne);
+
+ nlc_ctx->cache_size += sizeof(*ne) + sizeof(ne->name);
+ GF_ATOMIC_ADD(conf->current_cache_size, (sizeof(*ne) + sizeof(ne->name)));
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(ne);
+
+ return;
+}
+
+void
+nlc_dir_add_ne(xlator_t *this, inode_t *inode, const char *name)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ /* There is one possibility where we need to search before
+ * adding an NE: when there are two parallel lookups on a
+ * non-existent file */
+ if (!__nlc_search_ne(nlc_ctx, name)) {
+ __nlc_add_ne(this, nlc_ctx, name);
+ __nlc_set_dir_state(nlc_ctx, NLC_NE_VALID);
+ }
+ }
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+void
+nlc_dir_remove_pe(xlator_t *this, inode_t *parent, inode_t *entry_ino,
+ const char *name, gf_boolean_t multilink)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (parent->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get(this, parent, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (!__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ __nlc_del_pe(this, nlc_ctx, entry_ino, name, multilink);
+ __nlc_add_ne(this, nlc_ctx, name);
+ __nlc_set_dir_state(nlc_ctx, NLC_NE_VALID);
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+void
+nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ const char *name)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get_set(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ __nlc_del_ne(this, nlc_ctx, name);
+ __nlc_add_pe(this, nlc_ctx, entry_ino, name);
+ if (!IS_PE_VALID(nlc_ctx->state))
+ __nlc_set_dir_state(nlc_ctx, NLC_PE_PARTIAL);
+ }
+ UNLOCK(&nlc_ctx->lock);
+out:
+ return;
+}
+
+gf_boolean_t
+__nlc_search_ne(nlc_ctx_t *nlc_ctx, const char *name)
+{
+ gf_boolean_t found = _gf_false;
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp = NULL;
+
+ if (!IS_NE_VALID(nlc_ctx->state))
+ goto out;
+
+ list_for_each_entry_safe(ne, tmp, &nlc_ctx->ne, list)
+ {
+ if (strcmp(ne->name, name) == 0) {
+ found = _gf_true;
+ break;
+ }
+ }
+out:
+ return found;
+}
+
+static gf_boolean_t
+__nlc_search_pe(nlc_ctx_t *nlc_ctx, const char *name)
+{
+ gf_boolean_t found = _gf_false;
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+
+ if (!IS_PE_VALID(nlc_ctx->state))
+ goto out;
+
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = _gf_true;
+ break;
+ }
+ }
+out:
+ return found;
+}
+
+static char *
+__nlc_get_pe(nlc_ctx_t *nlc_ctx, const char *name,
+ gf_boolean_t case_insensitive)
+{
+ char *found = NULL;
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+
+ if (!IS_PE_VALID(nlc_ctx->state))
+ goto out;
+
+ if (case_insensitive) {
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcasecmp(pe->name, name) == 0)) {
+ found = pe->name;
+ break;
+ }
+ }
+ } else {
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ if (pe->name && (strcmp(pe->name, name) == 0)) {
+ found = pe->name;
+ break;
+ }
+ }
+ }
+out:
+ return found;
+}
+
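+/* A lookup can be answered negatively from the cache if the name is present
+ * in the parent's NE list, or if the PE list is complete (NLC_PE_FULL) and
+ * the name is absent from it.
+ */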
+gf_boolean_t
+nlc_is_negative_lookup(xlator_t *this, loc_t *loc)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t neg_entry = _gf_false;
+
+ inode = loc->parent;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (!__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ if (__nlc_search_ne(nlc_ctx, loc->name)) {
+ neg_entry = _gf_true;
+ goto unlock;
+ }
+ if ((nlc_ctx->state & NLC_PE_FULL) &&
+ !__nlc_search_pe(nlc_ctx, loc->name)) {
+ neg_entry = _gf_true;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+
+out:
+ return neg_entry;
+}
+
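+/* Serves the get_real_filename virtual xattr from the positive-entry cache:
+ * a case-insensitive match returns the real name in 'dict'; if the PE list
+ * is complete (NLC_PE_FULL) and there is no match, ENOENT is returned from
+ * the cache.
+ */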
+gf_boolean_t
+nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname,
+ int32_t *op_ret, int32_t *op_errno, dict_t *dict)
+{
+ nlc_ctx_t *nlc_ctx = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t hit = _gf_false;
+ char *found_file = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, fname, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_ret, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ inode = loc->inode;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ if (inode->ia_type != IA_IFDIR) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, NLC_MSG_EINVAL,
+ "inode is not of type dir");
+ goto out;
+ }
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+ if (!nlc_ctx)
+ goto out;
+
+ LOCK(&nlc_ctx->lock);
+ {
+ if (!__nlc_is_cache_valid(this, nlc_ctx))
+ goto unlock;
+
+ found_file = __nlc_get_pe(nlc_ctx, fname, _gf_true);
+ if (found_file) {
+ ret = dict_set_dynstr(dict, GF_XATTR_GET_REAL_FILENAME_KEY,
+ gf_strdup(found_file));
+ if (ret < 0)
+ goto unlock;
+ *op_ret = strlen(found_file) + 1;
+ hit = _gf_true;
+ goto unlock;
+ }
+ if (!found_file && (nlc_ctx->state & NLC_PE_FULL)) {
+ *op_ret = -1;
+ *op_errno = ENOENT;
+ hit = _gf_true;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&nlc_ctx->lock);
+
+out:
+ return hit;
+}
+
+void
+nlc_dump_inodectx(xlator_t *this, inode_t *inode)
+{
+ int32_t ret = -1;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char uuid_str[64] = {
+ 0,
+ };
+ nlc_ctx_t *nlc_ctx = NULL;
+ nlc_pe_t *pe = NULL;
+ nlc_pe_t *tmp = NULL;
+ nlc_ne_t *ne = NULL;
+ nlc_ne_t *tmp1 = NULL;
+
+ nlc_inode_ctx_get(this, inode, &nlc_ctx);
+
+ if (!nlc_ctx)
+ goto out;
+
+ ret = TRY_LOCK(&nlc_ctx->lock);
+ if (!ret) {
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.nl-cache",
+ "nlc_inode");
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ __inode_path(inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
+
+ uuid_utoa_r(inode->gfid, uuid_str);
+
+ gf_proc_dump_write("inode", "%p", inode);
+ gf_proc_dump_write("gfid", "%s", uuid_str);
+
+ gf_proc_dump_write("state", "%" PRIu64, nlc_ctx->state);
+ gf_proc_dump_write("timer", "%p", nlc_ctx->timer);
+ gf_proc_dump_write("cache-time", "%ld", nlc_ctx->cache_time);
+ gf_proc_dump_write("cache-size", "%zu", nlc_ctx->cache_size);
+ gf_proc_dump_write("refd-inodes", "%" PRIu64, nlc_ctx->refd_inodes);
+
+ if (IS_PE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(pe, tmp, &nlc_ctx->pe, list)
+ {
+ gf_proc_dump_write("pe", "%p, %p, %s", pe, pe->inode, pe->name);
+ }
+
+ if (IS_NE_VALID(nlc_ctx->state))
+ list_for_each_entry_safe(ne, tmp1, &nlc_ctx->ne, list)
+ {
+ gf_proc_dump_write("ne", "%s", ne->name);
+ }
+
+ UNLOCK(&nlc_ctx->lock);
+ }
+
+ if (ret && nlc_ctx)
+ gf_proc_dump_write("Unable to dump the inode information",
+ "(Lock acquisition failed) %p (gfid: %s)", nlc_ctx,
+ uuid_str);
+out:
+ return;
+}
diff --git a/xlators/performance/nl-cache/src/nl-cache-mem-types.h b/xlators/performance/nl-cache/src/nl-cache-mem-types.h
new file mode 100644
index 00000000000..93a17b3fd5a
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __NL_CACHE_MEM_TYPES_H__
+#define __NL_CACHE_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_nlc_mem_types_ {
+ gf_nlc_mt_nlc_conf_t = gf_common_mt_end + 1,
+ gf_nlc_mt_nlc_ctx_t,
+ gf_nlc_mt_nlc_local_t,
+ gf_nlc_mt_nlc_pe_t,
+ gf_nlc_mt_nlc_ne_t,
+ gf_nlc_mt_nlc_timer_data_t,
+ gf_nlc_mt_nlc_lru_node,
+ gf_nlc_mt_end
+};
+
+#endif /* __NL_CACHE_MEM_TYPES_H__ */
diff --git a/xlators/performance/nl-cache/src/nl-cache-messages.h b/xlators/performance/nl-cache/src/nl-cache-messages.h
new file mode 100644
index 00000000000..222d709e133
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache-messages.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __NL_CACHE_MESSAGES_H__
+#define __NL_CACHE_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(NLC, NLC_MSG_NO_MEMORY, NLC_MSG_EINVAL, NLC_MSG_NO_TIMER_WHEEL,
+ NLC_MSG_DICT_FAILURE);
+
+#endif /* __NL_CACHE_MESSAGES_H__ */
diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
new file mode 100644
index 00000000000..33a7c471663
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache.c
@@ -0,0 +1,840 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "nl-cache.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/upcall-utils.h>
+
+static void
+nlc_dentry_op(call_frame_t *frame, xlator_t *this, gf_boolean_t multilink)
+{
+ nlc_local_t *local = frame->local;
+
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+
+ switch (local->fop) {
+ case GF_FOP_MKDIR:
+ nlc_set_dir_state(this, local->loc.inode, NLC_PE_FULL);
+ /*fall-through*/
+ case GF_FOP_MKNOD:
+ case GF_FOP_CREATE:
+ case GF_FOP_SYMLINK:
+ nlc_dir_add_pe(this, local->loc.parent, local->loc.inode,
+ local->loc.name);
+ break;
+ case GF_FOP_LINK:
+ nlc_dir_add_pe(this, local->loc2.parent, NULL, local->loc2.name);
+ break;
+ case GF_FOP_RMDIR:
+ nlc_inode_clear_cache(this, local->loc.inode, _gf_false);
+ /*fall-through*/
+ case GF_FOP_UNLINK:
+ nlc_dir_remove_pe(this, local->loc.parent, local->loc.inode,
+ local->loc.name, multilink);
+ break;
+ case GF_FOP_RENAME:
+ /* TBD: Should these be atomic? In case of rename, the
+ * newloc->inode can be NULL, hence use oldloc->inode */
+ nlc_dir_remove_pe(this, local->loc2.parent, local->loc2.inode,
+ local->loc2.name, _gf_false);
+
+ /* TODO: Remove old dentry from destination before adding this pe */
+ nlc_dir_add_pe(this, local->loc.parent, local->loc2.inode,
+ local->loc.name);
+
+ default:
+ return;
+ }
+
+ nlc_lru_prune(this, NULL);
+out:
+ return;
+}
+
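+/* Common fop wrapper: when IS_PEC_ENABLED() is false the fop is simply
+ * resumed with its default implementation; otherwise an nlc_local_t is set
+ * up (failing the fop with ENOMEM if allocation fails) and the call is
+ * wound to the corresponding nlc_<fop>_cbk.
+ */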
+#define NLC_FOP(_name, _op, loc1, loc2, frame, this, args...) \
+ do { \
+ nlc_local_t *__local = NULL; \
+ nlc_conf_t *conf = NULL; \
+ \
+ conf = this->private; \
+ \
+ if (!IS_PEC_ENABLED(conf)) \
+ goto disabled; \
+ \
+ __local = nlc_local_init(frame, this, _op, loc1, loc2); \
+ GF_VALIDATE_OR_GOTO(this->name, __local, err); \
+ \
+ STACK_WIND(frame, nlc_##_name##_cbk, FIRST_CHILD(this), \
+ FIRST_CHILD(this)->fops->_name, args); \
+ break; \
+ disabled: \
+ default_##_name##_resume(frame, this, args); \
+ break; \
+ err: \
+ default_##_name##_failure_cbk(frame, ENOMEM); \
+ break; \
+ } while (0)
+
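+/* Common fop callback: on success (and with caching enabled) the dentry
+ * cache of the affected directory is updated via nlc_dentry_op() before
+ * unwinding; on failure the fop is unwound untouched.
+ */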
+#define NLC_FOP_CBK(_name, multilink, frame, cookie, this, op_ret, op_errno, \
+ args...) \
+ do { \
+ nlc_conf_t *conf = NULL; \
+ \
+ if (op_ret != 0) \
+ goto out; \
+ \
+ conf = this->private; \
+ \
+ if (op_ret < 0 || !IS_PEC_ENABLED(conf)) \
+ goto out; \
+ nlc_dentry_op(frame, this, multilink); \
+ out: \
+ NLC_STACK_UNWIND(_name, frame, op_ret, op_errno, args); \
+ } while (0)
+
+static int32_t
+nlc_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ NLC_FOP_CBK(rename, _gf_false, frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent, postnewparent,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ NLC_FOP(rename, GF_FOP_RENAME, newloc, oldloc, frame, this, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(mknod, _gf_false, frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ NLC_FOP(mknod, GF_FOP_MKNOD, loc, NULL, frame, this, loc, mode, rdev, umask,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(create, _gf_false, frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ NLC_FOP(create, GF_FOP_CREATE, loc, NULL, frame, this, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(mkdir, _gf_false, frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ NLC_FOP(mkdir, GF_FOP_MKDIR, loc, NULL, frame, this, loc, mode, umask,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ nlc_local_t *local = NULL;
+ nlc_conf_t *conf = NULL;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (!local)
+ goto out;
+
+ /* Do not add to the PE list; doing so may lead to duplicate entries
+ * and would require a search before adding when names are stored as
+ * a list of strings */
+ if (op_ret < 0 && op_errno == ENOENT) {
+ nlc_dir_add_ne(this, local->loc.parent, local->loc.name);
+ GF_ATOMIC_INC(conf->nlc_counter.nlc_miss);
+ }
+
+out:
+ NLC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+static int32_t
+nlc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ nlc_local_t *local = NULL;
+ nlc_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ if (loc_is_nameless(loc))
+ goto wind;
+
+ local = nlc_local_init(frame, this, GF_FOP_LOOKUP, loc, NULL);
+ if (!local)
+ goto err;
+
+ conf = this->private;
+
+ inode = inode_grep(loc->inode->table, loc->parent, loc->name);
+ if (inode) {
+ inode_unref(inode);
+ goto wind;
+ }
+
+ if (nlc_is_negative_lookup(this, loc)) {
+ GF_ATOMIC_INC(conf->nlc_counter.nlc_hit);
+ gf_msg_trace(this->name, 0,
+ "Serving negative lookup from "
+ "cache:%s",
+ loc->name);
+ goto unwind;
+ }
+
+wind:
+ STACK_WIND(frame, nlc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+unwind:
+ NLC_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
+ return 0;
+err:
+ NLC_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+nlc_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ NLC_FOP_CBK(rmdir, _gf_false, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ NLC_FOP(rmdir, GF_FOP_RMDIR, loc, NULL, frame, this, loc, flags, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (!IS_PEC_ENABLED(conf))
+ goto out;
+
+ if (op_ret < 0 && op_errno == ENOENT) {
+ GF_ATOMIC_INC(conf->nlc_counter.getrealfilename_miss);
+ }
+
+out:
+ NLC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ dict_t *dict = NULL;
+ nlc_local_t *local = NULL;
+ gf_boolean_t hit = _gf_false;
+ const char *fname = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!IS_PEC_ENABLED(conf))
+ goto wind;
+
+ if (!key || (strncmp(key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) != 0))
+ goto wind;
+
+ local = nlc_local_init(frame, this, GF_FOP_GETXATTR, loc, NULL);
+ if (!local)
+ goto err;
+
+ if (loc->inode && key) {
+ dict = dict_new();
+ if (!dict)
+ goto err;
+
+ fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY);
+ hit = nlc_get_real_file_name(this, loc, fname, &op_ret, &op_errno,
+ dict);
+ if (hit)
+ goto unwind;
+ else
+ dict_unref(dict);
+ }
+
+ STACK_WIND(frame, nlc_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
+wind:
+ STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
+unwind:
+ GF_ATOMIC_INC(conf->nlc_counter.getrealfilename_hit);
+ NLC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, NULL);
+ dict_unref(dict);
+ return 0;
+err:
+ NLC_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+nlc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(symlink, _gf_false, frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ NLC_FOP(symlink, GF_FOP_SYMLINK, loc, NULL, frame, this, linkpath, loc,
+ umask, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ NLC_FOP_CBK(link, _gf_false, frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ NLC_FOP(link, GF_FOP_LINK, oldloc, newloc, frame, this, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+static int32_t
+nlc_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ uint32_t link_count = 0;
+ gf_boolean_t multilink = _gf_false;
+
+ if (xdata && !dict_get_uint32(xdata, GET_LINK_COUNT, &link_count)) {
+ if (link_count > 1)
+ multilink = _gf_true;
+ } else {
+ /* Don't touch cache if we don't know enough */
+ gf_msg(this->name, GF_LOG_WARNING, 0, NLC_MSG_DICT_FAILURE,
+ "Failed to get GET_LINK_COUNT from dict");
+ NLC_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+ }
+
+ NLC_FOP_CBK(unlink, multilink, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+static int32_t
+nlc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ nlc_conf_t *conf = NULL;
+ gf_boolean_t new_dict = _gf_false;
+
+ conf = this->private;
+
+ if (!IS_PEC_ENABLED(conf))
+ goto do_fop;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (xdata)
+ new_dict = _gf_true;
+ }
+
+ if (xdata && dict_set_uint32(xdata, GET_LINK_COUNT, 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, NLC_MSG_DICT_FAILURE,
+ "Failed to set GET_LINK_COUNT in dict");
+ goto err;
+ }
+
+do_fop:
+ NLC_FOP(unlink, GF_FOP_UNLINK, loc, NULL, frame, this, loc, flags, xdata);
+
+ if (new_dict)
+ dict_unref(xdata);
+ return 0;
+}
+
+static int32_t
+nlc_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ inode_t *parent1 = NULL;
+ inode_t *parent2 = NULL;
+ int ret = 0;
+ inode_table_t *itable = NULL;
+ nlc_conf_t *conf = NULL;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+    /* TODO: Add the inodes found as a member in gf_upcall_cache_invalidation
+ * so that it prevents subsequent xlators from doing inode_find again
+ */
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if ((!((up_ci->flags & UP_TIMES) && inode->ia_type == IA_IFDIR)) &&
+ (!(up_ci->flags & UP_PARENT_DENTRY_FLAGS))) {
+ goto out;
+ }
+
+ if (!gf_uuid_is_null(up_ci->p_stat.ia_gfid)) {
+ parent1 = inode_find(itable, up_ci->p_stat.ia_gfid);
+ if (!parent1) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (!gf_uuid_is_null(up_ci->oldp_stat.ia_gfid)) {
+ parent2 = inode_find(itable, up_ci->oldp_stat.ia_gfid);
+ if (!parent2) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* TODO: get enough data in upcall so that we do not invalidate but
+ * update */
+ if (inode && inode->ia_type == IA_IFDIR)
+ nlc_inode_clear_cache(this, inode, NLC_NONE);
+ if (parent1)
+ nlc_inode_clear_cache(this, parent1, NLC_NONE);
+ if (parent2)
+ nlc_inode_clear_cache(this, parent2, NLC_NONE);
+
+ GF_ATOMIC_INC(conf->nlc_counter.nlc_invals);
+
+out:
+ if (inode)
+ inode_unref(inode);
+ if (parent1)
+ inode_unref(parent1);
+ if (parent2)
+ inode_unref(parent2);
+
+ return ret;
+}
+
+int
+nlc_notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ nlc_update_child_down_time(this, gf_time());
+ /* TODO: nlc_clear_all_cache (this); else
+ lru prune will lazily clear it*/
+ break;
+ case GF_EVENT_UPCALL:
+ ret = nlc_invalidate(this, data);
+ break;
+ case GF_EVENT_PARENT_DOWN:
+ nlc_disable_cache(this);
+ nlc_clear_all_cache(this);
+ default:
+ break;
+ }
+
+ if (default_notify(this, event, data) != 0)
+ ret = -1;
+
+ return ret;
+}
+
+static int32_t
+nlc_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t pe_int = 0;
+ uint64_t nlc_ctx_int = 0;
+ nlc_ctx_t *nlc_ctx = NULL;
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ inode_ctx_reset1(inode, this, &pe_int);
+ GF_ASSERT(pe_int == 0);
+
+ nlc_inode_clear_cache(this, inode, NLC_NONE);
+ inode_ctx_reset0(inode, this, &nlc_ctx_int);
+ nlc_ctx = (void *)(long)nlc_ctx_int;
+ if (nlc_ctx) {
+ GF_FREE(nlc_ctx);
+ GF_ATOMIC_SUB(conf->current_cache_size, sizeof(*nlc_ctx));
+ }
+
+ return 0;
+}
+
+static int32_t
+nlc_inodectx(xlator_t *this, inode_t *inode)
+{
+ nlc_dump_inodectx(this, inode);
+ return 0;
+}
+
+static int32_t
+nlc_priv_dump(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ conf = this->private;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("negative_lookup_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_hit));
+ gf_proc_dump_write("negative_lookup_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_miss));
+ gf_proc_dump_write("get_real_filename_hit_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_hit));
+ gf_proc_dump_write("get_real_filename_miss_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_miss));
+ gf_proc_dump_write("nameless_lookup_count", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nameless_lookup));
+ gf_proc_dump_write("inodes_with_positive_dentry_cache", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.pe_inode_cnt));
+ gf_proc_dump_write("inodes_with_negative_dentry_cache", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.ne_inode_cnt));
+ gf_proc_dump_write("dentry_invalidations_received", "%" PRId64,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_invals));
+ gf_proc_dump_write("cache_limit", "%" PRIu64, conf->cache_size);
+ gf_proc_dump_write("consumed_cache_size", "%" PRId64,
+ GF_ATOMIC_GET(conf->current_cache_size));
+ gf_proc_dump_write("inode_limit", "%" PRIu64, conf->inode_limit);
+ gf_proc_dump_write("consumed_inodes", "%" PRId64,
+ GF_ATOMIC_GET(conf->refd_inodes));
+
+ return 0;
+}
+
+static int32_t
+nlc_dump_metrics(xlator_t *this, int fd)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ dprintf(fd, "%s.negative_lookup_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_hit));
+ dprintf(fd, "%s.negative_lookup_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_miss));
+ dprintf(fd, "%s.get_real_filename_hit_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_hit));
+ dprintf(fd, "%s.get_real_filename_miss_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.getrealfilename_miss));
+ dprintf(fd, "%s.nameless_lookup_count %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nameless_lookup));
+ dprintf(fd, "%s.inodes_with_positive_dentry_cache %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->nlc_counter.pe_inode_cnt));
+ dprintf(fd, "%s.inodes_with_negative_dentry_cache %" PRId64 "\n",
+ this->name, GF_ATOMIC_GET(conf->nlc_counter.ne_inode_cnt));
+ dprintf(fd, "%s.dentry_invalidations_received %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->nlc_counter.nlc_invals));
+ dprintf(fd, "%s.cache_limit %" PRIu64 "\n", this->name, conf->cache_size);
+ dprintf(fd, "%s.consumed_cache_size %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->current_cache_size));
+ dprintf(fd, "%s.inode_limit %" PRIu64 "\n", this->name, conf->inode_limit);
+ dprintf(fd, "%s.consumed_inodes %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(conf->refd_inodes));
+
+ return 0;
+}
+
+void
+nlc_fini(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+ GF_FREE(conf);
+
+ glusterfs_ctx_tw_put(this->ctx);
+
+ return;
+}
+
+int32_t
+nlc_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_nlc_mt_end + 1);
+ return ret;
+}
+
+int32_t
+nlc_reconfigure(xlator_t *this, dict_t *options)
+{
+ nlc_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("nl-cache-timeout", conf->cache_timeout, options, int32,
+ out);
+ GF_OPTION_RECONF("nl-cache-positive-entry", conf->positive_entry_cache,
+ options, bool, out);
+ GF_OPTION_RECONF("nl-cache-limit", conf->cache_size, options, size_uint64,
+ out);
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+out:
+ return 0;
+}
+
+int32_t
+nlc_init(xlator_t *this)
+{
+ nlc_conf_t *conf = NULL;
+ int ret = -1;
+ inode_table_t *itable = NULL;
+
+ conf = GF_CALLOC(sizeof(*conf), 1, gf_nlc_mt_nlc_conf_t);
+ if (!conf)
+ goto out;
+
+ GF_OPTION_INIT("nl-cache-timeout", conf->cache_timeout, int32, out);
+ GF_OPTION_INIT("nl-cache-positive-entry", conf->positive_entry_cache, bool,
+ out);
+ GF_OPTION_INIT("nl-cache-limit", conf->cache_size, size_uint64, out);
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+    /* Since the positive entries are stored as a list of refs on
+     * existing inodes, we should not overflow the inode lru_limit.
+     * Hence, keep the limit of inodes that are refed by this xlator
+     * to 80% of inode_table->lru_limit. In fuse, where the limit is
+     * infinite, take 131072 as the lru limit (as in gfapi). */
+ itable = ((xlator_t *)this->graph->top)->itable;
+ if (itable && itable->lru_limit)
+ conf->inode_limit = itable->lru_limit * 80 / 100;
+ else
+ conf->inode_limit = 131072 * 80 / 100;
+
+ LOCK_INIT(&conf->lock);
+ GF_ATOMIC_INIT(conf->current_cache_size, 0);
+ GF_ATOMIC_INIT(conf->refd_inodes, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nlc_hit, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nlc_miss, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nameless_lookup, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.getrealfilename_hit, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.getrealfilename_miss, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.pe_inode_cnt, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.ne_inode_cnt, 0);
+ GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0);
+
+ INIT_LIST_HEAD(&conf->lru);
+ conf->last_child_down = gf_time();
+
+ conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
+ if (!conf->timer_wheel) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, NLC_MSG_NO_TIMER_WHEEL,
+ "Initing the global timer wheel failed");
+ goto out;
+ }
+
+ this->private = conf;
+
+ ret = 0;
+out:
+ if (ret < 0)
+ GF_FREE(conf);
+
+ return ret;
+}
+
+struct xlator_fops nlc_fops = {
+ .rename = nlc_rename,
+ .mknod = nlc_mknod,
+ .create = nlc_create,
+ .mkdir = nlc_mkdir,
+ .lookup = nlc_lookup,
+ .rmdir = nlc_rmdir,
+ .getxattr = nlc_getxattr,
+ .symlink = nlc_symlink,
+ .link = nlc_link,
+ .unlink = nlc_unlink,
+ /* TODO:
+ .readdir = nlc_readdir,
+ .readdirp = nlc_readdirp,
+ .seek = nlc_seek,
+ .opendir = nlc_opendir, */
+};
+
+struct xlator_cbks nlc_cbks = {
+ .forget = nlc_forget,
+};
+
+struct xlator_dumpops nlc_dumpops = {
+ .inodectx = nlc_inodectx,
+ .priv = nlc_priv_dump,
+};
+
+struct volume_options nlc_options[] = {
+ {
+ .key = {"nl-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable nl-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"nl-cache-positive-entry"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+        .description = "Cache the names of the files/directories that were"
+                       " looked up and are present in a directory",
+ },
+ {
+ .key = {"nl-cache-limit"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .default_value = "131072",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+        .description = "the value over which caching will be disabled for"
+                       " a while and the cache is cleared based on LRU",
+ },
+ {
+ .key = {"nl-cache-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .default_value = "60",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Time period after which cache has to be refreshed",
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"nl-cache"},
+ .description = "Enable/Disable nl cache translator"},
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = nlc_init,
+ .fini = nlc_fini,
+ .notify = nlc_notify,
+ .reconfigure = nlc_reconfigure,
+ .mem_acct_init = nlc_mem_acct_init,
+ .dump_metrics = nlc_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &nlc_dumpops,
+ .fops = &nlc_fops,
+ .cbks = &nlc_cbks,
+ .options = nlc_options,
+ .identifier = "nl-cache",
+ .category = GF_TECH_PREVIEW,
+};
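The lookup path above reduces to a simple pattern: before winding a lookup, consult a per-directory set of names already known not to exist, and when the backend replies ENOENT, remember the name so the next lookup can be answered locally. The fragment below is a minimal, self-contained sketch of that pattern only; the names are hypothetical and it is not part of this patch or of the nlc_* helpers. The real translator keeps these lists in locked per-inode contexts, bounds them with nl-cache-limit, and drops them on upcall invalidation or after nl-cache-timeout.

/* --- illustrative sketch, not part of the patch --- */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ne_entry {
    char *name;
    struct ne_entry *next;
};

struct dir_cache {
    struct ne_entry *ne_head; /* names known to be absent in this directory */
};

/* Returns 1 if the name was previously recorded as absent. */
static int ne_hit(struct dir_cache *dc, const char *name)
{
    for (struct ne_entry *e = dc->ne_head; e; e = e->next)
        if (strcmp(e->name, name) == 0)
            return 1;
    return 0;
}

/* Record a name the backend reported as ENOENT (compare nlc_dir_add_ne). */
static void ne_add(struct dir_cache *dc, const char *name)
{
    struct ne_entry *e = malloc(sizeof(*e));
    if (!e)
        return;
    e->name = strdup(name);
    if (!e->name) {
        free(e);
        return;
    }
    e->next = dc->ne_head;
    dc->ne_head = e;
}

/* Stand-in for STACK_WIND to the child xlator. */
static int backend_lookup(const char *name)
{
    return strcmp(name, "present") == 0 ? 0 : -ENOENT;
}

static int cached_lookup(struct dir_cache *dc, const char *name)
{
    if (ne_hit(dc, name))
        return -ENOENT; /* served from cache, no wind (the nlc_hit case) */
    int ret = backend_lookup(name);
    if (ret == -ENOENT)
        ne_add(dc, name); /* what nlc_lookup_cbk does on ENOENT */
    return ret;
}

int main(void)
{
    struct dir_cache dc = {0};
    printf("%d\n", cached_lookup(&dc, "absent"));  /* miss: goes to backend */
    printf("%d\n", cached_lookup(&dc, "absent"));  /* hit: answered locally */
    printf("%d\n", cached_lookup(&dc, "present")); /* positive lookup */
    return 0;
}
/* --- end of sketch --- */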
diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h
new file mode 100644
index 00000000000..85fcc176342
--- /dev/null
+++ b/xlators/performance/nl-cache/src/nl-cache.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __NL_CACHE_H__
+#define __NL_CACHE_H__
+
+#include "nl-cache-mem-types.h"
+#include "nl-cache-messages.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/atomic.h>
+
+#define NLC_INVALID 0x0000
+#define NLC_PE_FULL 0x0001
+#define NLC_PE_PARTIAL 0x0002
+#define NLC_NE_VALID 0x0004
+
+#define IS_PE_VALID(state) \
+ ((state != NLC_INVALID) && (state & (NLC_PE_FULL | NLC_PE_PARTIAL)))
+#define IS_NE_VALID(state) ((state != NLC_INVALID) && (state & NLC_NE_VALID))
+
+#define IS_PEC_ENABLED(conf) (conf->positive_entry_cache)
+#define IS_CACHE_ENABLED(conf) ((!conf->cache_disabled))
+
+#define NLC_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ nlc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ nlc_local_wipe(__xl, __local); \
+ } while (0)
+
+enum nlc_cache_clear_reason {
+ NLC_NONE = 0,
+ NLC_LRU_PRUNE,
+};
+
+struct nlc_ne {
+ struct list_head list;
+ char *name;
+};
+typedef struct nlc_ne nlc_ne_t;
+
+struct nlc_pe {
+ struct list_head list;
+ inode_t *inode;
+ char *name;
+};
+typedef struct nlc_pe nlc_pe_t;
+
+struct nlc_timer_data {
+ inode_t *inode;
+ xlator_t *this;
+};
+typedef struct nlc_timer_data nlc_timer_data_t;
+
+struct nlc_lru_node {
+ inode_t *inode;
+ struct list_head list;
+};
+typedef struct nlc_lru_node nlc_lru_node_t;
+
+struct nlc_ctx {
+ struct list_head pe; /* list of positive entries */
+ struct list_head ne; /* list of negative entries */
+ uint64_t state;
+ time_t cache_time;
+ struct gf_tw_timer_list *timer;
+ nlc_timer_data_t *timer_data;
+ size_t cache_size;
+ uint64_t refd_inodes;
+ gf_lock_t lock;
+};
+typedef struct nlc_ctx nlc_ctx_t;
+
+struct nlc_local {
+ loc_t loc;
+ loc_t loc2;
+ inode_t *inode;
+ inode_t *parent;
+ fd_t *fd;
+ char *linkname;
+ glusterfs_fop_t fop;
+};
+typedef struct nlc_local nlc_local_t;
+
+struct nlc_statistics {
+ gf_atomic_t nlc_hit; /* No. of times lookup/stat was served from this xl */
+ gf_atomic_t nlc_miss; /* No. of times negative lookups were sent to disk */
+ /* More granular counters */
+ gf_atomic_t nameless_lookup;
+ gf_atomic_t getrealfilename_hit;
+ gf_atomic_t getrealfilename_miss;
+ gf_atomic_t pe_inode_cnt;
+ gf_atomic_t ne_inode_cnt;
+ gf_atomic_t nlc_invals; /* No. of invalidates received from upcall*/
+};
+
+struct nlc_conf {
+ int32_t cache_timeout;
+ gf_boolean_t positive_entry_cache;
+ gf_boolean_t negative_entry_cache;
+ gf_boolean_t disable_cache;
+ uint64_t cache_size;
+ gf_atomic_t current_cache_size;
+ uint64_t inode_limit;
+ gf_atomic_t refd_inodes;
+ struct tvec_base *timer_wheel;
+ time_t last_child_down;
+ struct list_head lru;
+ gf_lock_t lock;
+ struct nlc_statistics nlc_counter;
+};
+typedef struct nlc_conf nlc_conf_t;
+
+gf_boolean_t
+nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname,
+ int32_t *op_ret, int32_t *op_errno, dict_t *dict);
+
+gf_boolean_t
+nlc_is_negative_lookup(xlator_t *this, loc_t *loc);
+
+void
+nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state);
+
+void
+nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ const char *name);
+
+void
+nlc_dir_remove_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino,
+ const char *name, gf_boolean_t multilink);
+
+void
+nlc_dir_add_ne(xlator_t *this, inode_t *inode, const char *name);
+
+void
+nlc_local_wipe(xlator_t *this, nlc_local_t *local);
+
+nlc_local_t *
+nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, loc_t *loc2);
+
+void
+nlc_update_child_down_time(xlator_t *this, time_t now);
+
+void
+nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason);
+
+void
+nlc_dump_inodectx(xlator_t *this, inode_t *inode);
+
+void
+nlc_clear_all_cache(xlator_t *this);
+
+void
+nlc_disable_cache(xlator_t *this);
+
+void
+nlc_lru_prune(xlator_t *this, inode_t *inode);
+
+#endif /* __NL_CACHE_H__ */
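The nlc_ctx defined above records when a directory's entry cache was populated (cache_time), and the cache is only honoured while it is fresh. The fragment below is a minimal sketch of that freshness test under two assumptions: a seconds-granularity clock and toy names that are not the translator's own functions. Roughly, an entry set populated before the last child-down event, or older than nl-cache-timeout, must not be trusted; the translator's real check additionally consults the NLC_*_VALID bits in nlc_ctx->state.

/* --- illustrative sketch, not part of the patch --- */
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct toy_ctx {
    time_t cache_time; /* when this directory's entry cache was populated */
    uint64_t state;    /* e.g. NLC_NE_VALID | NLC_PE_PARTIAL */
};

/* Roughly: trust the cache only if it was populated after the last
 * child-down event and is younger than the configured timeout. */
bool toy_cache_usable(const struct toy_ctx *ctx, time_t now, time_t timeout,
                      time_t last_child_down)
{
    if (ctx->cache_time == 0)
        return false; /* never populated */
    if (ctx->cache_time <= last_child_down)
        return false; /* a brick went down since it was populated */
    return (now - ctx->cache_time) < timeout;
}
/* --- end of sketch --- */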
diff --git a/xlators/performance/open-behind/Makefile.am b/xlators/performance/open-behind/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/performance/open-behind/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/open-behind/src/Makefile.am b/xlators/performance/open-behind/src/Makefile.am
new file mode 100644
index 00000000000..41930dcd67d
--- /dev/null
+++ b/xlators/performance/open-behind/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = open-behind.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+open_behind_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+open_behind_la_SOURCES = open-behind.c
+open_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = open-behind-mem-types.h open-behind-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h
new file mode 100644
index 00000000000..6c1ab2e19d2
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __OB_MEM_TYPES_H__
+#define __OB_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_ob_mem_types_ {
+ gf_ob_mt_fd_t = gf_common_mt_end + 1,
+ gf_ob_mt_conf_t,
+ gf_ob_mt_inode_t,
+ gf_ob_mt_end
+};
+#endif
diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
new file mode 100644
index 00000000000..0e789177684
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind-messages.h
@@ -0,0 +1,32 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _OPEN_BEHIND_MESSAGES_H_
+#define _OPEN_BEHIND_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+ OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
+ OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
+
+#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
+#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
+
+#endif /* _OPEN_BEHIND_MESSAGES_H_ */
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
new file mode 100644
index 00000000000..600c3b62ffe
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -0,0 +1,1101 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "open-behind-mem-types.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include "open-behind-messages.h"
+#include <glusterfs/glusterfs-acl.h>
+
+/* Note: The initial design of open-behind was made to cover the simple case
+ * of open, read, close for small files. This pattern combined with
+ * quick-read can do the whole operation without a single request to the
+ * bricks (except the initial lookup).
+ *
+ * The way to do this has been improved, but the logic remains the same.
+ * Basically, this means that any operation sent to the fd or the inode
+ * that is not a read causes the open request to be sent to the
+ * bricks, and all future operations will be executed synchronously,
+ * including opens (it's reset once all fd's are closed).
+ */
+
+typedef struct ob_conf {
+ gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
+ e.g - fstat() readv()
+
+ whereas for fops like writev(), lk(),
+ the fd is important for side effects
+ like mandatory locks
+ */
+ gf_boolean_t lazy_open; /* delay backend open as much as possible */
+ gf_boolean_t read_after_open; /* instead of sending readvs on
+ anonymous fds, open the file
+ first and then send readv i.e
+ similar to what writev does
+ */
+} ob_conf_t;
+
+/* A negative state represents an errno value negated. In this case the
+ * current operation cannot be processed. */
+typedef enum _ob_state {
+ /* There are no opens on the inode or the first open is already
+ * completed. The current operation can be sent directly. */
+ OB_STATE_READY = 0,
+
+ /* There's an open pending and it has been triggered. The current
+ * operation should be "stubbified" and processed with
+ * ob_stub_dispatch(). */
+ OB_STATE_OPEN_TRIGGERED,
+
+ /* There's an open pending but it has not been triggered. The current
+ * operation can be processed directly but using an anonymous fd. */
+ OB_STATE_OPEN_PENDING,
+
+ /* The current operation is the first open on the inode. */
+ OB_STATE_FIRST_OPEN
+} ob_state_t;
+
+typedef struct ob_inode {
+ /* List of stubs pending on the first open. Once the first open is
+ * complete, all these stubs will be resubmitted, and dependencies
+ * will be checked again. */
+ struct list_head resume_fops;
+
+ /* The inode this object references. */
+ inode_t *inode;
+
+ /* The fd from the first open sent to this inode. It will be set
+     * from the moment the open is processed until the open is fully
+     * executed or closed before it is actually opened. It's NULL in all
+ * other cases. */
+ fd_t *first_fd;
+
+ /* The stub from the first open operation. When open fop starts
+ * being processed, it's assigned the OB_OPEN_PREPARING value
+ * until the actual stub is created. This is necessary to avoid
+ * creating the stub inside a locked region. Once the stub is
+ * successfully created, it's assigned here. This value is set
+ * to NULL once the stub is resumed. */
+ call_stub_t *first_open;
+
+ /* The total number of currently open fd's on this inode. */
+ int32_t open_count;
+
+ /* This flag is set as soon as we know that the open will be
+ * sent to the bricks, even before the stub is ready. */
+ bool triggered;
+} ob_inode_t;
+
+/* Dummy pointer used temporarily while the actual open stub is being created */
+#define OB_OPEN_PREPARING ((call_stub_t *)-1)
+
+#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \
+ case OB_STATE_FIRST_OPEN: \
+ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \
+ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \
+ default_##_fop##_failure_cbk(_frame, EINVAL); \
+ break; \
+ case OB_STATE_READY: \
+ default_##_fop(_frame, _xl, ##_args); \
+ break; \
+ case OB_STATE_OPEN_TRIGGERED: { \
+ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \
+ ##_args); \
+ if (__ob_stub != NULL) { \
+ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \
+ break; \
+ } \
+ __ob_state = -ENOMEM; \
+ } \
+ default: \
+ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \
+ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \
+ default_##_fop##_failure_cbk(_frame, -__ob_state)
+
+#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_fd( \
+ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ if (!(_trigger)) { \
+ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \
+ (_fd)->flags); \
+ if (__ob_fd != NULL) { \
+ default_##_fop(_frame, _xl, ##_args); \
+ fd_unref(__ob_fd); \
+ break; \
+ } \
+ __ob_state = -ENOMEM; \
+ } \
+ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_fd( \
+ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \
+ break; \
+ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) \
+ do { \
+ ob_inode_t *__ob_inode; \
+ fd_t *__first_fd; \
+ ob_state_t __ob_state = ob_open_and_resume_inode( \
+ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \
+ switch (__ob_state) { \
+ case OB_STATE_OPEN_PENDING: \
+ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \
+ } \
+ } while (0)
+
+static ob_inode_t *
+ob_inode_get_locked(xlator_t *this, inode_t *inode)
+{
+ ob_inode_t *ob_inode = NULL;
+ uint64_t value = 0;
+
+ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) {
+ return (ob_inode_t *)(uintptr_t)value;
+ }
+
+ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
+ if (ob_inode != NULL) {
+ ob_inode->inode = inode;
+ INIT_LIST_HEAD(&ob_inode->resume_fops);
+
+ value = (uint64_t)(uintptr_t)ob_inode;
+ if (__inode_ctx_set(inode, this, &value) < 0) {
+ GF_FREE(ob_inode);
+ ob_inode = NULL;
+ }
+ }
+
+ return ob_inode;
+}
+
+static ob_state_t
+ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd,
+ int32_t open_count, bool synchronous, bool trigger,
+ ob_inode_t **pob_inode, fd_t **pfd)
+{
+ ob_conf_t *conf;
+ ob_inode_t *ob_inode;
+ call_stub_t *open_stub;
+
+ if (inode == NULL) {
+ return OB_STATE_READY;
+ }
+
+ conf = xl->private;
+
+ *pfd = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ob_inode = ob_inode_get_locked(xl, inode);
+ if (ob_inode == NULL) {
+ UNLOCK(&inode->lock);
+
+ return -ENOMEM;
+ }
+ *pob_inode = ob_inode;
+
+ ob_inode->open_count += open_count;
+
+ /* If first_fd is not NULL, it means that there's a previous open not
+ * yet completed. */
+ if (ob_inode->first_fd != NULL) {
+ *pfd = ob_inode->first_fd;
+ /* If the current request doesn't trigger the open and it hasn't
+ * been triggered yet, we can continue without issuing the open
+ * only if the current request belongs to the same fd as the
+ * first one. */
+ if (!trigger && !ob_inode->triggered &&
+ (ob_inode->first_fd == fd)) {
+ UNLOCK(&inode->lock);
+
+ return OB_STATE_OPEN_PENDING;
+ }
+
+ /* We need to issue the open. It could have already been triggered
+ * before. In this case open_stub will be NULL. Or the initial open
+ * may not be completely ready yet. In this case open_stub will be
+ * OB_OPEN_PREPARING. */
+ open_stub = ob_inode->first_open;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = true;
+
+ UNLOCK(&inode->lock);
+
+ if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
+ call_resume(open_stub);
+ }
+
+ return OB_STATE_OPEN_TRIGGERED;
+ }
+
+ /* There's no pending open. Only opens can be non synchronous, so all
+ * regular fops will be processed directly. For non synchronous opens,
+         * we'll still process them normally (i.e. synchronously) if there are
+ * more file descriptors open. */
+ if (synchronous || (ob_inode->open_count > open_count)) {
+ UNLOCK(&inode->lock);
+
+ return OB_STATE_READY;
+ }
+
+ *pfd = fd;
+
+ /* This is the first open. We keep a reference on the fd and set
+ * first_open stub to OB_OPEN_PREPARING until the actual stub can
+ * be assigned (we don't create the stub here to avoid doing memory
+ * allocations inside the mutex). */
+ ob_inode->first_fd = __fd_ref(fd);
+ ob_inode->first_open = OB_OPEN_PREPARING;
+
+ /* If lazy_open is not set, we'll need to immediately send the open,
+ * so we set triggered right now. */
+ ob_inode->triggered = !conf->lazy_open;
+ }
+ UNLOCK(&inode->lock);
+
+ return OB_STATE_FIRST_OPEN;
+}
+
+static ob_state_t
+ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
+ bool synchronous, bool trigger, ob_inode_t **pob_inode,
+ fd_t **pfd)
+{
+ uint64_t err;
+
+ if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
+ return (ob_state_t)-err;
+ }
+
+ return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
+ trigger, pob_inode, pfd);
+}
+
+static ob_state_t
+ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
+ fd_t **pfd)
+{
+ bool synchronous;
+
+ /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
+ * we also execute this open synchronously ? */
+ synchronous = (flags & O_TRUNC) != 0;
+
+ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd);
+}
+
+static int32_t
+ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
+{
+ LOCK(&ob_inode->inode->lock);
+ {
+ /* We only queue a stub if the open has not been completed or
+ * cancelled. */
+ if (ob_inode->first_fd == fd) {
+ list_add_tail(&stub->list, &ob_inode->resume_fops);
+ stub = NULL;
+ }
+ }
+ UNLOCK(&ob_inode->inode->lock);
+
+ if (stub != NULL) {
+ call_resume(stub);
+ }
+
+ return 0;
+}
+
+static void
+ob_open_destroy(call_stub_t *stub, fd_t *fd)
+{
+ stub->frame->local = NULL;
+ STACK_DESTROY(stub->frame->root);
+ call_stub_destroy(stub);
+ fd_unref(fd);
+}
+
+static int32_t
+ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ call_stub_t *stub)
+{
+ bool closed;
+
+ LOCK(&ob_inode->inode->lock);
+ {
+ closed = ob_inode->first_fd != fd;
+ if (!closed) {
+ if (ob_inode->triggered) {
+ ob_inode->first_open = NULL;
+ } else {
+ ob_inode->first_open = stub;
+ stub = NULL;
+ }
+ }
+ }
+ UNLOCK(&ob_inode->inode->lock);
+
+ if (stub != NULL) {
+ if (closed) {
+ ob_open_destroy(stub, fd);
+ } else {
+ call_resume(stub);
+ }
+ }
+
+ return 0;
+}
+
+static void
+ob_resume_pending(struct list_head *list)
+{
+ call_stub_t *stub;
+
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
+
+ call_resume(stub);
+ }
+}
+
+static void
+ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret,
+ int32_t op_errno)
+{
+ struct list_head list;
+
+ INIT_LIST_HEAD(&list);
+
+ if (op_ret < 0) {
+ fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno);
+ }
+
+ LOCK(&ob_inode->inode->lock);
+ {
+ /* Only update the fields if the file has not been closed before
+ * getting here. */
+ if (ob_inode->first_fd == fd) {
+ list_splice_init(&ob_inode->resume_fops, &list);
+ ob_inode->first_fd = NULL;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = false;
+ }
+ }
+ UNLOCK(&ob_inode->inode->lock);
+
+ ob_resume_pending(&list);
+
+ fd_unref(fd);
+}
+
+static int32_t
+ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ ob_inode_t *ob_inode;
+
+ ob_inode = frame->local;
+ frame->local = NULL;
+
+ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno);
+
+ STACK_DESTROY(frame->root);
+
+ return 0;
+}
+
+static int32_t
+ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
+{
+ ob_inode_t *ob_inode;
+ call_frame_t *open_frame;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd);
+ if (state == OB_STATE_READY) {
+ /* There's no pending open, but there are other file descriptors opened
+ * or the current flags require a synchronous open. */
+ return default_open(frame, this, loc, flags, fd, xdata);
+ }
+
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
+
+ state = -ENOMEM;
+ }
+
+ if (state == OB_STATE_FIRST_OPEN) {
+ /* We try to create a stub for the new open. A new frame needs to be
+ * used because the current one may be destroyed soon after sending
+ * the open's reply. */
+ open_frame = copy_frame(frame);
+ if (open_frame != NULL) {
+ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd,
+ xdata);
+ if (stub != NULL) {
+ open_frame->local = ob_inode;
+
+ /* TODO: Previous version passed xdata back to the caller, but
+ * probably this doesn't make sense since it won't contain
+ * any requested data. I think it would be better to pass
+ * NULL for xdata. */
+ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata);
+
+ return ob_open_dispatch(this, ob_inode, first_fd, stub);
+ }
+
+ STACK_DESTROY(open_frame->root);
+ }
+
+ /* In case of error, simulate a regular completion but with an error
+ * code. */
+ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM);
+
+ state = -ENOMEM;
+ }
+
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
+
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
+ }
+ UNLOCK(&fd->inode->lock);
+
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "open", "path=%s", loc->path, NULL);
+
+ return default_open_failure_cbk(frame, -state);
+}
+
+static int32_t
+ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ /* Create requests are never delayed. We always send them synchronously. */
+ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode,
+ &first_fd);
+ if (state == OB_STATE_READY) {
+ /* There's no pending open, but there are other file descriptors opened
+ * so we simply forward the request synchronously. */
+ return default_create(frame, this, loc, flags, mode, umask, fd, xdata);
+ }
+
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd,
+ xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
+
+ state = -ENOMEM;
+ }
+
+ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never
+ * be returned by ob_open_and_resume_fd(). If we are here it can only be
+ * because there has been a problem. */
+
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
+
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
+ }
+ UNLOCK(&fd->inode->lock);
+
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "create", "path=%s", loc->path, NULL);
+
+ return default_create_failure_cbk(frame, -state);
+}
+
+static int32_t
+ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ ob_conf_t *conf = this->private;
+ bool trigger = conf->read_after_open || !conf->use_anonymous_fd;
+
+ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags,
+ iobref, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ob_conf_t *conf = this->private;
+ bool trigger = !conf->use_anonymous_fd;
+
+ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ ob_conf_t *conf = this->private;
+ bool trigger = !conf->use_anonymous_fd;
+
+ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ OB_POST_FLUSH(this, frame, fd, fd, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata)
+{
+ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt,
+ int valid, dict_t *xdata)
+{
+ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
+ dict_t *xdata)
+{
+ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) ||
+ dict_get(dict, POSIX_ACL_ACCESS_XATTR) ||
+ dict_get(dict, GF_SELINUX_XATTR_KEY)) {
+ return default_setxattr(frame, this, loc, dict, flags, xdata);
+ }
+
+ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags,
+ xdata);
+
+ return 0;
+}
+
+static void
+ob_fdclose(xlator_t *this, fd_t *fd)
+{
+ struct list_head list;
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+
+ INIT_LIST_HEAD(&list);
+ stub = NULL;
+
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode = ob_inode_get_locked(this, fd->inode);
+ if (ob_inode != NULL) {
+ ob_inode->open_count--;
+
+ /* If this fd is the same as ob_inode->first_fd, it means that
+ * the initial open has not fully completed. We'll try to cancel
+ * it. */
+ if (ob_inode->first_fd == fd) {
+ if (ob_inode->first_open == OB_OPEN_PREPARING) {
+ /* In this case ob_open_dispatch() has not been called yet.
+ * We clear first_fd and first_open to allow that function
+ * to know that the open is not really needed. This also
+ * allows other requests to work as expected if they
+ * arrive before the dispatch function is called. If there
+ * are pending fops, we can directly process them here.
+ * (note that there shouldn't be any fd related fops, but
+ * if there are, it's fine if they fail). */
+ ob_inode->first_fd = NULL;
+ ob_inode->first_open = NULL;
+ ob_inode->triggered = false;
+ list_splice_init(&ob_inode->resume_fops, &list);
+ } else if (!ob_inode->triggered) {
+ /* If the open has already been dispatched, we can only
+ * cancel it if it has not been triggered. Otherwise we
+ * simply wait until it completes. While it's not triggered,
+ * first_open must be a valid stub and there can't be any
+ * pending fops. */
+ GF_ASSERT((ob_inode->first_open != NULL) &&
+ list_empty(&ob_inode->resume_fops));
+
+ ob_inode->first_fd = NULL;
+ stub = ob_inode->first_open;
+ ob_inode->first_open = NULL;
+ }
+ }
+ }
+ }
+ UNLOCK(&fd->inode->lock);
+
+ if (stub != NULL) {
+ ob_open_destroy(stub, fd);
+ }
+
+ ob_resume_pending(&list);
+}
+
+int
+ob_forget(xlator_t *this, inode_t *inode)
+{
+ ob_inode_t *ob_inode;
+ uint64_t value = 0;
+
+ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
+ ob_inode = (ob_inode_t *)(uintptr_t)value;
+ GF_FREE(ob_inode);
+ }
+
+ return 0;
+}
+
+int
+ob_priv_dump(xlator_t *this)
+{
+ ob_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ conf = this->private;
+
+ if (!conf)
+ return -1;
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
+ "priv");
+
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("use_anonymous_fd", "%d", conf->use_anonymous_fd);
+
+ gf_proc_dump_write("lazy_open", "%d", conf->lazy_open);
+
+ return 0;
+}
+
+int
+ob_fdctx_dump(xlator_t *this, fd_t *fd)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ uint64_t value = 0;
+ int ret = 0, error = 0;
+
+ ret = TRY_LOCK(&fd->lock);
+ if (ret)
+ return 0;
+
+ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) {
+ error = (int32_t)value;
+ }
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind",
+ "file");
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("fd", "%p", fd);
+
+ gf_proc_dump_write("error", "%d", error);
+
+ UNLOCK(&fd->lock);
+
+ return 0;
+}
+
+int
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_ob_mt_end + 1);
+
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, OPEN_BEHIND_MSG_NO_MEMORY,
+ "Memory accounting failed");
+
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ ob_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("use-anonymous-fd", conf->use_anonymous_fd, options, bool,
+ out);
+
+ GF_OPTION_RECONF("lazy-open", conf->lazy_open, options, bool, out);
+
+ GF_OPTION_RECONF("read-after-open", conf->read_after_open, options, bool,
+ out);
+
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+init(xlator_t *this)
+{
+ ob_conf_t *conf = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: volume (%s) not configured with exactly one "
+ "child",
+ this->name);
+ return -1;
+ }
+
+ if (!this->parents)
+ gf_msg(this->name, GF_LOG_WARNING, 0, OPEN_BEHIND_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_ob_mt_conf_t);
+ if (!conf)
+ goto err;
+
+ GF_OPTION_INIT("use-anonymous-fd", conf->use_anonymous_fd, bool, err);
+
+ GF_OPTION_INIT("lazy-open", conf->lazy_open, bool, err);
+
+ GF_OPTION_INIT("read-after-open", conf->read_after_open, bool, err);
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, err);
+
+ this->private = conf;
+
+ return 0;
+err:
+ if (conf)
+ GF_FREE(conf);
+
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ ob_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ GF_FREE(conf);
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .open = ob_open,
+ .create = ob_create,
+ .readv = ob_readv,
+ .writev = ob_writev,
+ .flush = ob_flush,
+ .fsync = ob_fsync,
+ .fstat = ob_fstat,
+ .seek = ob_seek,
+ .ftruncate = ob_ftruncate,
+ .fsetxattr = ob_fsetxattr,
+ .setxattr = ob_setxattr,
+ .fgetxattr = ob_fgetxattr,
+ .fremovexattr = ob_fremovexattr,
+ .finodelk = ob_finodelk,
+ .fentrylk = ob_fentrylk,
+ .fxattrop = ob_fxattrop,
+ .fsetattr = ob_fsetattr,
+ .setattr = ob_setattr,
+ .fallocate = ob_fallocate,
+ .discard = ob_discard,
+ .zerofill = ob_zerofill,
+ .unlink = ob_unlink,
+ .rename = ob_rename,
+ .lk = ob_lk,
+};
+
+struct xlator_cbks cbks = {
+ .fdclose = ob_fdclose,
+ .forget = ob_forget,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = ob_priv_dump,
+ .fdctx = ob_fdctx_dump,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"open-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable open-behind",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"use-anonymous-fd"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description =
+ "For read operations, use anonymous FD when "
+ "original FD is open-behind and not yet opened in the backend.",
+ },
+ {
+ .key = {"lazy-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+        .description =
+            "Perform open in the backend only when a necessary "
+            "FOP arrives (e.g. writev on the FD, unlink of the file). When "
+            "this option "
+            "is disabled, perform backend open right after unwinding open().",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {},
+ /* option_validation_fn validate_fn; */
+ },
+ {
+ .key = {"read-after-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+        .description = "read is sent only after the actual open happens and "
+                       "a real fd is obtained, instead of being done on an "
+                       "anonymous fd (similar to write)",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {},
+ /* option_validation_fn validate_fn; */
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"open-behind"},
+ .description = "Enable/Disable open behind translator"},
+ {.key = {NULL}}
+
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "open-behind",
+ .category = GF_MAINTAINED,
+};
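Stripped of the stub machinery, the behaviour implemented above is: unwind open() immediately, defer the backend open until a fop arrives that really needs the real fd, let fops that can run on an anonymous fd proceed without triggering it, and replay whatever was queued once the open completes. The toy model below illustrates only that control flow; the names are hypothetical, the open is collapsed into a synchronous call, and the real xlator queues call stubs that are resumed from ob_open_cbk()/ob_resume_pending().

/* --- illustrative sketch, not part of the patch --- */
#include <stdbool.h>
#include <stdio.h>

#define MAX_PENDING 8

typedef void (*pending_op)(void);

static bool opened;
static pending_op pending[MAX_PENDING];
static int npending;

static void backend_open_completed(void)
{
    printf("backend open done\n");
    opened = true;
    for (int i = 0; i < npending; i++)
        pending[i](); /* replay, like ob_resume_pending() */
    npending = 0;
}

static void submit(pending_op op, bool needs_real_fd)
{
    if (opened) {
        op();                     /* open already completed: pass through */
        return;
    }
    if (!needs_real_fd) {
        op();                     /* e.g. readv served on an anonymous fd */
        return;
    }
    if (npending < MAX_PENDING)
        pending[npending++] = op; /* stand-in for queueing a call stub */
    backend_open_completed();     /* collapsed: the real open is async */
}

static void do_read(void)  { printf("readv (anonymous fd ok)\n"); }
static void do_write(void) { printf("writev on the real fd\n"); }

int main(void)
{
    printf("open() unwound immediately, nothing sent to the bricks\n");
    submit(do_read, false);  /* does not force the backend open */
    submit(do_write, true);  /* forces the open, then runs */
    return 0;
}
/* --- end of sketch --- */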
diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am
index 644f27e3f74..8eb6cece738 100644
--- a/xlators/performance/quick-read/src/Makefile.am
+++ b/xlators/performance/quick-read/src/Makefile.am
@@ -1,14 +1,16 @@
xlator_LTLIBRARIES = quick-read.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-quick_read_la_LDFLAGS = -module -avoidversion
+quick_read_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quick_read_la_SOURCES = quick-read.c
quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quick-read.h
+noinst_HEADERS = quick-read.h quick-read-mem-types.h quick-read-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h
new file mode 100644
index 00000000000..e4aef8549ff
--- /dev/null
+++ b/xlators/performance/quick-read/src/quick-read-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QR_MEM_TYPES_H__
+#define __QR_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_qr_mem_types_ {
+ gf_qr_mt_qr_inode_t = gf_common_mt_end + 1,
+ gf_qr_mt_content_t,
+ gf_qr_mt_qr_priority_t,
+ gf_qr_mt_qr_private_t,
+ gf_qr_mt_end
+};
+#endif
diff --git a/xlators/performance/quick-read/src/quick-read-messages.h b/xlators/performance/quick-read/src/quick-read-messages.h
new file mode 100644
index 00000000000..da9724a3c9c
--- /dev/null
+++ b/xlators/performance/quick-read/src/quick-read-messages.h
@@ -0,0 +1,31 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUICK_READ_MESSAGES_H_
+#define _QUICK_READ_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(QUICK_READ, QUICK_READ_MSG_ENFORCEMENT_FAILED,
+ QUICK_READ_MSG_INVALID_ARGUMENT,
+ QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED, QUICK_READ_MSG_NO_MEMORY,
+ QUICK_READ_MSG_VOL_MISCONFIGURED, QUICK_READ_MSG_DICT_SET_FAILED,
+ QUICK_READ_MSG_INVALID_CONFIG, QUICK_READ_MSG_LRU_NOT_EMPTY);
+
+#endif /* _QUICK_READ_MESSAGES_H_ */
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index 5cdceb4aeb3..7fe4b3c3a4b 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -1,2631 +1,1644 @@
/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <math.h>
#include "quick-read.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
+#include "quick-read-messages.h"
+#include <glusterfs/upcall-utils.h>
+#include <glusterfs/atomic.h>
-#define QR_DEFAULT_CACHE_SIZE 134217728 /* 128MB */
+typedef struct qr_local {
+ inode_t *inode;
+ uint64_t incident_gen;
+ fd_t *fd;
+} qr_local_t;
-int32_t
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset);
+qr_inode_t *
+qr_inode_ctx_get(xlator_t *this, inode_t *inode);
+void
+__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table,
+ qr_inode_t *qr_inode);
void
-qr_local_free (qr_local_t *local)
+qr_local_wipe(qr_local_t *local)
{
- if (local == NULL) {
- goto out;
- }
+ if (!local)
+ goto out;
- if (local->stub != NULL) {
- call_stub_destroy (local->stub);
- }
+ if (local->inode)
+ inode_unref(local->inode);
- if (local->path != NULL) {
- FREE (local->path);
- }
-
- FREE (local);
+ if (local->fd)
+ fd_unref(local->fd);
+ GF_FREE(local);
out:
- return;
+ return;
}
+uint64_t
+__qr_get_generation(xlator_t *this, qr_inode_t *qr_inode)
+{
+ uint64_t gen = 0, rollover;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+
+ gen = GF_ATOMIC_INC(priv->generation);
+ if (gen == 0) {
+ qr_inode->gen_rollover = !qr_inode->gen_rollover;
+ gen = GF_ATOMIC_INC(priv->generation);
+ __qr_inode_prune_data(this, table, qr_inode);
+ qr_inode->gen = qr_inode->invalidation_time = gen - 1;
+ }
+
+ rollover = qr_inode->gen_rollover;
+ gen |= (rollover << 32);
+ return gen;
+}
-static void
-qr_loc_wipe (loc_t *loc)
+uint64_t
+qr_get_generation(xlator_t *this, inode_t *inode)
{
- if (loc == NULL) {
- goto out;
- }
+ qr_inode_t *qr_inode = NULL;
+ uint64_t gen = 0;
+ qr_inode_table_t *table = NULL;
+ qr_private_t *priv = NULL;
- if (loc->path) {
- FREE (loc->path);
- loc->path = NULL;
- }
+ priv = this->private;
+ table = &priv->table;
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
+ qr_inode = qr_inode_ctx_get(this, inode);
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
+ if (qr_inode) {
+ LOCK(&table->lock);
+ {
+ gen = __qr_get_generation(this, qr_inode);
+ }
+ UNLOCK(&table->lock);
+ } else {
+ gen = GF_ATOMIC_INC(priv->generation);
+ if (gen == 0) {
+ gen = GF_ATOMIC_INC(priv->generation);
}
+ }
-out:
- return;
+ return gen;
}
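Aside: the two functions above pack a 32-bit generation counter and a one-bit rollover flag into a single 64-bit value, so later comparisons (in qr_content_update and __qr_content_refresh) can tell pre-wrap from post-wrap generations. A minimal standalone sketch of that packing, with C11 atomics standing in for the GF_ATOMIC_* wrappers and global variables standing in for priv/qr_inode fields:

    /* Sketch only, not GlusterFS code. */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint32_t generation;   /* stands in for priv->generation */
    static uint32_t gen_rollover;         /* stands in for qr_inode->gen_rollover */

    static uint64_t
    next_generation(void)
    {
        uint32_t gen = atomic_fetch_add(&generation, 1) + 1;

        if (gen == 0) {                   /* counter wrapped: flip the rollover bit */
            gen_rollover = !gen_rollover;
            gen = atomic_fetch_add(&generation, 1) + 1;
        }

        return ((uint64_t)gen_rollover << 32) | gen;
    }

    int
    main(void)
    {
        uint64_t packed = next_generation();

        printf("rollover=%u gen=%u\n",
               (unsigned)(packed >> 32), (unsigned)(packed & 0xffffffff));
        return 0;
    }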
-
-static int32_t
-qr_loc_fill (loc_t *loc, inode_t *inode, char *path)
+qr_local_t *
+qr_local_get(xlator_t *this, inode_t *inode)
{
- int32_t ret = -1;
- char *parent = NULL;
+ qr_local_t *local = NULL;
- if ((loc == NULL) || (inode == NULL) || (path == NULL)) {
- ret = -1;
- errno = EINVAL;
- goto out;
- }
+ local = GF_CALLOC(1, sizeof(*local), gf_common_mt_char);
+ if (!local)
+ goto out;
- loc->inode = inode_ref (inode);
- loc->path = strdup (path);
- loc->ino = inode->ino;
+ local->incident_gen = qr_get_generation(this, inode);
+out:
+ return local;
+}
- parent = strdup (path);
- if (parent == NULL) {
- ret = -1;
- goto out;
- }
+#define QR_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ qr_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ qr_local_wipe(__local); \
+ } while (0)
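Aside: the macro above detaches frame->local before unwinding and frees it afterwards, so the per-call context can neither be reached nor double-freed once the reply is on its way. A minimal standalone sketch of that idiom with hypothetical types, not the GlusterFS stack machinery:

    /* Sketch only, not GlusterFS code. */
    #include <stdlib.h>

    struct frame { void *local; };

    static void
    send_reply(struct frame *frame)
    {
        (void)frame;                /* placeholder for STACK_UNWIND_STRICT */
    }

    static void
    unwind(struct frame *frame)
    {
        void *local = NULL;

        if (frame) {
            local = frame->local;   /* detach the per-call context first */
            frame->local = NULL;
        }

        send_reply(frame);          /* the reply path can no longer reach it */
        free(local);                /* then release it exactly once */
    }

    int
    main(void)
    {
        struct frame f = { .local = malloc(16) };

        unwind(&f);
        return 0;
    }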
- parent = dirname (parent);
+void
+__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode,
+ uint64_t gen);
- loc->parent = inode_from_path (inode->table, parent);
- if (loc->parent == NULL) {
- ret = -1;
- errno = EINVAL;
- goto out;
- }
+int
+__qr_inode_ctx_set(xlator_t *this, inode_t *inode, qr_inode_t *qr_inode)
+{
+ uint64_t value = 0;
+ int ret = -1;
- loc->name = strrchr (loc->path, '/');
- ret = 0;
-out:
- if (ret == -1) {
- qr_loc_wipe (loc);
+ value = (long)qr_inode;
- }
+ ret = __inode_ctx_set(inode, this, &value);
- if (parent) {
- FREE (parent);
- }
-
- return ret;
+ return ret;
}
-
-void
-qr_resume_pending_ops (qr_fd_ctx_t *qr_fd_ctx)
+qr_inode_t *
+__qr_inode_ctx_get(xlator_t *this, inode_t *inode)
{
- struct list_head waiting_ops;
- call_stub_t *stub = NULL, *tmp = NULL;
-
- if (qr_fd_ctx == NULL) {
- goto out;
- }
+ qr_inode_t *qr_inode = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- INIT_LIST_HEAD (&waiting_ops);
+ ret = __inode_ctx_get(inode, this, &value);
+ if (ret)
+ return NULL;
- LOCK (&qr_fd_ctx->lock);
- {
- list_splice_init (&qr_fd_ctx->waiting_ops,
- &waiting_ops);
- }
- UNLOCK (&qr_fd_ctx->lock);
+ qr_inode = (void *)((long)value);
- if (!list_empty (&waiting_ops)) {
- list_for_each_entry_safe (stub, tmp, &waiting_ops, list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
- }
-
-out:
- return;
+ return qr_inode;
}
-
-static void
-qr_fd_ctx_free (qr_fd_ctx_t *qr_fd_ctx)
+qr_inode_t *
+qr_inode_ctx_get(xlator_t *this, inode_t *inode)
{
- if (qr_fd_ctx == NULL) {
- goto out;
- }
+ qr_inode_t *qr_inode = NULL;
- assert (list_empty (&qr_fd_ctx->waiting_ops));
+ if (inode == NULL)
+ goto out;
- FREE (qr_fd_ctx->path);
- FREE (qr_fd_ctx);
+ LOCK(&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get(this, inode);
+ }
+ UNLOCK(&inode->lock);
out:
- return;
+ return qr_inode;
}
-
-static inline uint32_t
-is_match (const char *path, const char *pattern)
+qr_inode_t *
+qr_inode_new(xlator_t *this, inode_t *inode)
{
- int32_t ret = 0;
+ qr_inode_t *qr_inode = NULL;
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
-
- return (ret == 0);
-}
+ qr_inode = GF_CALLOC(1, sizeof(*qr_inode), gf_qr_mt_qr_inode_t);
+ if (!qr_inode)
+ return NULL;
-uint32_t
-qr_get_priority (qr_conf_t *conf, const char *path)
-{
- uint32_t priority = 0;
- struct qr_priority *curr = NULL;
-
- list_for_each_entry (curr, &conf->priority_list, list) {
- if (is_match (path, curr->pattern))
- priority = curr->priority;
- }
+ INIT_LIST_HEAD(&qr_inode->lru);
- return priority;
-}
+ qr_inode->priority = 0; /* initial priority */
+ return qr_inode;
+}
-/* To be called with this-priv->table.lock held */
qr_inode_t *
-__qr_inode_alloc (xlator_t *this, char *path, inode_t *inode)
+qr_inode_ctx_get_or_new(xlator_t *this, inode_t *inode)
{
- qr_inode_t *qr_inode = NULL;
- qr_private_t *priv = NULL;
- int priority = 0;
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ qr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- qr_inode = CALLOC (1, sizeof (*qr_inode));
- if (qr_inode == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
+ LOCK(&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get(this, inode);
+ if (qr_inode)
+ goto unlock;
- INIT_LIST_HEAD (&qr_inode->lru);
-
- priority = qr_get_priority (&priv->conf, path);
+ qr_inode = qr_inode_new(this, inode);
+ if (!qr_inode)
+ goto unlock;
- list_add_tail (&qr_inode->lru, &priv->table.lru[priority]);
+ ret = __qr_inode_ctx_set(this, inode, qr_inode);
+ if (ret) {
+ __qr_inode_prune(this, &priv->table, qr_inode, 0);
+ GF_FREE(qr_inode);
+ qr_inode = NULL;
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
- qr_inode->inode = inode;
- qr_inode->priority = priority;
-out:
- return qr_inode;
+ return qr_inode;
}
-
-/* To be called with qr_inode->table->lock held */
-void
-__qr_inode_free (qr_inode_t *qr_inode)
+uint32_t
+qr_get_priority(qr_conf_t *conf, const char *path)
{
- if (qr_inode == NULL) {
- goto out;
- }
+ uint32_t priority = 0;
+ struct qr_priority *curr = NULL;
- if (qr_inode->xattr) {
- dict_unref (qr_inode->xattr);
- }
-
- list_del (&qr_inode->lru);
+ list_for_each_entry(curr, &conf->priority_list, list)
+ {
+ if (fnmatch(curr->pattern, path, FNM_NOESCAPE) == 0)
+ priority = curr->priority;
+ }
- FREE (qr_inode);
-out:
- return;
+ return priority;
}
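Aside: qr_get_priority() walks the configured pattern list and keeps the priority of the last pattern that matches the path. A self-contained sketch of the same last-match-wins behaviour, using POSIX fnmatch(3) over a plain array instead of the list_head above (hypothetical names):

    /* Sketch only, not GlusterFS code. */
    #include <fnmatch.h>
    #include <stdint.h>
    #include <stdio.h>

    struct prio_rule {
        const char *pattern;
        uint32_t priority;
    };

    static uint32_t
    get_priority(const struct prio_rule *rules, size_t n, const char *path)
    {
        uint32_t priority = 0;    /* default when nothing matches */
        size_t i;

        for (i = 0; i < n; i++) {
            /* later rules override earlier ones, as in the loop above */
            if (fnmatch(rules[i].pattern, path, FNM_NOESCAPE) == 0)
                priority = rules[i].priority;
        }

        return priority;
    }

    int
    main(void)
    {
        struct prio_rule rules[] = {
            { "*.html", 1 },
            { "/docs/*", 2 },
        };

        printf("%u\n", get_priority(rules, 2, "/docs/index.html"));  /* 2 */
        return 0;
    }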
-
-/* To be called with priv->table.lock held */
void
-__qr_cache_prune (xlator_t *this)
+__qr_inode_register(xlator_t *this, qr_inode_table_t *table,
+ qr_inode_t *qr_inode)
{
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
- qr_inode_table_t *table = NULL;
- qr_inode_t *curr = NULL, *next = NULL;
- int32_t index = 0;
- uint64_t size_to_prune = 0;
- uint64_t size_pruned = 0;
-
- priv = this->private;
- table = &priv->table;
- conf = &priv->conf;
-
- size_to_prune = table->cache_used - conf->cache_size;
-
- for (index=0; index < conf->max_pri; index++) {
- list_for_each_entry_safe (curr, next, &table->lru[index], lru) {
- size_pruned += curr->stbuf.st_size;
- inode_ctx_del (curr->inode, this, NULL);
- __qr_inode_free (curr);
- if (size_pruned >= size_to_prune)
- goto done;
- }
- }
+ qr_private_t *priv = NULL;
-done:
- table->cache_used -= size_pruned;
- return;
-}
+ if (!qr_inode->data)
+ return;
+ priv = this->private;
+ if (!priv)
+ return;
-/* To be called with table->lock held */
-inline char
-__qr_need_cache_prune (qr_conf_t *conf, qr_inode_table_t *table)
-{
- return (table->cache_used > conf->cache_size);
+ if (list_empty(&qr_inode->lru))
+ /* first time addition of this qr_inode into table */
+ table->cache_used += qr_inode->size;
+ else
+ list_del_init(&qr_inode->lru);
+
+ list_add_tail(&qr_inode->lru, &table->lru[qr_inode->priority]);
+
+ GF_ATOMIC_INC(priv->qr_counter.files_cached);
+
+ return;
}
-
-int32_t
-qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict, struct stat *postparent)
+void
+qr_inode_set_priority(xlator_t *this, inode_t *inode, const char *path)
{
- data_t *content = NULL;
- qr_inode_t *qr_inode = NULL;
- uint64_t value = 0;
- int ret = -1;
- qr_conf_t *conf = NULL;
- qr_inode_table_t *table = NULL;
- qr_private_t *priv = NULL;
- qr_local_t *local = NULL;
-
- if ((op_ret == -1) || (dict == NULL)) {
- goto out;
- }
-
- conf = this->private;
- priv = this->private;
- conf = &priv->conf;
- table = &priv->table;
+ uint32_t priority = 0;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ return;
- local = frame->local;
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
- if (buf->st_size > conf->max_file_size) {
- goto out;
- }
+ if (path)
+ priority = qr_get_priority(conf, path);
+ else
+ /* retain existing priority, just bump LRU */
+ priority = qr_inode->priority;
- if (S_ISDIR (buf->st_mode)) {
- goto out;
- }
+ LOCK(&table->lock);
+ {
+ qr_inode->priority = priority;
- if (inode == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ __qr_inode_register(this, table, qr_inode);
+ }
+ UNLOCK(&table->lock);
+}
- content = dict_get (dict, GLUSTERFS_CONTENT_KEY);
- if (content == NULL) {
- goto out;
- }
+void
+__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table,
+ qr_inode_t *qr_inode)
+{
+ qr_private_t *priv = NULL;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (inode, this, &value);
- if (ret == -1) {
- qr_inode = __qr_inode_alloc (this, local->path, inode);
- if (qr_inode == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
-
- ret = inode_ctx_put (inode, this,
- (uint64_t)(long)qr_inode);
- if (ret == -1) {
- __qr_inode_free (qr_inode);
- qr_inode = NULL;
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
- } else {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
- }
-
- if (qr_inode->xattr) {
- dict_unref (qr_inode->xattr);
- qr_inode->xattr = NULL;
-
- table->cache_used -= qr_inode->stbuf.st_size;
- }
-
- qr_inode->xattr = dict_ref (dict);
- qr_inode->stbuf = *buf;
- table->cache_used += buf->st_size;
-
- gettimeofday (&qr_inode->tv, NULL);
-
- if (__qr_need_cache_prune (conf, table)) {
- __qr_cache_prune (this);
- }
- }
-unlock:
- UNLOCK (&table->lock);
+ priv = this->private;
+ GF_FREE(qr_inode->data);
+ qr_inode->data = NULL;
-out:
- /*
- * FIXME: content size in dict can be greater than the size application
- * requested for. Applications need to be careful till this is fixed.
- */
- QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, dict,
- postparent);
- return 0;
-}
+ if (!list_empty(&qr_inode->lru)) {
+ table->cache_used -= qr_inode->size;
+ qr_inode->size = 0;
+ list_del_init(&qr_inode->lru);
-int32_t
-qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- qr_conf_t *conf = NULL;
- dict_t *new_req_dict = NULL;
- int32_t op_ret = -1, op_errno = -1;
- data_t *content = NULL;
- uint64_t requested_size = 0, size = 0, value = 0;
- char cached = 0;
- qr_inode_t *qr_inode = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- qr_local_t *local = NULL;
-
- priv = this->private;
- conf = &priv->conf;
- if (conf == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+ GF_ATOMIC_DEC(priv->qr_counter.files_cached);
+ }
- table = &priv->table;
+ memset(&qr_inode->buf, 0, sizeof(qr_inode->buf));
+}
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno,
- ENOMEM);
+/* To be called with priv->table.lock held */
+void
+__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode,
+ uint64_t gen)
+{
+ __qr_inode_prune_data(this, table, qr_inode);
+ if (gen)
+ qr_inode->gen = gen;
+ qr_inode->invalidation_time = __qr_get_generation(this, qr_inode);
+}
- frame->local = local;
+void
+qr_inode_prune(xlator_t *this, inode_t *inode, uint64_t gen)
+{
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
- local->path = strdup (loc->path);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno,
- ENOMEM);
- LOCK (&table->lock);
- {
- op_ret = inode_ctx_get (loc->inode, this, &value);
- if (op_ret == 0) {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode != NULL) {
- if (qr_inode->xattr) {
- cached = 1;
- }
- }
- }
- }
- UNLOCK (&table->lock);
-
- if ((xattr_req == NULL) && (conf->max_file_size > 0)) {
- new_req_dict = xattr_req = dict_new ();
- if (xattr_req == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
- }
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ return;
- if (!cached) {
- if (xattr_req) {
- content = dict_get (xattr_req, GLUSTERFS_CONTENT_KEY);
- if (content) {
- requested_size = data_to_uint64 (content);
- }
- }
-
- if ((conf->max_file_size > 0)
- && (conf->max_file_size != requested_size)) {
- size = (conf->max_file_size > requested_size) ?
- conf->max_file_size : requested_size;
-
- op_ret = dict_set (xattr_req, GLUSTERFS_CONTENT_KEY,
- data_from_uint64 (size));
- if (op_ret < 0) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- }
- }
+ priv = this->private;
+ table = &priv->table;
- STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ LOCK(&table->lock);
+ {
+ __qr_inode_prune(this, table, qr_inode, gen);
+ }
+ UNLOCK(&table->lock);
+}
- if (new_req_dict) {
- dict_unref (new_req_dict);
- }
+/* To be called with priv->table.lock held */
+void
+__qr_cache_prune(xlator_t *this, qr_inode_table_t *table, qr_conf_t *conf)
+{
+ qr_inode_t *curr = NULL;
+ qr_inode_t *next = NULL;
+ int index = 0;
+ size_t size_pruned = 0;
- return 0;
+ for (index = 0; index < conf->max_pri; index++) {
+ list_for_each_entry_safe(curr, next, &table->lru[index], lru)
+ {
+ size_pruned += curr->size;
-unwind:
- QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, NULL,
- NULL);
+ __qr_inode_prune(this, table, curr, 0);
- if (new_req_dict) {
- dict_unref (new_req_dict);
+ if (table->cache_used < conf->cache_size)
+ return;
}
+ }
- return 0;
+ return;
}
-
-int32_t
-qr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+void
+qr_cache_prune(xlator_t *this)
{
- uint64_t value = 0;
- int32_t ret = -1;
- struct list_head waiting_ops;
- qr_local_t *local = NULL;
- qr_inode_t *qr_inode = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *stub = NULL, *tmp = NULL;
- char is_open = 0;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- priv = this->private;
- table = &priv->table;
-
- local = frame->local;
- if (local != NULL) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- is_open = local->is_open;
- }
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_table_t *table = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
+
+ LOCK(&table->lock);
+ {
+ if (table->cache_used > conf->cache_size)
+ __qr_cache_prune(this, table, conf);
+ }
+ UNLOCK(&table->lock);
+}
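Aside: qr_cache_prune() only evicts when usage exceeds the configured cache size, and __qr_cache_prune() then walks the per-priority buckets from the lowest priority upward until usage drops back under the limit. A standalone sketch of that policy (hypothetical types, singly linked buckets instead of list_head, no locking, no per-bucket LRU ordering):

    /* Sketch only, not GlusterFS code. */
    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct entry {
        size_t size;
        struct entry *next;
    };

    struct cache {
        struct entry *lru[2];   /* index == priority; bucket 0 is pruned first */
        size_t used;
        size_t limit;
    };

    static void
    cache_prune(struct cache *c)
    {
        int pri;

        for (pri = 0; pri < 2; pri++) {
            while (c->lru[pri] && c->used > c->limit) {
                struct entry *victim = c->lru[pri];

                c->lru[pri] = victim->next;
                c->used -= victim->size;   /* mirrors table->cache_used -= size */
                free(victim);
            }
        }
    }

    int
    main(void)
    {
        struct cache c = { .limit = 100 };
        size_t sizes[] = { 60, 50, 30 };
        int i;

        for (i = 0; i < 3; i++) {
            struct entry *e = malloc(sizeof(*e));

            e->size = sizes[i];
            e->next = c.lru[0];
            c.lru[0] = e;
            c.used += e->size;
        }

        cache_prune(&c);
        printf("used after prune: %zu\n", c.used);   /* prints 60 */
        return 0;
    }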
- INIT_LIST_HEAD (&waiting_ops);
+void *
+qr_content_extract(dict_t *xdata)
+{
+ data_t *data = NULL;
+ void *content = NULL;
+ int ret = 0;
- ret = fd_ctx_get (fd, this, &value);
- if ((ret == -1) && (op_ret != -1)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ ret = dict_get_with_ref(xdata, GF_CONTENT_KEY, &data);
+ if (ret < 0 || !data)
+ return NULL;
- if (value) {
- qr_fd_ctx = (qr_fd_ctx_t *) (long)value;
- }
+ content = GF_MALLOC(data->len, gf_qr_mt_content_t);
+ if (!content)
+ goto out;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- qr_fd_ctx->open_in_transit = 0;
-
- if (op_ret == 0) {
- qr_fd_ctx->opened = 1;
- }
- list_splice_init (&qr_fd_ctx->waiting_ops,
- &waiting_ops);
- }
- UNLOCK (&qr_fd_ctx->lock);
-
- if (local && local->is_open
- && ((local->open_flags & O_TRUNC) == O_TRUNC)) {
- LOCK (&table->lock);
- {
- ret = inode_ctx_del (fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
-
- if (qr_inode != NULL) {
- __qr_inode_free (qr_inode);
- }
- }
- }
- UNLOCK (&table->lock);
- }
-
- if (!list_empty (&waiting_ops)) {
- list_for_each_entry_safe (stub, tmp, &waiting_ops,
- list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
- }
- }
-out:
- if (is_open) {
- QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
+ memcpy(content, data->data, data->len);
- return 0;
+out:
+ data_unref(data);
+ return content;
}
-
-int32_t
-qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+void
+qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
+ struct iatt *buf, uint64_t gen)
{
- qr_inode_t *qr_inode = NULL;
- int32_t ret = -1;
- uint64_t filep = 0;
- char content_cached = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL, *tmp_fd_ctx = NULL;
- int32_t op_ret = -1, op_errno = -1;
- qr_local_t *local = NULL;
- qr_conf_t *conf = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- priv = this->private;
- conf = &priv->conf;
- table = &priv->table;
-
- tmp_fd_ctx = qr_fd_ctx = CALLOC (1, sizeof (*qr_fd_ctx));
- if (qr_fd_ctx == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ uint32_t rollover = 0;
- LOCK_INIT (&qr_fd_ctx->lock);
- INIT_LIST_HEAD (&qr_fd_ctx->waiting_ops);
+ rollover = gen >> 32;
+ gen = gen & 0xffffffff;
- qr_fd_ctx->path = strdup (loc->path);
- qr_fd_ctx->flags = flags;
+ priv = this->private;
+ table = &priv->table;
- ret = fd_ctx_set (fd, this, (uint64_t)(long)qr_fd_ctx);
- if (ret == -1) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
- tmp_fd_ctx = NULL;
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ LOCK(&table->lock);
+ {
+ if ((rollover != qr_inode->gen_rollover) ||
+ (gen && qr_inode->gen && (qr_inode->gen >= gen)))
+ goto unlock;
- local->is_open = 1;
- local->open_flags = flags;
- frame->local = local;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (fd->inode, this, &filep);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) filep;
- if (qr_inode) {
- if (qr_inode->xattr) {
- content_cached = 1;
- }
- }
- }
- }
- UNLOCK (&table->lock);
+ if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen))
+ goto unlock;
- if (content_cached && ((flags & O_DIRECTORY) == O_DIRECTORY)) {
- op_ret = -1;
- op_errno = ENOTDIR;
- goto unwind;
- }
+ __qr_inode_prune(this, table, qr_inode, gen);
- if (!content_cached || ((flags & O_ACCMODE) == O_WRONLY)
- || ((flags & O_TRUNC) == O_TRUNC)
- || ((flags & O_DIRECT) == O_DIRECT)) {
- LOCK (&qr_fd_ctx->lock);
- {
- /*
- * we really need not set this flag, since open is
- * not yet unwounded.
- */
-
- qr_fd_ctx->open_in_transit = 1;
- if ((flags & O_DIRECT) == O_DIRECT) {
- qr_fd_ctx->disabled = 1;
- }
- }
- UNLOCK (&qr_fd_ctx->lock);
- goto wind;
- } else {
- op_ret = 0;
- op_errno = 0;
- goto unwind;
- }
+ qr_inode->data = data;
+ data = NULL;
+ qr_inode->size = buf->ia_size;
-unwind:
- if (tmp_fd_ctx != NULL) {
- qr_fd_ctx_free (tmp_fd_ctx);
- }
+ qr_inode->ia_mtime = buf->ia_mtime;
+ qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec;
+ qr_inode->ia_ctime = buf->ia_ctime;
+ qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec;
- QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- return 0;
+ qr_inode->buf = *buf;
+ qr_inode->last_refresh = gf_time();
-wind:
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
- return 0;
-}
+ __qr_inode_register(this, table, qr_inode);
+ }
+unlock:
+ UNLOCK(&table->lock);
+ if (data)
+ GF_FREE(data);
-static inline char
-qr_time_elapsed (struct timeval *now, struct timeval *then)
-{
- return now->tv_sec - then->tv_sec;
+ qr_cache_prune(this);
}
-
-static inline char
-qr_need_validation (qr_conf_t *conf, qr_inode_t *qr_inode)
+gf_boolean_t
+qr_size_fits(qr_conf_t *conf, struct iatt *buf)
{
- struct timeval now = {0, };
- char need_validation = 0;
-
- gettimeofday (&now, NULL);
+ return (buf->ia_size <= conf->max_file_size);
+}
- if (qr_time_elapsed (&now, &qr_inode->tv) >= conf->cache_timeout)
- need_validation = 1;
+gf_boolean_t
+qr_mtime_equal(qr_inode_t *qr_inode, struct iatt *buf)
+{
+ return (qr_inode->ia_mtime == buf->ia_mtime &&
+ qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec);
+}
- return need_validation;
+gf_boolean_t
+qr_ctime_equal(qr_inode_t *qr_inode, struct iatt *buf)
+{
+ return (qr_inode->ia_ctime == buf->ia_ctime &&
+ qr_inode->ia_ctime_nsec == buf->ia_ctime_nsec);
}
+gf_boolean_t
+qr_time_equal(qr_conf_t *conf, qr_inode_t *qr_inode, struct iatt *buf)
+{
+ if (conf->ctime_invalidation)
+ return qr_ctime_equal(qr_inode, buf);
+ else
+ return qr_mtime_equal(qr_inode, buf);
+}
-static int32_t
-qr_validate_cache_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+void
+__qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf,
+ uint64_t gen)
{
- qr_inode_t *qr_inode = NULL;
- qr_local_t *local = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
- call_stub_t *stub = NULL;
-
- local = frame->local;
- if ((local == NULL) || ((local->fd) == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_conf_t *conf = NULL;
+ uint32_t rollover = 0;
- priv = this->private;
- table = &priv->table;
+ rollover = gen >> 32;
+ gen = gen & 0xffffffff;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (local->fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
- }
-
- if (qr_inode != NULL) {
- gettimeofday (&qr_inode->tv, NULL);
-
- if ((qr_inode->stbuf.st_mtime != buf->st_mtime)
- || (ST_MTIM_NSEC(&qr_inode->stbuf) !=
- ST_MTIM_NSEC(buf))) {
- inode_ctx_del (local->fd->inode, this, NULL);
- __qr_inode_free (qr_inode);
- }
- }
- }
- UNLOCK (&table->lock);
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
- stub = local->stub;
- local->stub = NULL;
- local->just_validated = 1;
- call_resume (stub);
+ /* allow for rollover of frame->root->unique */
+ if ((rollover != qr_inode->gen_rollover) ||
+ (gen && qr_inode->gen && (qr_inode->gen >= gen)))
+ goto done;
- return 0;
+ if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen))
+ goto done;
-unwind:
- /* this is actually unwind of readv */
- QR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, NULL,
- NULL);
- return 0;
-}
+ qr_inode->gen = gen;
+ if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) {
+ qr_inode->buf = *buf;
+ qr_inode->last_refresh = gf_time();
+ __qr_inode_register(this, table, qr_inode);
+ } else {
+ __qr_inode_prune(this, table, qr_inode, gen);
+ }
-int32_t
-qr_validate_cache_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- qr_local_t *local = NULL;
- int32_t op_ret = -1, op_errno = -1;
+done:
+ return;
+}
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- } else {
- op_ret = local->op_ret;
- op_errno = local->op_errno;
- }
+void
+qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf,
+ uint64_t gen)
+{
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
- if (op_ret == -1) {
- qr_validate_cache_cbk (frame, NULL, this, op_ret, op_errno,
- NULL);
- } else {
- STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- }
+ priv = this->private;
+ table = &priv->table;
- return 0;
+ LOCK(&table->lock);
+ {
+ __qr_content_refresh(this, qr_inode, buf, gen);
+ }
+ UNLOCK(&table->lock);
}
-
-int
-qr_validate_cache (call_frame_t *frame, xlator_t *this, fd_t *fd,
- call_stub_t *stub)
+gf_boolean_t
+__qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode)
{
- int ret = -1;
- int flags = 0;
- uint64_t value = 0;
- loc_t loc = {0, };
- char *path = NULL;
- qr_local_t *local = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *validate_stub = NULL;
- char need_open = 0, can_wind = 0;
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- goto out;
- }
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
- local->fd = fd;
- local->stub = stub;
- frame->local = local;
+ priv = this->private;
+ conf = &priv->conf;
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ if (qr_inode->last_refresh < priv->last_child_down)
+ return _gf_false;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- validate_stub = fop_fstat_stub (frame,
- qr_validate_cache_helper,
- fd);
- if (validate_stub == NULL) {
- ret = -1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&validate_stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
-
- if (ret == -1) {
- goto out;
- }
- } else {
- can_wind = 1;
- }
+ if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout)
+ return _gf_false;
- if (need_open) {
- ret = qr_loc_fill (&loc, fd->inode, path);
- if (ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- } else if (can_wind) {
- STACK_WIND (frame, qr_validate_cache_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- }
+ return _gf_true;
+}
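Aside: a standalone sketch (hypothetical types, plain time() instead of gf_time()) of the two freshness conditions checked above: the cached copy must have been refreshed after the last child-down event, and within the configured cache timeout.

    /* Sketch only, not GlusterFS code. */
    #include <stdbool.h>
    #include <time.h>

    struct cache_entry {
        time_t last_refresh;
    };

    static bool
    cache_is_fresh(const struct cache_entry *e, time_t last_child_down,
                   time_t timeout_sec)
    {
        time_t now = time(NULL);

        if (e->last_refresh < last_child_down)
            return false;                 /* may predate a brick reconnect */

        return (now - e->last_refresh) < timeout_sec;
    }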
- ret = 0;
+int
+qr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode_ret, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ void *content = NULL;
+ qr_inode_t *qr_inode = NULL;
+ inode_t *inode = NULL;
+ qr_local_t *local = NULL;
+
+ local = frame->local;
+ inode = local->inode;
+
+ if (op_ret == -1) {
+ qr_inode_prune(this, inode, local->incident_gen);
+ goto out;
+ }
+
+ if (dict_get(xdata, GLUSTERFS_BAD_INODE)) {
+ qr_inode_prune(this, inode, local->incident_gen);
+ goto out;
+ }
+
+ if (dict_get(xdata, "sh-failed")) {
+ qr_inode_prune(this, inode, local->incident_gen);
+ goto out;
+ }
+
+ content = qr_content_extract(xdata);
+
+ if (content) {
+ /* new content came along, always replace old content */
+ qr_inode = qr_inode_ctx_get_or_new(this, inode);
+ if (!qr_inode) {
+ /* no harm done */
+ GF_FREE(content);
+ goto out;
+ }
+
+ qr_content_update(this, qr_inode, content, buf, local->incident_gen);
+ } else {
+ /* purge old content if necessary */
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ /* usual path for large files */
+ goto out;
+
+ qr_content_refresh(this, qr_inode, buf, local->incident_gen);
+ }
out:
- return ret;
+ QR_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode_ret, buf, xdata,
+ postparent);
+ return 0;
}
-
-int32_t
-qr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
+int
+qr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
- return 0;
-}
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ dict_t *new_xdata = NULL;
+ qr_local_t *local = NULL;
+
+ priv = this->private;
+ conf = &priv->conf;
+ local = qr_local_get(this, loc->inode);
+ local->inode = inode_ref(loc->inode);
+ frame->local = local;
+
+ qr_inode = qr_inode_ctx_get(this, loc->inode);
+ if (qr_inode && qr_inode->data)
+ /* cached. only validate in qr_lookup_cbk */
+ goto wind;
+
+ if (!xdata)
+ xdata = new_xdata = dict_new();
+
+ if (!xdata)
+ goto wind;
+
+ ret = 0;
+ if (conf->max_file_size)
+ ret = dict_set(xdata, GF_CONTENT_KEY,
+ data_from_uint64(conf->max_file_size));
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_DICT_SET_FAILED,
+ "cannot set key in request dict (%s)", loc->path);
+wind:
+ STACK_WIND(frame, qr_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ if (new_xdata)
+ dict_unref(new_xdata);
-int32_t
-qr_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size, offset);
- return 0;
+ return 0;
}
-
-int32_t
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+int
+qr_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- qr_inode_t *qr_inode = NULL;
- qr_local_t *local = NULL;
- char just_validated = 0;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- uint64_t value = 0;
- int count = -1, flags = 0, i = 0;
- char content_cached = 0, need_validation = 0;
- char need_open = 0, can_wind = 0, need_unwind = 0;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct stat stbuf = {0, };
- data_t *content = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *stub = NULL;
- loc_t loc = {0, };
- qr_conf_t *conf = NULL;
- struct iovec *vector = NULL;
- char *path = NULL;
- glusterfs_ctx_t *ctx = NULL;
- off_t start = 0, end = 0;
- size_t len = 0;
- struct iobuf_pool *iobuf_pool = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- op_ret = 0;
-
- priv = this->private;
- conf = &priv->conf;
- table = &priv->table;
-
- local = frame->local;
- if (local != NULL) {
- just_validated = local->just_validated;
- FREE (local);
- frame->local = NULL;
- }
+ gf_dirent_t *entry = NULL;
+ qr_inode_t *qr_inode = NULL;
+ qr_local_t *local = NULL;
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- if (qr_fd_ctx != NULL) {
- if (qr_fd_ctx->disabled) {
- goto out;
- }
- }
- }
+ local = frame->local;
- iobuf_pool = this->ctx->iobuf_pool;
+ if (op_ret <= 0)
+ goto unwind;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode) {
- if (qr_inode->xattr){
- if (!just_validated
- && qr_need_validation (conf,
- qr_inode)) {
- need_validation = 1;
- goto unlock;
- }
-
- content = dict_get (qr_inode->xattr,
- GLUSTERFS_CONTENT_KEY);
-
- stbuf = qr_inode->stbuf;
- content_cached = 1;
- list_move_tail (&qr_inode->lru,
- &table->lru[qr_inode->priority]);
-
-
- if (offset > content->len) {
- op_ret = 0;
- end = content->len;
- } else {
- if ((offset + size)
- > content->len) {
- op_ret = content->len - offset;
- end = content->len;
- } else {
- op_ret = size;
- end = offset + size;
- }
- }
-
- ctx = this->ctx;
- count = (op_ret / iobuf_pool->page_size);
- if ((op_ret % iobuf_pool->page_size)
- != 0) {
- count++;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto unlock;
- }
-
- vector = CALLOC (count,
- sizeof (*vector));
- if (vector == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- for (i = 0; i < count; i++) {
- iobuf = iobuf_get (iobuf_pool);
- if (iobuf == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- start = offset + (iobuf_pool->page_size * i);
- if (start > end) {
- len = 0;
- } else {
- len = (iobuf_pool->page_size
- > (end - start))
- ? (end - start)
- : iobuf_pool->page_size;
-
- memcpy (iobuf->ptr,
- content->data + start,
- len);
- }
-
- iobref_add (iobref, iobuf);
- iobuf_unref (iobuf);
-
- vector[i].iov_base = iobuf->ptr;
- vector[i].iov_len = len;
- }
- }
- }
- }
- }
-unlock:
- UNLOCK (&table->lock);
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode)
+ continue;
-out:
- if (content_cached || need_unwind) {
- QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector,
- count, &stbuf, iobref);
-
- } else if (need_validation) {
- stub = fop_readv_stub (frame, qr_readv, fd, size, offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = qr_validate_cache (frame, this, fd, stub);
- if (op_ret == -1) {
- need_unwind = 1;
- op_errno = errno;
- call_stub_destroy (stub);
- goto out;
- }
- } else {
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_readv_stub (frame,
- qr_readv_helper,
- fd, size,
- offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto fdctx_unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- fdctx_unlock:
- UNLOCK (&qr_fd_ctx->lock);
-
- if (op_ret == -1) {
- need_unwind = 1;
- goto out;
- }
- } else {
- can_wind = 1;
- }
-
- if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- } else if (can_wind) {
- STACK_WIND (frame, qr_readv_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size,
- offset);
- }
+ qr_inode = qr_inode_ctx_get(this, entry->inode);
+ if (!qr_inode)
+ /* no harm */
+ continue;
- }
+ qr_content_refresh(this, qr_inode, &entry->d_stat, local->incident_gen);
+ }
- if (vector) {
- FREE (vector);
- }
+unwind:
+ QR_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
- if (iobref) {
- iobref_unref (iobref);
- }
+int
+qr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
- return 0;
-}
+ local = qr_local_get(this, NULL);
+ frame->local = local;
+ STACK_WIND(frame, qr_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+ return 0;
+}
-int32_t
-qr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+int
+qr_readv_cached(call_frame_t *frame, qr_inode_t *qr_inode, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- QR_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
+ xlator_t *this = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ int op_ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ struct iatt buf = {
+ 0,
+ };
+ this = frame->this;
+ priv = this->private;
+ table = &priv->table;
-int32_t
-qr_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
-{
- STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count, off,
- iobref);
- return 0;
-}
+ LOCK(&table->lock);
+ {
+ if (!qr_inode->data)
+ goto unlock;
+ if (offset >= qr_inode->size)
+ goto unlock;
-int32_t
-qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
-{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_inode_t *qr_inode = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t op_ret = -1, op_errno = -1, ret = -1;
- char can_wind = 0, need_unwind = 0, need_open = 0;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- priv = this->private;
- table = &priv->table;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ if (!__qr_cache_is_fresh(this, qr_inode))
+ goto unlock;
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long)value;
- if (qr_inode != NULL) {
- inode_ctx_del (fd->inode, this, NULL);
- __qr_inode_free (qr_inode);
- }
- }
- }
- UNLOCK (&table->lock);
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_writev_stub (frame, qr_writev_helper,
- fd, vector, count, off,
- iobref);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
+ op_ret = min(size, (qr_inode->size - offset));
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, op_ret);
+ if (!iobuf) {
+ op_ret = -1;
+ goto unlock;
}
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count,
- off, iobref);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
+ iobref = iobref_new();
+ if (!iobref) {
+ op_ret = -1;
+ goto unlock;
}
- return 0;
-}
+ iobref_add(iobref, iobuf);
+ memcpy(iobuf->ptr, qr_inode->data + offset, op_ret);
-int32_t
-qr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
-{
- QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
- return 0;
-}
+ buf = qr_inode->buf;
+ /* bump LRU */
+ __qr_inode_register(frame->this, table, qr_inode);
+ }
+unlock:
+ UNLOCK(&table->lock);
-int32_t
-qr_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- return 0;
-}
+ if (op_ret >= 0) {
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = op_ret;
+ GF_ATOMIC_INC(priv->qr_counter.cache_hit);
+ STACK_UNWIND_STRICT(readv, frame, op_ret, 0, &iov, 1, &buf, iobref,
+ xdata);
+ } else {
+ GF_ATOMIC_INC(priv->qr_counter.cache_miss);
+ }
-int32_t
-qr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- char need_open = 0, can_wind = 0, need_unwind = 0;
- uint64_t value = 0;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- loc_t loc = {0, };
- char *path = NULL;
- int flags = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ if (iobuf)
+ iobuf_unref(iobuf);
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fstat_stub (frame, qr_fstat_helper,
- fd);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ if (iobref)
+ iobref_unref(iobref);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ return op_ret;
}
+int
+qr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ qr_inode_t *qr_inode = NULL;
+
+ qr_inode = qr_inode_ctx_get(this, fd->inode);
+ if (!qr_inode)
+ goto wind;
+ if (qr_readv_cached(frame, qr_inode, size, offset, flags, xdata) < 0)
+ goto wind;
+ return 0;
+wind:
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
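Aside: qr_readv_cached() serves a read from the cached file content only when the offset falls inside it, capping the returned length at whatever is cached past that offset; otherwise qr_readv() winds the call to the child. A standalone sketch of just that range clamping (hypothetical helper, no iobuf/iobref handling):

    /* Sketch only, not GlusterFS code. */
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>

    /* Returns bytes copied into out, or -1 if the request cannot be served
     * from cache (the caller would then wind the read to the next xlator). */
    static ssize_t
    read_cached(const char *data, size_t cached_size, char *out, size_t size,
                off_t offset)
    {
        size_t avail;

        if (!data || (size_t)offset >= cached_size)
            return -1;

        avail = cached_size - offset;
        if (size > avail)
            size = avail;     /* mirrors op_ret = min(size, qr_inode->size - offset) */

        memcpy(out, data + offset, size);
        return (ssize_t)size;
    }

    int
    main(void)
    {
        const char cached[] = "hello, cached world";
        char buf[8];
        ssize_t n = read_cached(cached, sizeof(cached) - 1, buf, sizeof(buf), 7);

        if (n > 0)
            printf("%.*s\n", (int)n, buf);   /* prints "cached w" */
        return 0;
    }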
int32_t
-qr_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *preop, struct stat *postop)
+qr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, preop, postop);
- return 0;
-}
+ qr_local_t *local = NULL;
+ local = frame->local;
-int32_t
-qr_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct stat *stbuf, int32_t valid)
-{
- STACK_WIND(frame, qr_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf,
- valid);
- return 0;
-}
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
+ QR_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
-int32_t
-qr_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct stat *stbuf, int32_t valid)
+int
+qr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ qr_local_t *local = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsetattr_stub (frame,
- qr_fsetattr_helper,
- fd, stbuf, valid);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf,
- valid);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
+ frame->local = local;
- return 0;
+ STACK_WIND(frame, qr_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, iov, count, offset, flags,
+ iobref, xdata);
+ return 0;
}
-
int32_t
-qr_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+qr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno);
- return 0;
-}
+ qr_local_t *local = NULL;
+ local = frame->local;
+ qr_inode_prune(this, local->inode, local->incident_gen);
-int32_t
-qr_fsetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
-{
- STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags);
- return 0;
+ QR_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-
-int32_t
-qr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+int
+qr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- int open_flags = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ qr_local_t *local = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- open_flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsetxattr_stub (frame,
- qr_fsetxattr_helper,
- fd, dict, flags);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ local = qr_local_get(this, loc->inode);
+ local->inode = inode_ref(loc->inode);
+ frame->local = local;
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict,
- flags);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, open_flags,
- fd, 0);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ STACK_WIND(frame, qr_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
-
int32_t
-qr_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+qr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- QR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict);
- return 0;
-}
+ qr_local_t *local = NULL;
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
-int32_t
-qr_fgetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
-{
- STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- return 0;
+ QR_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-
-int32_t
-qr_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name)
+int
+qr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- /*
- * FIXME: Can quick-read use the extended attributes stored in the
- * cache? this needs to be discussed.
- */
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ qr_local_t *local = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fgetxattr_stub (frame,
- qr_fgetxattr_helper,
- fd, name);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (open, frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ STACK_WIND(frame, qr_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
-
int32_t
-qr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+qr_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- QR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- return 0;
-}
+ qr_local_t *local = NULL;
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
-int32_t
-qr_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd);
- return 0;
+ QR_STACK_UNWIND(fallocate, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
}
-
-int32_t
-qr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+static int
+qr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int keep_size,
+ off_t offset, size_t len, dict_t *xdata)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ qr_local_t *local = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else if (qr_fd_ctx->open_in_transit) {
- stub = fop_flush_stub (frame, qr_flush_helper,
- fd);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- } else {
- op_ret = 0;
- need_unwind = 1;
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
- if (need_unwind) {
- QR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd);
- }
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
- return 0;
+ STACK_WIND(frame, qr_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+ xdata);
+ return 0;
}
-
int32_t
-qr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+qr_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- return 0;
+ qr_local_t *local = NULL;
+
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
+
+ QR_STACK_UNWIND(discard, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
}
-int32_t
-qr_fentrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+static int
+qr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- STACK_WIND(frame, qr_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename,
- cmd, type);
- return 0;
-}
+ qr_local_t *local = NULL;
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
+
+ STACK_WIND(frame, qr_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
int32_t
-qr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+qr_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ qr_local_t *local = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fentrylk_stub (frame,
- qr_fentrylk_helper,
- volume, fd, basename,
- cmd, type);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ local = frame->local;
+ qr_inode_prune(this, local->fd->inode, local->incident_gen);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk, volume, fd,
- basename, cmd, type);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ QR_STACK_UNWIND(zerofill, frame, op_ret, op_errno, pre, post, xdata);
+ return 0;
}
+static int
+qr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ qr_local_t *local = NULL;
-int32_t
-qr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ local = qr_local_get(this, fd->inode);
+ local->fd = fd_ref(fd);
+ frame->local = local;
-{
- QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_WIND(frame, qr_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
}
-
-int32_t
-qr_finodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *lock)
+int
+qr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock);
- return 0;
-}
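+ /* Record the cache priority for this inode, derived from matching
+ * loc->path against the configured pattern:priority list, before
+ * winding the open down. */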
+ qr_inode_set_priority(this, fd->inode, loc->path);
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
-int32_t
-qr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- int32_t cmd, struct flock *lock)
+int
+qr_forget(xlator_t *this, inode_t *inode)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ qr_inode_t *qr_inode = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_finodelk_stub (frame,
- qr_finodelk_helper,
- volume, fd, cmd,
- lock);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ qr_inode = qr_inode_ctx_get(this, inode);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, volume, fd,
- cmd, lock);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
-
+ if (!qr_inode)
return 0;
-}
+ qr_inode_prune(this, inode, qr_get_generation(this, inode));
-int32_t
-qr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *prebuf, struct stat *postbuf)
-{
- QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
+ GF_FREE(qr_inode);
+ return 0;
+}
int32_t
-qr_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+qr_inodectx_dump(xlator_t *this, inode_t *inode)
{
- STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
- return 0;
+ qr_inode_t *qr_inode = NULL;
+ int32_t ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char buf[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ qr_inode = qr_inode_ctx_get(this, inode);
+ if (!qr_inode)
+ goto out;
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read",
+ "inodectx");
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("entire-file-cached", "%s",
+ qr_inode->data ? "yes" : "no");
+
+ if (qr_inode->last_refresh) {
+ gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT);
+ gf_proc_dump_write("last-cache-validation-time", "%s", buf);
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-int32_t
-qr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+int
+qr_priv_dump(xlator_t *this)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- int open_flags = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- open_flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsync_stub (frame, qr_fsync_helper,
- fd, flags);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ uint32_t file_count = 0;
+ uint32_t i = 0;
+ qr_inode_t *curr = NULL;
+ uint64_t total_size = 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync, fd, flags);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, open_flags,
- fd, 0);
-
- qr_loc_wipe (&loc);
- }
+ if (!this) {
+ return -1;
+ }
- return 0;
-}
+ priv = this->private;
+ conf = &priv->conf;
+ if (!conf)
+ return -1;
+ table = &priv->table;
-int32_t
-qr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
-{
- int32_t ret = 0;
- uint64_t value = 0;
- qr_inode_t *qr_inode = NULL;
- qr_local_t *local = NULL;
- qr_private_t *priv = NULL;
- qr_inode_table_t *table = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read", "priv");
- priv = this->private;
- table = &priv->table;
+ gf_proc_dump_add_section("%s", key_prefix);
- local = frame->local;
- if ((local == NULL) || (local->fd == NULL)
- || (local->fd->inode == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ gf_proc_dump_write("max_file_size", "%" PRIu64, conf->max_file_size);
+ gf_proc_dump_write("cache_timeout", "%d", conf->cache_timeout);
- LOCK (&table->lock);
- {
- ret = inode_ctx_get (local->fd->inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
-
- if (qr_inode) {
- if (qr_inode->stbuf.st_size != postbuf->st_size)
- {
- inode_ctx_del (local->fd->inode, this,
- NULL);
- __qr_inode_free (qr_inode);
- }
- }
- }
- }
- UNLOCK (&table->lock);
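+ /* Walk each priority bucket of the LRU to total the cached files and
+ * the bytes they hold for the statedump. */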
+ if (!table) {
+ goto out;
+ } else {
+ for (i = 0; i < conf->max_pri; i++) {
+ list_for_each_entry(curr, &table->lru[i], lru)
+ {
+ file_count++;
+ total_size += curr->size;
+ }
+ }
+ }
+
+ gf_proc_dump_write("total_files_cached", "%d", file_count);
+ gf_proc_dump_write("total_cache_used", "%" PRIu64, total_size);
+ gf_proc_dump_write("cache-hit", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(priv->qr_counter.cache_hit));
+ gf_proc_dump_write("cache-miss", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(priv->qr_counter.cache_miss));
+ gf_proc_dump_write("cache-invalidations", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(priv->qr_counter.file_data_invals));
out:
- QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ return 0;
}
-
-int32_t
-qr_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+static int32_t
+qr_dump_metrics(xlator_t *this, int fd)
{
- STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+
+ priv = this->private;
+ table = &priv->table;
+
+ dprintf(fd, "%s.total_files_cached %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.files_cached));
+ dprintf(fd, "%s.total_cache_used %" PRId64 "\n", this->name,
+ table->cache_used);
+ dprintf(fd, "%s.cache-hit %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.cache_hit));
+ dprintf(fd, "%s.cache-miss %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.cache_miss));
+ dprintf(fd, "%s.cache-invalidations %" PRId64 "\n", this->name,
+ GF_ATOMIC_GET(priv->qr_counter.file_data_invals));
+
+ return 0;
}
-
int32_t
-qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+qr_mem_acct_init(xlator_t *this)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_local_t *local = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
+ int ret = -1;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto out;
- }
+ if (!this)
+ return ret;
- local->fd = fd;
- frame->local = local;
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_ftruncate_stub (frame,
- qr_ftruncate_helper,
- fd, offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ ret = xlator_mem_acct_init(this, gf_qr_mt_end + 1);
-out:
- if (need_unwind) {
- QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
- NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, QUICK_READ_MSG_NO_MEMORY,
+ "Memory accounting init failed");
+ return ret;
+ }
- return 0;
+ return ret;
}
-
-int32_t
-qr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
+static gf_boolean_t
+check_cache_size_ok(xlator_t *this, int64_t cache_size)
{
- QR_STACK_UNWIND (lk, frame, op_ret, op_errno, lock);
- return 0;
+ int ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT(this);
+ opt = xlator_volume_option_get(this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ QUICK_READ_MSG_INVALID_ARGUMENT,
+ "could not get cache-size option");
+ goto out;
+ }
+
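+ /* Cap cache-size at the host's total memory; if that cannot be read,
+ * fall back to the option's declared maximum. */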
+ total_mem = get_mem_size();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_msg(this->name, GF_LOG_ERROR, 0, QUICK_READ_MSG_INVALID_ARGUMENT,
+ "Cache size %" PRIu64
+ " is greater than the max size of %" PRIu64,
+ cache_size, max_cache_size);
+ goto out;
+ }
+out:
+ return ret;
}
-
-int32_t
-qr_lk_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+int
+qr_reconfigure(xlator_t *this, dict_t *options)
{
- STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock);
+ int32_t ret = -1;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ uint64_t cache_size_new = 0;
- return 0;
-}
+ GF_VALIDATE_OR_GOTO("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, options, out);
+
+ priv = this->private;
+
+ conf = &priv->conf;
+ if (!conf) {
+ goto out;
+ }
+
+ GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out);
+
+ GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation,
+ options, bool, out);
+
+ GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options,
+ bool, out);
+
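+ /* Validate the new cache-size against available memory before
+ * applying it; otherwise refuse the reconfigure. */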
+ GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64, out);
+ if (!check_cache_size_ok(this, cache_size_new)) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, QUICK_READ_MSG_INVALID_CONFIG,
+ "Not reconfiguring cache-size");
+ goto out;
+ }
+ conf->cache_size = cache_size_new;
+ ret = 0;
+out:
+ return ret;
+}
int32_t
-qr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+qr_get_priority_list(const char *opt_str, struct list_head *first)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_lk_stub (frame, qr_lk_helper, fd,
- cmd, lock);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *priority_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("quick-read", opt_str, out);
+ GF_VALIDATE_OR_GOTO("quick-read", first, out);
+
+ string = gf_strdup(opt_str);
+ if (string == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ priority_str = strtok_r(string, ",", &tmp_str);
+ while (priority_str) {
+ curr = GF_CALLOC(1, sizeof(*curr), gf_qr_mt_qr_priority_t);
+ if (curr == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ list_add_tail(&curr->list, first);
+
+ dup_str = gf_strdup(priority_str);
+ if (dup_str == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ if (!pattern) {
+ max_pri = -1;
+ goto out;
+ }
+
+ priority = strtok_r(NULL, ":", &tmp_str1);
+ if (!priority) {
+ max_pri = -1;
+ goto out;
+ }
+
+ gf_msg_trace("quick-read", 0,
+ "quick-read priority : pattern %s : priority %s", pattern,
+ priority);
+
+ curr->pattern = gf_strdup(pattern);
+ if (curr->pattern == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+
+ curr->priority = strtol(priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
+ max_pri = -1;
+ goto out;
} else {
- can_wind = 1;
+ max_pri = max(max_pri, curr->priority);
}
+ GF_FREE(dup_str);
+ dup_str = NULL;
+
+ priority_str = strtok_r(NULL, ",", &tmp_str);
+ }
out:
- if (need_unwind) {
- QR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd, 0);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
+ GF_FREE(string);
+ GF_FREE(dup_str);
-int32_t
-qr_release (xlator_t *this, fd_t *fd)
-{
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = 0;
- uint64_t value = 0;
-
- ret = fd_ctx_del (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- if (qr_fd_ctx) {
- qr_fd_ctx_free (qr_fd_ctx);
- }
+ if (max_pri == -1) {
+ list_for_each_entry_safe(curr, tmp, first, list)
+ {
+ list_del_init(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
}
+ }
- return 0;
+ return max_pri;
}
-
int32_t
-qr_forget (xlator_t *this, inode_t *inode)
+qr_init(xlator_t *this)
{
- qr_inode_t *qr_inode = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
- qr_private_t *priv = NULL;
-
- priv = this->private;
-
- LOCK (&priv->table.lock);
- {
- ret = inode_ctx_del (inode, this, &value);
- if (ret == 0) {
- qr_inode = (qr_inode_t *)(long) value;
- __qr_inode_free (qr_inode);
- }
- }
- UNLOCK (&priv->table.lock);
+ int32_t ret = -1, i = 0;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: volume (%s) not configured with exactly one "
+ "child",
+ this->name);
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_qr_mt_qr_private_t);
+ if (priv == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ LOCK_INIT(&priv->table.lock);
+ conf = &priv->conf;
+
+ GF_OPTION_INIT("max-file-size", conf->max_file_size, size_uint64, out);
+
+ GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out);
+
+ GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool,
+ out);
+
+ GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out);
+ if (!check_cache_size_ok(this, conf->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_OPTION_INIT("ctime-invalidation", conf->ctime_invalidation, bool, out);
+
+ INIT_LIST_HEAD(&conf->priority_list);
+ conf->max_pri = 1;
+ if (dict_get(this->options, "priority")) {
+ char *option_list = data_to_str(dict_get(this->options, "priority"));
+ gf_msg_trace(this->name, 0, "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ conf->max_pri = qr_get_priority_list(option_list, &conf->priority_list);
+
+ if (conf->max_pri == -1) {
+ goto out;
+ }
+ conf->max_pri++;
+ }
+
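+ /* Allocate one LRU list per configured priority level. */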
+ priv->table.lru = GF_CALLOC(conf->max_pri, sizeof(*priv->table.lru),
+ gf_common_mt_list_head);
+ if (priv->table.lru == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < conf->max_pri; i++) {
+ INIT_LIST_HEAD(&priv->table.lru[i]);
+ }
+
+ ret = 0;
+
+ priv->last_child_down = gf_time();
+ GF_ATOMIC_INIT(priv->generation, 0);
+ this->private = priv;
+out:
+ if ((ret == -1) && priv) {
+ GF_FREE(priv);
+ }
- return 0;
+ return ret;
}
-int
-qr_priv_dump (xlator_t *this)
+void
+qr_inode_table_destroy(qr_private_t *priv)
{
- qr_conf_t *conf = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- qr_private_t *priv = NULL;
-
- if (!this)
- return -1;
-
- priv = this->private;
- conf = &priv->conf;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
+ int i = 0;
+ qr_conf_t *conf = NULL;
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.quick-read",
- "priv");
+ conf = &priv->conf;
- gf_proc_dump_add_section (key_prefix);
+ for (i = 0; i < conf->max_pri; i++) {
+ /* There is a known inode leak; until that is fixed, log a
+ * message instead of asserting.
+ GF_ASSERT (list_empty (&priv->table.lru[i]));*/
+ if (!list_empty(&priv->table.lru[i])) {
+ gf_msg("quick-read", GF_LOG_INFO, 0, QUICK_READ_MSG_LRU_NOT_EMPTY,
+ "quick read inode table lru not empty");
+ }
+ }
- gf_proc_dump_build_key (key, key_prefix, "max_file_size");
- gf_proc_dump_write (key, "%d", conf->max_file_size);
- gf_proc_dump_build_key (key, key_prefix, "cache_timeout");
- gf_proc_dump_write (key, "%d", conf->cache_timeout);
+ LOCK_DESTROY(&priv->table.lock);
- return 0;
+ return;
}
-
-int32_t
-qr_get_priority_list (const char *opt_str, struct list_head *first)
+void
+qr_conf_destroy(qr_conf_t *conf)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *priority_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct qr_priority *curr = NULL, *tmp = NULL;
-
- string = strdup (opt_str);
- if (string == NULL) {
- max_pri = -1;
- goto out;
- }
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- priority_str = strtok_r (string, ",", &tmp_str);
- while (priority_str) {
- curr = CALLOC (1, sizeof (*curr));
- if (curr == NULL) {
- max_pri = -1;
- goto out;
- }
-
- list_add_tail (&curr->list, first);
-
- dup_str = strdup (priority_str);
- if (dup_str == NULL) {
- max_pri = -1;
- goto out;
- }
-
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
- max_pri = -1;
- goto out;
- }
-
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
- max_pri = -1;
- goto out;
- }
-
- gf_log ("quick-read", GF_LOG_TRACE,
- "quick-read priority : pattern %s : priority %s",
- pattern,
- priority);
-
- curr->pattern = strdup (pattern);
- if (curr->pattern == NULL) {
- max_pri = -1;
- goto out;
- }
-
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
- max_pri = -1;
- goto out;
- } else {
- max_pri = max (max_pri, curr->priority);
- }
-
- free (dup_str);
- dup_str = NULL;
-
- priority_str = strtok_r (NULL, ",", &tmp_str);
- }
-out:
- if (string != NULL) {
- free (string);
- }
-
- if (dup_str != NULL) {
- free (dup_str);
- }
+ struct qr_priority *curr = NULL, *tmp = NULL;
- if (max_pri == -1) {
- list_for_each_entry_safe (curr, tmp, first, list) {
- list_del_init (&curr->list);
- free (curr->pattern);
- free (curr);
- }
- }
+ list_for_each_entry_safe(curr, tmp, &conf->priority_list, list)
+ {
+ list_del(&curr->list);
+ GF_FREE(curr->pattern);
+ GF_FREE(curr);
+ }
- return max_pri;
+ return;
}
-
-int32_t
-init (xlator_t *this)
+void
+qr_update_child_down_time(xlator_t *this, time_t now)
{
- char *str = NULL;
- int32_t ret = -1, i = 0;
- qr_private_t *priv = NULL;
- qr_conf_t *conf = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
- return -1;
- }
+ qr_private_t *priv = NULL;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- priv = CALLOC (1, sizeof (*priv));
- if (priv == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- ret = -1;
- goto out;
- }
+ priv = this->private;
- LOCK_INIT (&priv->table.lock);
-
- conf = &priv->conf;
-
- conf->max_file_size = 65536;
- ret = dict_get_str (this->options, "max-file-size",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str, &conf->max_file_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "max-file-size\"",
- str);
- ret = -1;
- goto out;
- }
- }
+ LOCK(&priv->lock);
+ {
+ priv->last_child_down = now;
+ }
+ UNLOCK(&priv->lock);
+}
- conf->cache_timeout = 1;
- ret = dict_get_str (this->options, "cache-timeout", &str);
- if (ret == 0) {
- ret = gf_string2uint_base10 (str,
- (unsigned int *)&conf->cache_timeout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cache-timeout value %s", str);
- ret = -1;
- goto out;
- }
- }
+static int
+qr_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ inode_table_t *itable = NULL;
+ qr_private_t *priv = NULL;
- conf->cache_size = QR_DEFAULT_CACHE_SIZE;
- ret = dict_get_str (this->options, "cache-size", &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str, &conf->cache_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cache-size value %s", str);
- ret = -1;
- goto out;
- }
- }
+ up_data = (struct gf_upcall *)data;
- INIT_LIST_HEAD (&conf->priority_list);
- conf->max_pri = 1;
- if (dict_get (this->options, "priority")) {
- char *option_list = data_to_str (dict_get (this->options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- conf->max_pri = qr_get_priority_list (option_list,
- &conf->priority_list);
-
- if (conf->max_pri == -1) {
- goto out;
- }
- conf->max_pri ++;
- }
-
- priv->table.lru = CALLOC (conf->max_pri,
- sizeof (*priv->table.lru));
- if (priv->table.lru == NULL) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
- for (i = 0; i < conf->max_pri; i++) {
- INIT_LIST_HEAD (&priv->table.lru[i]);
- }
+ priv = this->private;
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
- ret = 0;
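+ /* Only write-type upcalls can make cached file data stale; for those,
+ * locate the inode in the top-level graph's inode table and prune its
+ * cache. */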
+ if (up_ci && (up_ci->flags & UP_WRITE_FLAGS)) {
+ GF_ATOMIC_INC(priv->qr_counter.file_data_invals);
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+ qr_inode_prune(this, inode, qr_get_generation(this, inode));
+ }
- this->private = priv;
out:
- if ((ret == -1) && priv) {
- FREE (priv);
- }
+ if (inode)
+ inode_unref(inode);
- return ret;
+ return ret;
}
+int
+qr_notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ priv = this->private;
+ conf = &priv->conf;
+
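+ /* Record child-down times and hand upcall notifications to the
+ * invalidation path; every other event simply passes through. */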
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ qr_update_child_down_time(this, gf_time());
+ break;
+ case GF_EVENT_UPCALL:
+ if (conf->qr_invalidation)
+ ret = qr_invalidate(this, data);
+ break;
+ default:
+ break;
+ }
+
+ if (default_notify(this, event, data) != 0)
+ ret = -1;
+
+ return ret;
+}
void
-fini (xlator_t *this)
+qr_fini(xlator_t *this)
{
- return;
-}
+ qr_private_t *priv = NULL;
+ if (this == NULL) {
+ goto out;
+ }
-struct xlator_fops fops = {
- .lookup = qr_lookup,
- .open = qr_open,
- .readv = qr_readv,
- .writev = qr_writev,
- .fstat = qr_fstat,
- .fsetxattr = qr_fsetxattr,
- .fgetxattr = qr_fgetxattr,
- .flush = qr_flush,
- .fentrylk = qr_fentrylk,
- .finodelk = qr_finodelk,
- .fsync = qr_fsync,
- .ftruncate = qr_ftruncate,
- .lk = qr_lk,
- .fsetattr = qr_fsetattr,
-};
+ priv = this->private;
+ if (priv == NULL) {
+ goto out;
+ }
+ qr_inode_table_destroy(priv);
+ qr_conf_destroy(&priv->conf);
-struct xlator_mops mops = {
-};
+ this->private = NULL;
+ GF_FREE(priv);
+out:
+ return;
+}
-struct xlator_cbks cbks = {
- .forget = qr_forget,
- .release = qr_release,
+struct xlator_fops qr_fops = {.lookup = qr_lookup,
+ .readdirp = qr_readdirp,
+ .open = qr_open,
+ .readv = qr_readv,
+ .writev = qr_writev,
+ .truncate = qr_truncate,
+ .ftruncate = qr_ftruncate,
+ .fallocate = qr_fallocate,
+ .discard = qr_discard,
+ .zerofill = qr_zerofill};
+
+struct xlator_cbks qr_cbks = {
+ .forget = qr_forget,
};
-struct xlator_dumpops dumpops = {
- .priv = qr_priv_dump,
+struct xlator_dumpops qr_dumpops = {
+ .priv = qr_priv_dump,
+ .inodectx = qr_inodectx_dump,
};
-struct volume_options options[] = {
- { .key = {"cache-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 60
- },
- { .key = {"max-file-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 1 * GF_UNIT_KB * 1000,
- },
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 6 * GF_UNIT_GB,
- },
+struct volume_options qr_options[] = {
+ {
+ .key = {"quick-read"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable quick-read",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"priority"}, .type = GF_OPTION_TYPE_ANY},
+ {.key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = INFINITY,
+ .default_value = "128MB",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "Size of small file read cache."},
+ {
+ .key = {"cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+ .key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 1 * GF_UNIT_KB * 1000,
+ .default_value = "64KB",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+ .key = {"quick-read-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+ },
+ {
+ .key = {"ctime-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_5_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "Quick-read by default uses mtime to identify changes "
+ "to file data. However there are applications like "
+ "rsync which explicitly set mtime making it unreliable "
+ "for the purpose of identifying change in file content "
+ ". Since ctime also changes when content of a file "
+ " changes and it cannot be set explicitly, it becomes "
+ " suitable for identifying staleness of cached data. "
+ "This option makes quick-read to prefer ctime over "
+ "mtime to validate its cache. However, using ctime "
+ "can result in false positives as ctime changes with "
+ "just attribute changes like permission without "
+ "changes to file data. So, use this only when mtime "
+ "is not reliable",
+ },
+ {.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = qr_init,
+ .fini = qr_fini,
+ .notify = qr_notify,
+ .reconfigure = qr_reconfigure,
+ .mem_acct_init = qr_mem_acct_init,
+ .dump_metrics = qr_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &qr_dumpops,
+ .fops = &qr_fops,
+ .cbks = &qr_cbks,
+ .options = qr_options,
+ .identifier = "quick-read",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h
index 8bf966c6397..20fcc70b3a7 100644
--- a/xlators/performance/quick-read/src/quick-read.h
+++ b/xlators/performance/quick-read/src/quick-read.h
@@ -1,119 +1,91 @@
/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __QUICK_READ_H
#define __QUICK_READ_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "list.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "common-utils.h"
-#include "call-stub.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
#include <libgen.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fnmatch.h>
-
-#define GLUSTERFS_CONTENT_KEY "glusterfs.content"
-
-struct qr_fd_ctx {
- char opened;
- char disabled;
- char open_in_transit;
- char *path;
- int flags;
- struct list_head waiting_ops;
- gf_lock_t lock;
-};
-typedef struct qr_fd_ctx qr_fd_ctx_t;
-
-struct qr_local {
- char is_open;
- char just_validated;
- char *path;
- fd_t *fd;
- int open_flags;
- int32_t op_ret;
- int32_t op_errno;
- call_stub_t *stub;
-};
-typedef struct qr_local qr_local_t;
+#include "quick-read-mem-types.h"
struct qr_inode {
- dict_t *xattr;
- inode_t *inode;
- int priority;
- struct stat stbuf;
- struct timeval tv;
- struct list_head lru;
+ void *data;
+ size_t size;
+ int priority;
+ uint32_t ia_mtime;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime;
+ uint32_t ia_ctime_nsec;
+ uint32_t gen_rollover;
+ struct iatt buf;
+ time_t last_refresh;
+ struct list_head lru;
+ uint64_t gen;
+ uint64_t invalidation_time;
};
typedef struct qr_inode qr_inode_t;
struct qr_priority {
- char *pattern;
- int32_t priority;
- struct list_head list;
+ char *pattern;
+ int32_t priority;
+ struct list_head list;
};
typedef struct qr_priority qr_priority_t;
struct qr_conf {
- uint64_t max_file_size;
- int32_t cache_timeout;
- uint64_t cache_size;
- int max_pri;
- struct list_head priority_list;
+ uint64_t max_file_size;
+ int32_t cache_timeout;
+ uint64_t cache_size;
+ int max_pri;
+ gf_boolean_t qr_invalidation;
+ gf_boolean_t ctime_invalidation;
+ struct list_head priority_list;
};
typedef struct qr_conf qr_conf_t;
struct qr_inode_table {
- uint64_t cache_used;
- struct list_head *lru;
- gf_lock_t lock;
+ uint64_t cache_used;
+ struct list_head *lru;
+ gf_lock_t lock;
};
typedef struct qr_inode_table qr_inode_table_t;
+struct qr_statistics {
+ gf_atomic_t cache_hit;
+ gf_atomic_t cache_miss;
+ gf_atomic_t file_data_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t files_cached;
+};
+
struct qr_private {
- qr_conf_t conf;
- qr_inode_table_t table;
+ qr_conf_t conf;
+ qr_inode_table_t table;
+ time_t last_child_down;
+ gf_lock_t lock;
+ struct qr_statistics qr_counter;
+ gf_atomic_int32_t generation;
};
typedef struct qr_private qr_private_t;
-void qr_local_free (qr_local_t *local);
-
-#define QR_STACK_UNWIND(op, frame, params ...) do { \
- qr_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND_STRICT (op, frame, params); \
- qr_local_free (__local); \
-} while (0)
-
-
#endif /* #ifndef __QUICK_READ_H */
diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am
index 7bb90228227..99efca3660c 100644
--- a/xlators/performance/read-ahead/src/Makefile.am
+++ b/xlators/performance/read-ahead/src/Makefile.am
@@ -1,14 +1,16 @@
xlator_LTLIBRARIES = read-ahead.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-read_ahead_la_LDFLAGS = -module -avoidversion
+read_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
read_ahead_la_SOURCES = read-ahead.c page.c
read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = read-ahead.h
+noinst_HEADERS = read-ahead.h read-ahead-mem-types.h read-ahead-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c
index 07ab84ed8d7..8a58ad8bb7a 100644
--- a/xlators/performance/read-ahead/src/page.c
+++ b/xlators/performance/read-ahead/src/page.c
@@ -1,415 +1,455 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "read-ahead.h"
#include <assert.h>
+#include "read-ahead-messages.h"
ra_page_t *
-ra_page_get (ra_file_t *file, off_t offset)
+ra_page_get(ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ GF_VALIDATE_OR_GOTO("read-ahead", file, out);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ page = file->pages.next;
+ rounded_offset = gf_floor(offset, file->page_size);
- if (page == &file->pages || page->offset != rounded_offset)
- page = NULL;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- return page;
-}
+ if (page == &file->pages || page->offset != rounded_offset)
+ page = NULL;
+out:
+ return page;
+}
ra_page_t *
-ra_page_create (ra_file_t *file, off_t offset)
+ra_page_create(ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
- ra_page_t *newpage = NULL;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ra_page_t *newpage = NULL;
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ GF_VALIDATE_OR_GOTO("read-ahead", file, out);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ page = file->pages.next;
+ rounded_offset = gf_floor(offset, file->page_size);
- if (page == &file->pages || page->offset != rounded_offset) {
- newpage = CALLOC (1, sizeof (*newpage));
- if (!newpage)
- return NULL;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- newpage->offset = rounded_offset;
- newpage->prev = page->prev;
- newpage->next = page;
- newpage->file = file;
- page->prev->next = newpage;
- page->prev = newpage;
+ if (page == &file->pages || page->offset != rounded_offset) {
+ newpage = GF_CALLOC(1, sizeof(*newpage), gf_ra_mt_ra_page_t);
+ if (!newpage) {
+ goto out;
+ }
- page = newpage;
- }
+ newpage->offset = rounded_offset;
+ newpage->prev = page->prev;
+ newpage->next = page;
+ newpage->file = file;
+ page->prev->next = newpage;
+ page->prev = newpage;
- return page;
-}
+ page = newpage;
+ }
+out:
+ return page;
+}
void
-ra_wait_on_page (ra_page_t *page, call_frame_t *frame)
+ra_wait_on_page(ra_page_t *page, call_frame_t *frame)
{
- ra_waitq_t *waitq = NULL;
- ra_local_t *local = NULL;
-
- local = frame->local;
- waitq = CALLOC (1, sizeof (*waitq));
- if (!waitq) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "out of memory");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
+ ra_waitq_t *waitq = NULL;
+ ra_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, page, out);
+
+ local = frame->local;
+
+ waitq = GF_CALLOC(1, sizeof(*waitq), gf_ra_mt_ra_waitq_t);
+ if (!waitq) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
- waitq->data = frame;
- waitq->next = page->waitq;
- page->waitq = waitq;
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ page->waitq = waitq;
- ra_local_lock (local);
- {
- local->wait_count++;
- }
- ra_local_unlock (local);
+ ra_local_lock(local);
+ {
+ local->wait_count++;
+ }
+ ra_local_unlock(local);
out:
- return;
+ return;
}
-
void
-ra_waitq_return (ra_waitq_t *waitq)
+ra_waitq_return(ra_waitq_t *waitq)
{
- ra_waitq_t *trav = NULL;
- ra_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ra_waitq_t *trav = NULL;
+ ra_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ra_frame_return (frame);
- free (trav);
- }
-}
+ frame = trav->data;
+ ra_frame_return(frame);
+ GF_FREE(trav);
+ }
+ return;
+}
int
-ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ra_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- ra_local_t *local = NULL;
- off_t pending_offset = 0;
- ra_file_t *file = NULL;
- ra_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- ra_waitq_t *waitq = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fd = local->fd;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
-
- file = (ra_file_t *)(long)tmp_file;
- pending_offset = local->pending_offset;
- trav_offset = pending_offset;
- payload_size = op_ret;
-
- ra_file_lock (file);
- {
- if (op_ret >= 0)
- file->stbuf = *stbuf;
-
- if (op_ret < 0) {
- page = ra_page_get (file, pending_offset);
- if (page)
- waitq = ra_page_error (page, op_ret, op_errno);
- goto unlock;
- }
-
- page = ra_page_get (file, pending_offset);
- if (!page) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
- pending_offset, file->page_size, file);
- goto unlock;
- }
-
- if (page->vector) {
- iobref_unref (page->iobref);
- free (page->vector);
- }
-
- page->vector = iov_dup (vector, count);
- if (page->vector == NULL) {
- waitq = ra_page_error (page, -1, ENOMEM);
- goto unlock;
- }
-
- page->count = count;
- page->iobref = iobref_ref (iobref);
- page->ready = 1;
-
- page->size = iov_length (vector, count);
-
- waitq = ra_page_wakeup (page);
- }
-unlock:
- ra_file_unlock (file);
-
- ra_waitq_return (waitq);
-
- fd_unref (local->fd);
+ ra_local_t *local = NULL;
+ off_t pending_offset = 0;
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ fd_t *fd = NULL;
+ uint64_t tmp_file = 0;
+ gf_boolean_t stale = _gf_false;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ fd = local->fd;
+
+ fd_ctx_get(fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ pending_offset = local->pending_offset;
+
+ if (file == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, EBADF,
+ READ_AHEAD_MSG_FD_CONTEXT_NOT_SET,
+ "read-ahead context not set in fd (%p)", fd);
+ op_ret = -1;
+ op_errno = EBADF;
+ goto out;
+ }
+
+ ra_file_lock(file);
+ {
+ if (op_ret >= 0)
+ file->stbuf = *stbuf;
+
+ page = ra_page_get(file, pending_offset);
+
+ if (!page) {
+ gf_msg_trace(this->name, 0,
+ "wasted copy: "
+ "%" PRId64 "[+%" PRId64 "] file=%p",
+ pending_offset, file->page_size, file);
+ goto unlock;
+ }
- free (frame->local);
- frame->local = NULL;
+ if (page->stale) {
+ page->stale = 0;
+ page->ready = 0;
+ stale = 1;
+ goto unlock;
+ }
- STACK_DESTROY (frame->root);
- return 0;
-}
+ /*
+ * "Dirty" means that the request was a pure read-ahead; it's
+ * set for requests we issue ourselves, and cleared when user
+ * requests are issued or put on the waitq. "Poisoned" means
+ * that we got a write while a read was still in flight, and we
+ * couldn't stop it so we marked it instead. If it's both
+ * dirty and poisoned by the time we get here, we cancel its
+ * effect so that a subsequent user read doesn't get data that
+ * we know is stale (because we made it stale ourselves). We
+ * can't use ESTALE because that has special significance.
+ * ECANCELED has no such special meaning, and is close to what
+ * we're trying to indicate.
+ */
+ if (page->dirty && page->poisoned) {
+ op_ret = -1;
+ op_errno = ECANCELED;
+ }
+ if (op_ret < 0) {
+ waitq = ra_page_error(page, op_ret, op_errno);
+ goto unlock;
+ }
-void
-ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
-{
- call_frame_t *fault_frame = NULL;
- ra_local_t *fault_local = NULL, *local = NULL;
- ra_page_t *page = NULL;
- ra_waitq_t *waitq = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- local = frame->local;
- fault_frame = copy_frame (frame);
- if (fault_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
+ if (page->vector) {
+ iobref_unref(page->iobref);
+ GF_FREE(page->vector);
}
- fault_local = CALLOC (1, sizeof (ra_local_t));
- if (fault_local == NULL) {
- STACK_DESTROY (fault_frame->root);
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
+ page->vector = iov_dup(vector, count);
+ if (page->vector == NULL) {
+ waitq = ra_page_error(page, -1, ENOMEM);
+ goto unlock;
}
- fault_frame->local = fault_local;
- fault_local->pending_offset = offset;
- fault_local->pending_size = file->page_size;
+ page->count = count;
+ page->iobref = iobref_ref(iobref);
+ page->ready = 1;
+
+ page->size = iov_length(vector, count);
- fault_local->fd = fd_ref (file->fd);
+ waitq = ra_page_wakeup(page);
+ }
+unlock:
+ ra_file_unlock(file);
- STACK_WIND (fault_frame, ra_fault_cbk,
- FIRST_CHILD (fault_frame->this),
- FIRST_CHILD (fault_frame->this)->fops->readv,
- file->fd, file->page_size, offset);
+ if (stale) {
+ STACK_WIND(frame, ra_fault_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->readv, local->fd,
+ local->pending_size, local->pending_offset, 0, NULL);
- return;
+ return 0;
+ }
-err:
- ra_file_lock (file);
- {
- page = ra_page_get (file, offset);
- if (page)
- waitq = ra_page_error (page, op_ret,
- op_errno);
- }
- ra_file_unlock (file);
-
- if (waitq != NULL) {
- ra_waitq_return (waitq);
- }
+ ra_waitq_return(waitq);
+
+ fd_unref(local->fd);
+
+ mem_put(frame->local);
+ frame->local = NULL;
+
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
}
void
-ra_frame_fill (ra_page_t *page, call_frame_t *frame)
+ra_page_fault(ra_file_t *file, call_frame_t *frame, off_t offset)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ra_fill_t *new = NULL;
-
- local = frame->local;
- fill = &local->fill;
-
- if (local->op_ret != -1 && page->size) {
- if (local->offset > page->offset)
- src_offset = local->offset - page->offset;
- else
- dst_offset = page->offset - local->offset;
-
- copy_size = min (page->size - src_offset,
- local->size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- fill = fill->next;
- while (fill != &local->fill) {
- if (fill->offset > page->offset) {
- break;
- }
- fill = fill->next;
- }
-
- new = CALLOC (1, sizeof (*new));
- if (new == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
-
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- NULL);
- new->vector = CALLOC (new->count, sizeof (struct iovec));
- if (new->vector == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- FREE (new);
- goto out;
- }
-
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- new->vector);
-
- new->next = fill;
- new->prev = new->next->prev;
- new->next->prev = new;
- new->prev->next = new;
-
- local->op_ret += copy_size;
- }
+ call_frame_t *fault_frame = NULL;
+ ra_local_t *fault_local = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, file, out);
+
+ fault_frame = copy_frame(frame);
+ if (fault_frame == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ fault_local = mem_get0(THIS->local_pool);
+ if (fault_local == NULL) {
+ STACK_DESTROY(fault_frame->root);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ fault_frame->local = fault_local;
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = file->page_size;
+
+ fault_local->fd = fd_ref(file->fd);
+
+ STACK_WIND(fault_frame, ra_fault_cbk, FIRST_CHILD(fault_frame->this),
+ FIRST_CHILD(fault_frame->this)->fops->readv, file->fd,
+ file->page_size, offset, 0, NULL);
+
+ return;
+
+err:
+ ra_file_lock(file);
+ {
+ page = ra_page_get(file, offset);
+ if (page)
+ waitq = ra_page_error(page, op_ret, op_errno);
+ }
+ ra_file_unlock(file);
+
+ if (waitq != NULL) {
+ ra_waitq_return(waitq);
+ }
out:
- return;
+ return;
}
-
void
-ra_frame_unwind (call_frame_t *frame)
+ra_frame_fill(ra_page_t *page, call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- int32_t count = 0;
- struct iovec *vector;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- ra_fill_t *next = NULL;
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fill = local->fill.next;
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ra_fill_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, page, out);
+
+ local = frame->local;
+ fill = &local->fill;
+
+ if (local->op_ret != -1 && page->size) {
+ if (local->offset > page->offset)
+ src_offset = local->offset - page->offset;
+ else
+ dst_offset = page->offset - local->offset;
+
+ copy_size = min(page->size - src_offset, local->size - dst_offset);
+
+ if (copy_size < 0) {
+ /* the page holds fewer bytes than needed and the required
+ offset lies beyond the data present in the page, so there
+ is nothing to copy */
+ copy_size = src_offset = 0;
}
- frame->local = NULL;
+ fill = fill->next;
+ while (fill != &local->fill) {
+ if (fill->offset > page->offset) {
+ break;
+ }
+ fill = fill->next;
+ }
- while (fill != &local->fill) {
- count += fill->count;
- fill = fill->next;
- }
+ new = GF_CALLOC(1, sizeof(*new), gf_ra_mt_ra_fill_t);
+ if (new == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
- vector = CALLOC (count, sizeof (*vector));
- if (vector == NULL) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- iobref_unref (iobref);
- iobref = NULL;
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref(page->iobref);
+ new->count = iov_subset(page->vector, page->count, src_offset,
+ copy_size, &new->vector, 0);
+ if (new->count < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(new->iobref);
+ GF_FREE(new);
+ goto out;
}
- fill = local->fill.next;
+ new->next = fill;
+ new->prev = new->next->prev;
+ new->next->prev = new;
+ new->prev->next = new;
- while (fill != &local->fill) {
- next = fill->next;
+ local->op_ret += copy_size;
+ }
+
+out:
+ return;
+}
- if ((vector != NULL) && (iobref != NULL)) {
- memcpy (((char *)vector) + copied, fill->vector,
- fill->count * sizeof (*vector));
-
- copied += (fill->count * sizeof (*vector));
- iobref_merge (iobref, fill->iobref);
- }
+void
+ra_frame_unwind(call_frame_t *frame)
+{
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ ra_fill_t *next = NULL;
+ fd_t *fd = NULL;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+
+ local = frame->local;
+ fill = local->fill.next;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+
+ frame->local = NULL;
+
+ while (fill != &local->fill) {
+ count += fill->count;
+ fill = fill->next;
+ }
+
+ vector = GF_CALLOC(count, sizeof(*vector), gf_ra_mt_iovec);
+ if (vector == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(iobref);
+ iobref = NULL;
+ }
+
+ fill = local->fill.next;
+
+ while (fill != &local->fill) {
+ next = fill->next;
+
+ if ((vector != NULL) && (iobref != NULL)) {
+ memcpy(((char *)vector) + copied, fill->vector,
+ fill->count * sizeof(*vector));
+
+ copied += (fill->count * sizeof(*vector));
+ if (iobref_merge(iobref, fill->iobref)) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ iobref_unref(iobref);
+ iobref = NULL;
+ }
+ }
- fill->next->prev = fill->prev;
- fill->prev->next = fill->prev;
+ fill->next->prev = fill->prev;
+ fill->prev->next = fill->prev;
- iobref_unref (fill->iobref);
- free (fill->vector);
- free (fill);
+ iobref_unref(fill->iobref);
+ GF_FREE(fill->vector);
+ GF_FREE(fill);
- fill = next;
- }
+ fill = next;
+ }
- fd = local->fd;
- ret = fd_ctx_get (fd, frame->this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ fd = local->fd;
+ fd_ctx_get(fd, frame->this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
- vector, count, &file->stbuf, iobref);
+ STACK_UNWIND_STRICT(readv, frame, local->op_ret, local->op_errno, vector,
+ count, &file->stbuf, iobref, NULL);
- iobref_unref (iobref);
- pthread_mutex_destroy (&local->local_lock);
- free (local);
- free (vector);
+ iobref_unref(iobref);
+ pthread_mutex_destroy(&local->local_lock);
+ mem_put(local);
+ GF_FREE(vector);
- return;
+out:
+ return;
}
/*
@@ -418,47 +458,55 @@ ra_frame_unwind (call_frame_t *frame)
*
*/
void
-ra_frame_return (call_frame_t *frame)
+ra_frame_return(call_frame_t *frame)
{
- ra_local_t *local = NULL;
- int32_t wait_count = 0;
+ ra_local_t *local = NULL;
+ int32_t wait_count = 0;
- local = frame->local;
- assert (local->wait_count > 0);
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
- ra_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ra_local_unlock (local);
+ local = frame->local;
+ GF_ASSERT(local->wait_count > 0);
- if (!wait_count)
- ra_frame_unwind (frame);
+ ra_local_lock(local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ra_local_unlock(local);
- return;
+ if (!wait_count)
+ ra_frame_unwind(frame);
+
+out:
+ return;
}
-/*
+/*
* ra_page_wakeup -
* @page:
*
*/
ra_waitq_t *
-ra_page_wakeup (ra_page_t *page)
+ra_page_wakeup(ra_page_t *page)
{
- ra_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame;
+ ra_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ra_frame_fill (page, frame);
- }
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ra_frame_fill(page, frame);
+ }
- return waitq;
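+ /* If the page was marked stale while waiters were queued, drop it now
+ * so subsequent reads fetch fresh data. */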
+ if (page->stale) {
+ ra_page_purge(page);
+ }
+out:
+ return waitq;
}
/*
@@ -467,16 +515,22 @@ ra_page_wakeup (ra_page_t *page)
*
*/
void
-ra_page_purge (ra_page_t *page)
+ra_page_purge(ra_page_t *page)
{
- page->prev->next = page->next;
- page->next->prev = page->prev;
-
- if (page->iobref) {
- iobref_unref (page->iobref);
- }
- free (page->vector);
- free (page);
+ GF_VALIDATE_OR_GOTO("read-ahead", page, out);
+
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ if (page->iobref) {
+ iobref_unref(page->iobref);
+ }
+
+ GF_FREE(page->vector);
+ GF_FREE(page);
+
+out:
+ return;
}
/*
@@ -487,59 +541,65 @@ ra_page_purge (ra_page_t *page)
*
*/
ra_waitq_t *
-ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
+ra_page_error(ra_page_t *page, int32_t op_ret, int32_t op_errno)
{
+ ra_waitq_t *waitq = NULL;
+ ra_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ ra_local_t *local = NULL;
- ra_waitq_t *waitq = NULL;
- ra_waitq_t *trav = NULL;
- call_frame_t *frame = NULL;
- ra_local_t *local = NULL;
+ GF_VALIDATE_OR_GOTO("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
- local = frame->local;
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
+ local = frame->local;
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
- ra_page_purge (page);
+ ra_page_purge(page);
- return waitq;
+out:
+ return waitq;
}
-/*
+/*
* ra_file_destroy -
* @file:
*
*/
void
-ra_file_destroy (ra_file_t *file)
+ra_file_destroy(ra_file_t *file)
{
- ra_conf_t *conf = NULL;
- ra_page_t *trav = NULL;
-
- conf = file->conf;
-
- ra_conf_lock (conf);
- {
- file->prev->next = file->next;
- file->next->prev = file->prev;
- }
- ra_conf_unlock (conf);
-
- trav = file->pages.next;
- while (trav != &file->pages) {
- ra_page_error (trav, -1, EINVAL);
- trav = file->pages.next;
- }
-
- pthread_mutex_destroy (&file->file_lock);
- free (file);
+ ra_conf_t *conf = NULL;
+ ra_page_t *trav = NULL;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", file, out);
+
+ conf = file->conf;
+
+ ra_conf_lock(conf);
+ {
+ file->prev->next = file->next;
+ file->next->prev = file->prev;
+ }
+ ra_conf_unlock(conf);
+
+ trav = file->pages.next;
+ while (trav != &file->pages) {
+ ra_page_error(trav, -1, EINVAL);
+ trav = file->pages.next;
+ }
+
+ pthread_mutex_destroy(&file->file_lock);
+ GF_FREE(file);
+
+out:
+ return;
}
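
ra_page_wakeup() and ra_page_error() both detach a page's wait queue and hand it back to the caller; each ra_waitq_t node's data field is a parked call frame. A minimal sketch of how such a queue can be drained follows; it is not part of the patch, the helper name is illustrative, and it assumes the nodes were allocated with the GF_CALLOC family so GF_FREE applies:

    /* Illustrative only: resume every frame parked on a page's wait queue.
     * ra_frame_return() decrements the frame's wait_count and unwinds the
     * read once it reaches zero; the queue nodes themselves are freed here. */
    static void
    drain_waitq(ra_waitq_t *waitq)
    {
        ra_waitq_t *trav = NULL;
        ra_waitq_t *next = NULL;

        for (trav = waitq; trav != NULL; trav = next) {
            next = trav->next;
            ra_frame_return((call_frame_t *)trav->data);
            GF_FREE(trav);
        }
    }
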
diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
new file mode 100644
index 00000000000..f07cfc5bba5
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RA_MEM_TYPES_H__
+#define __RA_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_ra_mem_types_ {
+ gf_ra_mt_ra_file_t = gf_common_mt_end + 1,
+ gf_ra_mt_ra_conf_t,
+ gf_ra_mt_ra_page_t,
+ gf_ra_mt_ra_waitq_t,
+ gf_ra_mt_ra_fill_t,
+ gf_ra_mt_iovec,
+ gf_ra_mt_end
+};
+#endif
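
Each enumerator above names an accounting bucket for the GF_CALLOC/GF_FREE allocators; mem_acct_init() in this patch registers gf_ra_mt_end + 1 such buckets via xlator_mem_acct_init(). A minimal sketch of the allocation idiom, not part of the patch:

    /* Sketch: allocate a ra_file_t against its accounting bucket, as
     * ra_open_cbk() does later in this patch, and release it with GF_FREE(). */
    static ra_file_t *
    file_alloc_example(void)
    {
        ra_file_t *file = NULL;

        file = GF_CALLOC(1, sizeof(*file), gf_ra_mt_ra_file_t);
        if (!file)
            return NULL; /* callers treat this as ENOMEM */

        return file; /* eventually released with GF_FREE(file) */
    }
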
diff --git a/xlators/performance/read-ahead/src/read-ahead-messages.h b/xlators/performance/read-ahead/src/read-ahead-messages.h
new file mode 100644
index 00000000000..0302b7a7122
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead-messages.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _READ_AHEAD_MESSAGES_H_
+#define _READ_AHEAD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but do not delete it. This is to prevent reuse of IDs
+ * by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(READ_AHEAD, READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ READ_AHEAD_MSG_VOL_MISCONFIGURED, READ_AHEAD_MSG_NO_MEMORY,
+ READ_AHEAD_MSG_FD_CONTEXT_NOT_SET,
+ READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND,
+ READ_AHEAD_MSG_XLATOR_CONF_NULL);
+
+#endif /* _READ_AHEAD_MESSAGES_H_ */
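
The IDs declared by GLFS_MSGID() above are consumed by gf_msg(); a minimal usage sketch, not part of the patch, mirroring a call made later in read-ahead.c (the wrapper function is purely illustrative):

    /* Sketch: structured log carrying one of the message IDs above.
     * gf_msg() arguments: domain, log level, errno value, message ID, format. */
    static void
    log_ctx_failure_example(xlator_t *this, fd_t *fd)
    {
        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, READ_AHEAD_MSG_NO_MEMORY,
               "cannot set read-ahead context information in fd (%p)", fd);
    }
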
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
index e4c1ab2dab0..5246e1317d2 100644
--- a/xlators/performance/read-ahead/src/read-ahead.c
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -1,226 +1,201 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*
- TODO:
- - handle O_DIRECT
- - maintain offset, flush on lseek
- - ensure efficient memory managment in case of random seek
+/*
+ TODO:
+ - handle O_DIRECT
+ - maintain offset, flush on lseek
+ - ensure efficient memory management in case of random seek
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
#include "read-ahead.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include <assert.h>
#include <sys/time.h>
+#include "read-ahead-messages.h"
static void
-read_ahead (call_frame_t *frame, ra_file_t *file);
-
+read_ahead(call_frame_t *frame, ra_file_t *file);
int
-ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ra_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- long wbflags = 0;
-
- conf = this->private;
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- wbflags = (long)frame->local;
-
- file = CALLOC (1, sizeof (*file));
- if (!file) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
-
- if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT open, we disable caching on it */
-
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
-
- if (wbflags & GF_OPEN_NOWB) {
- file->disabled = 1;
- }
-
- file->offset = (unsigned long long) 0;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
-
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
-
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
-
- if (!file->disabled) {
- file->page_count = 1;
- }
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
- if (ret == -1) {
- ra_file_destroy (file);
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+
+ conf = this->private;
+
+ if (op_ret == -1) {
+ goto unwind;
+ }
+
+ file = GF_CALLOC(1, sizeof(*file), gf_ra_mt_ra_file_t);
+ if (!file) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* If O_DIRECT open, we disable caching on it */
+
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
+
+ file->offset = (unsigned long long)0;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long)0;
+ file->pages.file = file;
+
+ ra_conf_lock(conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock(conf);
+
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init(&file->file_lock, NULL);
+
+ if (!file->disabled) {
+ file->page_count = 1;
+ }
+
+ ret = fd_ctx_set(fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_msg(frame->this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_NO_MEMORY,
+               "cannot set read-ahead context "
+               "information in fd (%p)",
+ fd);
+ ra_file_destroy(file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
-
int
-ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
-{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
-
- conf = this->private;
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- file = CALLOC (1, sizeof (*file));
- if (!file) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
-
- if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT open, we disable caching on it */
-
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
-
- file->offset = (unsigned long long) 0;
- //file->size = fd->inode->buf.st_size;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
-
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
-
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
- if (ret == -1) {
- ra_file_destroy (file);
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ra_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+
+ conf = this->private;
+
+ if (op_ret == -1) {
+ goto unwind;
+ }
+
+ file = GF_CALLOC(1, sizeof(*file), gf_ra_mt_ra_file_t);
+ if (!file) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* If O_DIRECT open, we disable caching on it */
+
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
+
+ file->offset = (unsigned long long)0;
+ // file->size = fd->inode->buf.ia_size;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long)0;
+ file->pages.file = file;
+
+ ra_conf_lock(conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock(conf);
+
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init(&file->file_lock, NULL);
+
+ ret = fd_ctx_set(fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_NO_MEMORY,
+               "cannot set read-ahead context "
+               "information in fd (%p)",
+ fd);
+ ra_file_destroy(file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
-
int
-ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ra_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- frame->local = (void *)(long)wbflags;
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
- STACK_WIND (frame, ra_open_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open,
- loc, flags, fd, wbflags);
+ STACK_WIND(frame, ra_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
+ return 0;
}
int
-ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ra_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, ra_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+
+ STACK_WIND(frame, ra_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
- return 0;
+ return 0;
}
/* free cache pages between offset and offset+size,
@@ -228,750 +203,1070 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
static void
-flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size)
+flush_region(call_frame_t *frame, ra_file_t *file, off_t offset, off_t size,
+ int for_write)
{
- ra_page_t *trav = NULL;
- ra_page_t *next = NULL;
+ ra_page_t *trav = NULL;
+ ra_page_t *next = NULL;
+
+ ra_file_lock(file);
+ {
+ trav = file->pages.next;
+ while (trav != &file->pages && trav->offset < (offset + size)) {
+ next = trav->next;
+ if (trav->offset >= offset) {
+ if (!trav->waitq) {
+ ra_page_purge(trav);
+ } else {
+ trav->stale = 1;
+
+ if (for_write) {
+ trav->poisoned = 1;
+ }
+ }
+ }
+ trav = next;
+ }
+ }
+ ra_file_unlock(file);
+}
+
+int
+ra_release(xlator_t *this, fd_t *fd)
+{
+ uint64_t tmp_file = 0;
+ int ret = 0;
- ra_file_lock (file);
- {
- trav = file->pages.next;
- while (trav != &file->pages
- && trav->offset < (offset + size)) {
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
- next = trav->next;
- if (trav->offset >= offset && !trav->waitq) {
- ra_page_purge (trav);
- }
- trav = next;
- }
- }
- ra_file_unlock (file);
+ ret = fd_ctx_del(fd, this, &tmp_file);
+
+ if (!ret) {
+ ra_file_destroy((ra_file_t *)(long)tmp_file);
+ }
+
+out:
+ return 0;
}
+void
+read_ahead(call_frame_t *frame, ra_file_t *file)
+{
+ off_t ra_offset = 0;
+ size_t ra_size = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ off_t cap = 0;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, file, out);
+
+ if (!file->page_count) {
+ goto out;
+ }
+
+ ra_size = file->page_size * file->page_count;
+ ra_offset = gf_floor(file->offset, file->page_size);
+ cap = file->size ? file->size : file->offset + ra_size;
+
+ while (ra_offset < min(file->offset + ra_size, cap)) {
+ ra_file_lock(file);
+ {
+ trav = ra_page_get(file, ra_offset);
+ }
+ ra_file_unlock(file);
+
+ if (!trav)
+ break;
+
+ ra_offset += file->page_size;
+ }
+
+ if (trav) {
+ /* comfortable enough */
+ goto out;
+ }
+
+ trav_offset = ra_offset;
+
+ cap = file->size ? file->size : ra_offset + ra_size;
+
+ while (trav_offset < min(ra_offset + ra_size, cap)) {
+ fault = 0;
+ ra_file_lock(file);
+ {
+ trav = ra_page_get(file, trav_offset);
+ if (!trav) {
+ fault = 1;
+ trav = ra_page_create(file, trav_offset);
+ if (trav)
+ trav->dirty = 1;
+ }
+ }
+ ra_file_unlock(file);
+
+ if (!trav) {
+ /* OUT OF MEMORY */
+ break;
+ }
+
+ if (fault) {
+ gf_msg_trace(frame->this->name, 0, "RA at offset=%" PRId64,
+ trav_offset);
+ ra_page_fault(file, frame, trav_offset);
+ }
+ trav_offset += file->page_size;
+ }
+
+out:
+ return;
+}
int
-ra_release (xlator_t *this, fd_t *fd)
+ra_need_atime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ GF_ASSERT(frame);
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+dispatch_requests(call_frame_t *frame, ra_file_t *file)
{
- uint64_t tmp_file = 0;
- int ret = 0;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ call_frame_t *ra_frame = NULL;
+ char need_atime_update = 1;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, file, out);
+
+ local = frame->local;
+ conf = file->conf;
+
+ rounded_offset = gf_floor(local->offset, file->page_size);
+ rounded_end = gf_roof(local->offset + local->size, file->page_size);
+
+ trav_offset = rounded_offset;
+
+ while (trav_offset < rounded_end) {
+ fault = 0;
+
+ ra_file_lock(file);
+ {
+ trav = ra_page_get(file, trav_offset);
+ if (!trav) {
+ trav = ra_page_create(file, trav_offset);
+ if (!trav) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ fault = 1;
+ need_atime_update = 0;
+ }
+ trav->dirty = 0;
+
+ if (trav->ready) {
+ gf_msg_trace(frame->this->name, 0, "HIT at offset=%" PRId64 ".",
+ trav_offset);
+ ra_frame_fill(trav, frame);
+ } else {
+ gf_msg_trace(frame->this->name, 0,
+ "IN-TRANSIT at "
+ "offset=%" PRId64 ".",
+ trav_offset);
+ ra_wait_on_page(trav, frame);
+ need_atime_update = 0;
+ }
+ }
+ unlock:
+ ra_file_unlock(file);
+
+ if (local->op_ret == -1) {
+ goto out;
+ }
+
+ if (fault) {
+ gf_msg_trace(frame->this->name, 0, "MISS at offset=%" PRId64 ".",
+ trav_offset);
+ ra_page_fault(file, frame, trav_offset);
+ }
+
+ trav_offset += file->page_size;
+ }
+
+ if (need_atime_update && conf->force_atime_update) {
+ /* TODO: use untimens() since readv() can confuse underlying
+ io-cache and others */
+ ra_frame = copy_frame(frame);
+ if (ra_frame == NULL) {
+ goto out;
+ }
- ret = fd_ctx_del (fd, this, &tmp_file);
-
- if (!ret) {
- ra_file_destroy ((ra_file_t *)(long)tmp_file);
- }
+ STACK_WIND(ra_frame, ra_need_atime_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->readv, file->fd, 1, 1, 0,
+ NULL);
+ }
- return 0;
+out:
+ return;
}
+int
+ra_readv_disabled_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ GF_ASSERT(frame);
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
-void
-read_ahead (call_frame_t *frame, ra_file_t *file)
+ return 0;
+}
+
+int
+ra_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- off_t ra_offset = 0;
- size_t ra_size = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- off_t cap = 0;
- char fault = 0;
+ ra_file_t *file = NULL;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ int op_errno = EINVAL;
+ char expected_offset = 1;
+ uint64_t tmp_file = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+ conf = this->private;
+
+ gf_msg_trace(this->name, 0,
+ "NEW REQ at offset=%" PRId64 " for size=%" GF_PRI_SIZET "",
+ offset, size);
+
+ fd_ctx_get(fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file || file->disabled) {
+ goto disabled;
+ }
+
+ if (file->offset != offset) {
+ gf_msg_trace(this->name, 0,
+ "unexpected offset (%" PRId64 " != %" PRId64
+ ") "
+ "resetting",
+ file->offset, offset);
+
+ expected_offset = file->expected = file->page_count = 0;
+ } else {
+ gf_msg_trace(this->name, 0,
+ "expected offset (%" PRId64 ") when page_count=%d", offset,
+ file->page_count);
+
+ if (file->expected < (file->page_size * conf->page_count)) {
+ file->expected += size;
+ file->page_count = min((file->expected / file->page_size),
+ conf->page_count);
+ }
+ }
- if (!file->page_count)
- return;
+ if (!expected_offset) {
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
- ra_size = file->page_size * file->page_count;
- ra_offset = floor (file->offset, file->page_size);
- cap = file->size ? file->size : file->offset + ra_size;
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- while (ra_offset < min (file->offset + ra_size, cap)) {
+ local->fd = fd;
+ local->offset = offset;
+ local->size = size;
+ local->wait_count = 1;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, ra_offset);
- }
- ra_file_unlock (file);
+ local->fill.next = &local->fill;
+ local->fill.prev = &local->fill;
- if (!trav)
- break;
+ pthread_mutex_init(&local->local_lock, NULL);
- ra_offset += file->page_size;
- }
+ frame->local = local;
- if (trav)
- /* comfortable enough */
- return;
+ dispatch_requests(frame, file);
- trav_offset = ra_offset;
+ flush_region(frame, file, 0, gf_floor(offset, file->page_size), 0);
- trav = file->pages.next;
- cap = file->size ? file->size : ra_offset + ra_size;
+ read_ahead(frame, file);
- while (trav_offset < min(ra_offset + ra_size, cap)) {
- fault = 0;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- fault = 1;
- trav = ra_page_create (file, trav_offset);
- if (trav)
- trav->dirty = 1;
- }
- }
- ra_file_unlock (file);
+ file->offset = offset + size;
- if (!trav) {
- /* OUT OF MEMORY */
- break;
- }
+ ra_frame_return(frame);
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "RA at offset=%"PRId64, trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
- trav_offset += file->page_size;
- }
+ return 0;
- return;
+unwind:
+ STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+
+disabled:
+ STACK_WIND(frame, ra_readv_disabled_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
}
+int
+ra_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ GF_ASSERT(frame);
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ra_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- STACK_DESTROY (frame->root);
- return 0;
+ GF_ASSERT(frame);
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
+int
+ra_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
-static void
-dispatch_requests (call_frame_t *frame, ra_file_t *file)
-{
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- call_frame_t *ra_frame = NULL;
- char need_atime_update = 1;
- char fault = 0;
-
- local = frame->local;
- conf = file->conf;
-
- rounded_offset = floor (local->offset, file->page_size);
- rounded_end = roof (local->offset + local->size, file->page_size);
-
- trav_offset = rounded_offset;
- trav = file->pages.next;
-
- while (trav_offset < rounded_end) {
- fault = 0;
-
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- trav = ra_page_create (file, trav_offset);
- fault = 1;
- need_atime_update = 0;
- }
-
- if (!trav) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
- }
-
- if (trav->ready) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "HIT at offset=%"PRId64".",
- trav_offset);
- ra_frame_fill (trav, frame);
- } else {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "IN-TRANSIT at offset=%"PRId64".",
- trav_offset);
- ra_wait_on_page (trav, frame);
- need_atime_update = 0;
- }
- }
- unlock:
- ra_file_unlock (file);
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "MISS at offset=%"PRId64".",
- trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
-
- trav_offset += file->page_size;
- }
-
- if (need_atime_update && conf->force_atime_update) {
- /* TODO: use untimens() since readv() can confuse underlying
- io-cache and others */
- ra_frame = copy_frame (frame);
- if (ra_frame == NULL) {
- goto out;
- }
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- STACK_WIND (ra_frame, ra_need_atime_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, 1, 1);
- }
+ STACK_WIND(frame, ra_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
-out:
- return ;
+unwind:
+ STACK_UNWIND_STRICT(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-
int
-ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ra_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
-
- return 0;
-}
+ int32_t op_errno = EINVAL;
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
-int
-ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- ra_file_t *file = NULL;
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- int op_errno = 0;
- int ret = 0;
- char expected_offset = 1;
- uint64_t tmp_file = 0;
-
- conf = this->private;
-
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
- offset, size);
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "readv received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ STACK_WIND(frame, ra_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
- if (file->offset != offset) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unexpected offset (%"PRId64" != %"PRId64") resetting",
- file->offset, offset);
+unwind:
+ STACK_UNWIND_STRICT(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- expected_offset = file->expected = file->page_count = 0;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "expected offset (%"PRId64") when page_count=%d",
- offset, file->page_count);
+int
+ra_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
- if (file->expected < (conf->page_size * conf->page_count)) {
- file->expected += size;
- file->page_count = min ((file->expected / file->page_size),
- conf->page_count);
- }
- }
+ GF_ASSERT(frame);
- if (!expected_offset) {
- flush_region (frame, file, 0, file->pages.prev->offset + 1);
- }
+ file = frame->local;
- if (file->disabled) {
- STACK_WIND (frame, ra_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, size, offset);
- return 0;
- }
+ if (file) {
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
+ }
- local = (void *) CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
- local->fd = fd;
- local->offset = offset;
- local->size = size;
- local->wait_count = 1;
+int
+ra_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
- local->fill.next = &local->fill;
- local->fill.prev = &local->fill;
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
- pthread_mutex_init (&local->local_lock, NULL);
+ inode = fd->inode;
- frame->local = local;
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- dispatch_requests (frame, file);
+ if (!file)
+ continue;
- flush_region (frame, file, 0, floor (offset, file->page_size));
+ if (iter_fd == fd)
+ frame->local = file;
- read_ahead (frame, file);
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
- ra_frame_return (frame);
+ /* reset the read-ahead counters too */
+ file->expected = file->page_count = 0;
+ }
+ }
+ UNLOCK(&inode->lock);
- file->offset = offset + size;
+ STACK_WIND(frame, ra_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
-
- return 0;
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int
-ra_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ra_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
-}
-
+ GF_ASSERT(frame);
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
int
-ra_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *prebuf, struct stat *postbuf)
+ra_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
+ GF_ASSERT(frame);
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
int
-ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "flush received on fd with no"
- " file set in its context");
- goto unwind;
+ra_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc, unwind);
+
+ inode = loc->inode;
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_ftruncate.
+ */
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
}
+ }
+ UNLOCK(&inode->lock);
- flush_region (frame, file, 0, file->pages.prev->offset+1);
-
- STACK_WIND (frame, ra_flush_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush,
- fd);
- return 0;
+ STACK_WIND(frame, ra_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+void
+ra_page_dump(struct ra_page *page)
+{
+ int i = 0;
+ call_frame_t *frame = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ ra_waitq_t *trav = NULL;
-int
-ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
-{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "fsync received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ if (page == NULL) {
+ goto out;
+ }
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1);
- }
+ gf_proc_dump_write("offset", "%" PRId64, page->offset);
- STACK_WIND (frame, ra_fsync_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
- return 0;
+ gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);
-unwind:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
+ gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write("poisoned", "%s", page->poisoned ? "yes" : "no");
-int
-ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");
+
+ for (trav = page->waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ sprintf(key, "waiting-frame[%d]", i++);
+ gf_proc_dump_write(key, "%" PRId64, frame->root->unique);
+ }
+
+out:
+ return;
+}
+
+int32_t
+ra_fdctx_dump(xlator_t *this, fd_t *fd)
{
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ int32_t ret = 0, i = 0;
+ uint64_t tmp_file = 0;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ fd_ctx_get(fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (file == NULL) {
+ ret = 0;
+ goto out;
+ }
- fd = frame->local;
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.read-ahead", "file");
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ gf_proc_dump_add_section("%s", key_prefix);
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ ret = __inode_path(fd->inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
+ gf_proc_dump_write("fd", "%p", fd);
+ gf_proc_dump_write("disabled", "%s", file->disabled ? "yes" : "no");
-int
-ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
-{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "writev received on fd with"
- "no file set in its context");
- goto unwind;
- }
+ if (file->disabled) {
+ ret = 0;
+ goto out;
+ }
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ gf_proc_dump_write("page-size", "%" PRId64, file->page_size);
- /* reset the read-ahead counters too */
- file->expected = file->page_count = 0;
+ gf_proc_dump_write("page-count", "%u", file->page_count);
- frame->local = fd;
+ gf_proc_dump_write("next-expected-offset-for-sequential-reads", "%" PRId64,
+ file->offset);
- STACK_WIND (frame, ra_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ for (page = file->pages.next; page != &file->pages; page = page->next) {
+ gf_proc_dump_write("page", "%d: %p", i++, (void *)page);
+ ra_page_dump(page);
+ }
- return 0;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+ra_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+ ra_conf_t *conf = NULL;
+
+    GF_ASSERT(frame);
+    GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+    GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+    conf = this->private;
+
+ inode = fd->inode;
+
+ if (conf->force_atime_update) {
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
+ }
+ UNLOCK(&inode->lock);
+ }
+
+ STACK_WIND(frame, ra_attr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+int
+ra_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_truncate.
+ */
+ flush_region(frame, file, 0, file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ STACK_WIND(frame, ra_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
int
-ra_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ GF_ASSERT(frame);
+
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+static int
+ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ STACK_WIND(frame, ra_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
int
-ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
- return 0;
+ GF_ASSERT(frame);
+
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+static int
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO(frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO(frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ tmp_file = 0;
+ fd_ctx_get(iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ STACK_WIND(frame, ra_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
int
-ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+ra_priv_dump(xlator_t *this)
+{
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ if (!this) {
+ goto out;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_XLATOR_CONF_NULL,
+ "conf null in xlator");
+ goto out;
+ }
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.read-ahead", "priv");
+
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ ret = pthread_mutex_trylock(&conf->conf_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write("page_size", "%" PRIu64, conf->page_size);
+ gf_proc_dump_write("page_count", "%d", conf->page_count);
+ gf_proc_dump_write("force_atime_update", "%d",
+ conf->force_atime_update);
+ }
+ pthread_mutex_unlock(&conf->conf_lock);
+
+ ret = 0;
+out:
+ if (ret && conf) {
+ gf_proc_dump_write("Unable to dump priv",
+ "(Lock acquisition failed) %s", this->name);
+ }
+ return ret;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
+ int ret = -1;
- inode = loc->inode;
+ if (!this) {
+ goto out;
+ }
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ ret = xlator_mem_acct_init(this, gf_ra_mt_end + 1);
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READ_AHEAD_MSG_NO_MEMORY,
+               "Memory accounting init "
+               "failed");
+ }
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- loc, offset);
- return 0;
+out:
+ return ret;
}
-
int
-ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+reconfigure(xlator_t *this, dict_t *options)
{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
+ ra_conf_t *conf = NULL;
+ int ret = -1;
- inode = fd->inode;
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO("read-ahead", this->private, out);
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ conf = this->private;
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ GF_OPTION_RECONF("page-count", conf->page_count, options, uint32, out);
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat,
- fd);
- return 0;
-}
+ GF_OPTION_RECONF("page-size", conf->page_size, options, size_uint64, out);
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
-int
-ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
-
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- fd, offset);
- return 0;
+ ret = 0;
+out:
+ return ret;
}
int
-ra_priv_dump (xlator_t *this)
+init(xlator_t *this)
{
- ra_conf_t *conf = NULL;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ ra_conf_t *conf = NULL;
+ int32_t ret = -1;
- if (!this)
- return -1;
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READ_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: read-ahead not configured with exactly one"
+ " child");
+ goto out;
+ }
- ret = pthread_mutex_trylock (&conf->conf_lock);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Unable to lock client %s"
- " errno: %d", this->name, errno);
- return -1;
- }
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, READ_AHEAD_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
+ conf = (void *)GF_CALLOC(1, sizeof(*conf), gf_ra_mt_ra_conf_t);
+ if (conf == NULL) {
+ goto out;
+ }
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.read-ahead",
- "priv");
+ conf->page_size = this->ctx->page_size;
- gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_build_key (key, key_prefix, "page_size");
- gf_proc_dump_write (key, "%d", conf->page_size);
- gf_proc_dump_build_key (key, key_prefix, "page_count");
- gf_proc_dump_write (key, "%d", conf->page_count);
- gf_proc_dump_build_key (key, key_prefix, "force_atime_update");
- gf_proc_dump_write (key, "%d", conf->force_atime_update);
+ GF_OPTION_INIT("page-size", conf->page_size, size_uint64, out);
- pthread_mutex_unlock (&conf->conf_lock);
+ GF_OPTION_INIT("page-count", conf->page_count, uint32, out);
- return 0;
-}
+ GF_OPTION_INIT("force-atime-update", conf->force_atime_update, bool, out);
-int
-init (xlator_t *this)
-{
- ra_conf_t *conf = NULL;
- dict_t *options = this->options;
- char *page_count_string = NULL;
- int32_t ret = -1;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: read-ahead not configured with exactly one"
- " child");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = (void *) CALLOC (1, sizeof (*conf));
- if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
- goto out;
- }
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
- conf->page_size = this->ctx->page_size;
- conf->page_count = 4;
-
- if (dict_get (options, "page-count"))
- page_count_string = data_to_str (dict_get (options,
- "page-count"));
- if (page_count_string)
- {
- if (gf_string2uint_base10 (page_count_string, &conf->page_count)
- != 0)
- {
- gf_log ("read-ahead",
- GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "page-count\"",
- page_count_string);
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_count = %u",
- conf->page_count);
- }
-
- if (dict_get (options, "force-atime-update")) {
- char *force_atime_update_str = data_to_str (dict_get (options,
- "force-atime-update"));
- if (gf_string2boolean (force_atime_update_str,
- &conf->force_atime_update) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'force-atime-update' takes only boolean "
- "options");
- goto out;
- }
- if (conf->force_atime_update)
- gf_log (this->name, GF_LOG_DEBUG, "Forcing atime "
- "updates on cache hit");
- }
-
- conf->files.next = &conf->files;
- conf->files.prev = &conf->files;
-
- pthread_mutex_init (&conf->conf_lock, NULL);
- this->private = conf;
- ret = 0;
+ conf->files.next = &conf->files;
+ conf->files.prev = &conf->files;
+
+ pthread_mutex_init(&conf->conf_lock, NULL);
+
+ this->local_pool = mem_pool_new(ra_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READ_AHEAD_MSG_NO_MEMORY,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = conf;
+ ret = 0;
out:
- if (ret == -1) {
- if (conf != NULL) {
- FREE (conf);
- }
- }
+ if (ret == -1) {
+ GF_FREE(conf);
+ }
- return ret;
+ return ret;
}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- ra_conf_t *conf = this->private;
+ ra_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("read-ahead", this, out);
+
+ conf = this->private;
+ if (conf == NULL) {
+ goto out;
+ }
+
+ this->private = NULL;
- if (conf == NULL)
- return;
+    /* The file structures allocated in open and create are not destroyed
+     * here; until that is fixed, the assert below is demoted to a warning.
+ GF_ASSERT ((conf->files.next == &conf->files)
+ && (conf->files.prev == &conf->files));
+ */
+ if (!((conf->files.next == &conf->files) &&
+ (conf->files.prev == &conf->files))) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ READ_AHEAD_MSG_UNDESTROYED_FILE_FOUND,
+ "undestroyed read ahead file structures found");
+ }
- pthread_mutex_destroy (&conf->conf_lock);
- FREE (conf);
+ pthread_mutex_destroy(&conf->conf_lock);
+ GF_FREE(conf);
- this->private = NULL;
- return;
+out:
+ return;
}
struct xlator_fops fops = {
- .open = ra_open,
- .create = ra_create,
- .readv = ra_readv,
- .writev = ra_writev,
- .flush = ra_flush,
- .fsync = ra_fsync,
- .truncate = ra_truncate,
- .ftruncate = ra_ftruncate,
- .fstat = ra_fstat,
-};
-
-struct xlator_mops mops = {
+ .open = ra_open,
+ .create = ra_create,
+ .readv = ra_readv,
+ .writev = ra_writev,
+ .flush = ra_flush,
+ .fsync = ra_fsync,
+ .truncate = ra_truncate,
+ .ftruncate = ra_ftruncate,
+ .fstat = ra_fstat,
+ .discard = ra_discard,
+ .zerofill = ra_zerofill,
};
struct xlator_cbks cbks = {
- .release = ra_release,
+ .release = ra_release,
};
struct xlator_dumpops dumpops = {
- .priv = ra_priv_dump,
+ .priv = ra_priv_dump,
+ .fdctx = ra_fdctx_dump,
};
struct volume_options options[] = {
- { .key = {"force-atime-update"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"page-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 16
- },
- { .key = {NULL} },
+ {
+ .key = {"read-ahead"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable read-ahead",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"force-atime-update"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {1},
+ .tags = {"read-ahead"},
+ .default_value = "false"},
+ {.key = {"page-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 16,
+ .default_value = "4",
+ .op_version = {1},
+ .tags = {"read-ahead"},
+ .description = "Number of pages that will be pre-fetched"},
+ {.key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 1048576 * 64,
+ .default_value = "131072",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"read-ahead"},
+ .description = "Page size with which read-ahead performs server I/O"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"read-ahead"},
+ .description = "Enable/Disable read ahead translator"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "read-ahead",
+ .category = GF_MAINTAINED,
};
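
With the option defaults declared above (page-size 131072, page-count 4), read_ahead() keeps roughly a 512 KiB window populated ahead of a sequential reader. A rough worked example, not part of the patch, assuming the known file size does not cap the window:

    /* Illustrative arithmetic only, using the option defaults above. */
    uint64_t page_size = 131072;                        /* "page-size" default  */
    uint32_t page_count = 4;                            /* "page-count" default */
    off_t file_offset = 262144;                         /* hypothetical offset  */

    size_t ra_size = page_size * page_count;            /* 524288 bytes */
    off_t ra_offset = gf_floor(file_offset, page_size); /* 262144       */
    /* read_ahead() then checks or creates pages page_size apart inside the
     * window [ra_offset, file_offset + ra_size), i.e. at offsets 262144,
     * 393216, 524288 and 655360, faulting in only the ones not yet cached. */
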
diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h
index d11143551f0..e9432fb47cc 100644
--- a/xlators/performance/read-ahead/src/read-ahead.h
+++ b/xlators/performance/read-ahead/src/read-ahead.h
@@ -1,36 +1,22 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __READ_AHEAD_H
#define __READ_AHEAD_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
+#include "read-ahead-mem-types.h"
struct ra_conf;
struct ra_local;
@@ -38,82 +24,77 @@ struct ra_page;
struct ra_file;
struct ra_waitq;
-
struct ra_waitq {
- struct ra_waitq *next;
- void *data;
+ struct ra_waitq *next;
+ void *data;
};
-
struct ra_fill {
- struct ra_fill *next;
- struct ra_fill *prev;
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
- struct iobref *iobref;
+ struct ra_fill *next;
+ struct ra_fill *prev;
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
};
-
struct ra_local {
- mode_t mode;
- struct ra_fill fill;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- off_t pending_offset;
- size_t pending_size;
- fd_t *fd;
- int32_t wait_count;
- pthread_mutex_t local_lock;
+ mode_t mode;
+ struct ra_fill fill;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ off_t pending_offset;
+ size_t pending_size;
+ fd_t *fd;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
};
-
struct ra_page {
- struct ra_page *next;
- struct ra_page *prev;
- struct ra_file *file;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ra_waitq *waitq;
- struct iobref *iobref;
+ struct ra_page *next;
+ struct ra_page *prev;
+ struct ra_file *file;
+ char dirty; /* Internal request, not from user. */
+ char poisoned; /* Pending read invalidated by write. */
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ra_waitq *waitq;
+ struct iobref *iobref;
+ char stale;
};
-
struct ra_file {
- struct ra_file *next;
- struct ra_file *prev;
- struct ra_conf *conf;
- fd_t *fd;
- int disabled;
- size_t expected;
- struct ra_page pages;
- off_t offset;
- size_t size;
- int32_t refcount;
- pthread_mutex_t file_lock;
- struct stat stbuf;
- uint64_t page_size;
- uint32_t page_count;
+ struct ra_file *next;
+ struct ra_file *prev;
+ struct ra_conf *conf;
+ fd_t *fd;
+ int disabled;
+ size_t expected;
+ struct ra_page pages;
+ off_t offset;
+ size_t size;
+ int32_t refcount;
+ pthread_mutex_t file_lock;
+ struct iatt stbuf;
+ uint64_t page_size;
+ uint32_t page_count;
};
-
struct ra_conf {
- uint64_t page_size;
- uint32_t page_count;
- void *cache_block;
- struct ra_file files;
- gf_boolean_t force_atime_update;
- pthread_mutex_t conf_lock;
+ uint64_t page_size;
+ uint32_t page_count;
+ void *cache_block;
+ struct ra_file files;
+ gf_boolean_t force_atime_update;
+ pthread_mutex_t conf_lock;
};
-
typedef struct ra_conf ra_conf_t;
typedef struct ra_local ra_local_t;
typedef struct ra_page ra_page_t;
@@ -122,77 +103,69 @@ typedef struct ra_waitq ra_waitq_t;
typedef struct ra_fill ra_fill_t;
ra_page_t *
-ra_page_get (ra_file_t *file,
- off_t offset);
+ra_page_get(ra_file_t *file, off_t offset);
ra_page_t *
-ra_page_create (ra_file_t *file,
- off_t offset);
+ra_page_create(ra_file_t *file, off_t offset);
void
-ra_page_fault (ra_file_t *file,
- call_frame_t *frame,
- off_t offset);
+ra_page_fault(ra_file_t *file, call_frame_t *frame, off_t offset);
void
-ra_wait_on_page (ra_page_t *page,
- call_frame_t *frame);
+ra_wait_on_page(ra_page_t *page, call_frame_t *frame);
ra_waitq_t *
-ra_page_wakeup (ra_page_t *page);
+ra_page_wakeup(ra_page_t *page);
void
-ra_page_flush (ra_page_t *page);
+ra_page_flush(ra_page_t *page);
ra_waitq_t *
-ra_page_error (ra_page_t *page,
- int32_t op_ret,
- int32_t op_errno);
+ra_page_error(ra_page_t *page, int32_t op_ret, int32_t op_errno);
void
-ra_page_purge (ra_page_t *page);
+ra_page_purge(ra_page_t *page);
void
-ra_frame_return (call_frame_t *frame);
+ra_frame_return(call_frame_t *frame);
void
-ra_frame_fill (ra_page_t *page,
- call_frame_t *frame);
+ra_frame_fill(ra_page_t *page, call_frame_t *frame);
void
-ra_file_destroy (ra_file_t *file);
+ra_file_destroy(ra_file_t *file);
static inline void
-ra_file_lock (ra_file_t *file)
+ra_file_lock(ra_file_t *file)
{
- pthread_mutex_lock (&file->file_lock);
+ pthread_mutex_lock(&file->file_lock);
}
static inline void
-ra_file_unlock (ra_file_t *file)
+ra_file_unlock(ra_file_t *file)
{
- pthread_mutex_unlock (&file->file_lock);
+ pthread_mutex_unlock(&file->file_lock);
}
static inline void
-ra_conf_lock (ra_conf_t *conf)
+ra_conf_lock(ra_conf_t *conf)
{
- pthread_mutex_lock (&conf->conf_lock);
+ pthread_mutex_lock(&conf->conf_lock);
}
static inline void
-ra_conf_unlock (ra_conf_t *conf)
+ra_conf_unlock(ra_conf_t *conf)
{
- pthread_mutex_unlock (&conf->conf_lock);
+ pthread_mutex_unlock(&conf->conf_lock);
}
static inline void
-ra_local_lock (ra_local_t *local)
+ra_local_lock(ra_local_t *local)
{
- pthread_mutex_lock (&local->local_lock);
+ pthread_mutex_lock(&local->local_lock);
}
static inline void
-ra_local_unlock (ra_local_t *local)
+ra_local_unlock(ra_local_t *local)
{
- pthread_mutex_unlock (&local->local_lock);
+ pthread_mutex_unlock(&local->local_lock);
}
#endif /* __READ_AHEAD_H */
diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/performance/readdir-ahead/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am
new file mode 100644
index 00000000000..3d6b6ae951f
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/Makefile.am
@@ -0,0 +1,18 @@
+xlator_LTLIBRARIES = readdir-ahead.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+readdir_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+readdir_ahead_la_SOURCES = readdir-ahead.c
+readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h \
+ readdir-ahead-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
new file mode 100644
index 00000000000..498ffae7f64
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RDA_MEM_TYPES_H__
+#define __RDA_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_rda_mem_types_ {
+ gf_rda_mt_rda_local = gf_common_mt_end + 1,
+ gf_rda_mt_rda_fd_ctx,
+ gf_rda_mt_rda_priv,
+ gf_rda_mt_inode_ctx_t,
+ gf_rda_mt_end
+};
+
+#endif
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
new file mode 100644
index 00000000000..28ec14dd845
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _READDIR_AHEAD_MESSAGES_H_
+#define _READDIR_AHEAD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(READDIR_AHEAD, READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ READDIR_AHEAD_MSG_VOL_MISCONFIGURED, READDIR_AHEAD_MSG_NO_MEMORY,
+ READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+ READDIR_AHEAD_MSG_OUT_OF_SEQUENCE, READDIR_AHEAD_MSG_DICT_OP_FAILED);
+
+#endif /* _READDIR_AHEAD_MESSAGES_H_ */
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
new file mode 100644
index 00000000000..4ba7ee7077a
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
@@ -0,0 +1,1382 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * performance/readdir-ahead preloads a local buffer with directory entries
+ * on opendir. The optimization involves using maximum sized gluster rpc
+ * requests (128k) to minimize overhead of smaller client requests.
+ *
+ * For example, fuse currently supports a maximum readdir buffer of 4k
+ * (regardless of the filesystem client's buffer size). readdir-ahead should
+ * effectively convert these smaller requests into fewer, larger sized requests
+ * for simple, sequential workloads (i.e., ls).
+ *
+ * The translator is currently designed to handle the simple, sequential case
+ * only. If a non-sequential directory read occurs, readdir-ahead disables
+ * preloads on the directory.
+ */
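+
+/*
+ * To put rough numbers on the above: with the default 128k request size
+ * and a 4k fuse readdir buffer, one preload request covers 131072 / 4096
+ * = 32 fuse-sized replies, so a sequential listing needs on the order of
+ * 32x fewer network round trips (the exact ratio depends on entry sizes
+ * and the rda-request-size option).
+ */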
+
+#include <math.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include "readdir-ahead.h"
+#include "readdir-ahead-mem-types.h"
+#include <glusterfs/defaults.h>
+#include "readdir-ahead-messages.h"
+static int
+rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+
+static void
+rda_local_wipe(struct rda_local *local)
+{
+ if (!local)
+ return;
+
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->xattrs)
+ dict_unref(local->xattrs);
+ if (local->inode)
+ inode_unref(local->inode);
+}
+
+/*
+ * Get (or create) the fd context for storing prepopulated directory
+ * entries.
+ */
+static struct rda_fd_ctx *
+get_rda_fd_ctx(fd_t *fd, xlator_t *this)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ LOCK(&fd->lock);
+
+ if (__fd_ctx_get(fd, this, &val) < 0) {
+ ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), gf_rda_mt_rda_fd_ctx);
+ if (!ctx)
+ goto out;
+
+ LOCK_INIT(&ctx->lock);
+ INIT_LIST_HEAD(&ctx->entries.list);
+ ctx->state = RDA_FD_NEW;
+ /* ctx offset values initialized to 0 */
+ ctx->xattrs = NULL;
+
+ if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx) < 0) {
+ GF_FREE(ctx);
+ ctx = NULL;
+ goto out;
+ }
+ } else {
+ ctx = (struct rda_fd_ctx *)(uintptr_t)val;
+ }
+out:
+ UNLOCK(&fd->lock);
+ return ctx;
+}
+
+static rda_inode_ctx_t *
+__rda_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ rda_inode_ctx_t *ctx_p = NULL;
+
+ ret = __inode_ctx_get1(inode, this, &ctx_uint);
+ if (ret == 0)
+ return (rda_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_rda_mt_inode_ctx_t);
+ if (!ctx_p)
+ return NULL;
+
+ GF_ATOMIC_INIT(ctx_p->generation, 0);
+
+ ctx_uint = (uint64_t)(uintptr_t)ctx_p;
+ ret = __inode_ctx_set1(inode, this, &ctx_uint);
+ if (ret < 0) {
+ GF_FREE(ctx_p);
+ return NULL;
+ }
+
+ return ctx_p;
+}
+
+static int
+__rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+ struct iatt tmp_stat = {
+ 0,
+ };
+
+ ctx_p = __rda_inode_ctx_get(inode, this);
+ if (!ctx_p)
+ return -1;
+
+ if ((!stbuf_in) || (stbuf_in->ia_ctime == 0)) {
+ /* A fop modified a file but valid stbuf is not provided.
+ * Can't update iatt to reflect results of fop and hence
+ * invalidate the iatt stored in dentry.
+ *
+ * An example of this case can be response of write request
+ * that is cached in write-behind.
+ */
+ if (stbuf_in)
+ tmp_stat = *stbuf_in;
+ else
+ tmp_stat = ctx_p->statbuf;
+ memset(&ctx_p->statbuf, 0, sizeof(ctx_p->statbuf));
+ gf_uuid_copy(ctx_p->statbuf.ia_gfid, tmp_stat.ia_gfid);
+ ctx_p->statbuf.ia_type = tmp_stat.ia_type;
+ GF_ATOMIC_INC(ctx_p->generation);
+ } else {
+ if (ctx_p->statbuf.ia_ctime) {
+ if (stbuf_in->ia_ctime < ctx_p->statbuf.ia_ctime) {
+ goto out;
+ }
+
+ if ((stbuf_in->ia_ctime == ctx_p->statbuf.ia_ctime) &&
+ (stbuf_in->ia_ctime_nsec < ctx_p->statbuf.ia_ctime_nsec)) {
+ goto out;
+ }
+ } else {
+ if ((generation != -1) &&
+ (generation != GF_ATOMIC_GET(ctx_p->generation)))
+ goto out;
+ }
+
+ ctx_p->statbuf = *stbuf_in;
+ }
+
+out:
+ if (stbuf_out)
+ *stbuf_out = ctx_p->statbuf;
+
+ return 0;
+}
+
+static int
+rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __rda_inode_ctx_update_iatts(inode, this, stbuf_in, stbuf_out,
+ generation);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+/*
+ * Reset the tracking state of the context.
+ */
+static void
+rda_reset_ctx(xlator_t *this, struct rda_fd_ctx *ctx)
+{
+ struct rda_priv *priv = NULL;
+
+ priv = this->private;
+
+ ctx->state = RDA_FD_NEW;
+ ctx->cur_offset = 0;
+ ctx->next_offset = 0;
+ ctx->op_errno = 0;
+
+ gf_dirent_free(&ctx->entries);
+ GF_ATOMIC_SUB(priv->rda_cache_size, ctx->cur_size);
+ ctx->cur_size = 0;
+
+ if (ctx->xattrs) {
+ dict_unref(ctx->xattrs);
+ ctx->xattrs = NULL;
+ }
+}
+
+static void
+rda_mark_inode_dirty(xlator_t *this, inode_t *inode)
+{
+ inode_t *parent = NULL;
+ fd_t *fd = NULL;
+ uint64_t val = 0;
+ int32_t ret = 0;
+ struct rda_fd_ctx *fd_ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ parent = inode_parent(inode, NULL, NULL);
+ if (parent) {
+ LOCK(&parent->lock);
+ {
+ list_for_each_entry(fd, &parent->fd_list, inode_list)
+ {
+ val = 0;
+ fd_ctx_get(fd, this, &val);
+ if (val == 0)
+ continue;
+
+ fd_ctx = (void *)(uintptr_t)val;
+ uuid_utoa_r(inode->gfid, gfid);
+ if (!GF_ATOMIC_GET(fd_ctx->prefetching))
+ continue;
+
+ LOCK(&fd_ctx->lock);
+ {
+ if (GF_ATOMIC_GET(fd_ctx->prefetching)) {
+ if (fd_ctx->writes_during_prefetch == NULL)
+ fd_ctx->writes_during_prefetch = dict_new();
+
+ ret = dict_set_int8(fd_ctx->writes_during_prefetch,
+ gfid, 1);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "marking to invalidate stats of %s from an "
+ "in progress "
+ "prefetching has failed, might result in "
+ "stale stat to "
+ "application",
+ gfid);
+ }
+ }
+ }
+ UNLOCK(&fd_ctx->lock);
+ }
+ }
+ UNLOCK(&parent->lock);
+ inode_unref(parent);
+ }
+
+ return;
+}
+
+/*
+ * Check whether we can handle a request. Offset verification is done by the
+ * caller, so we only check whether the preload buffer has completion status
+ * (including an error) or has some data to return.
+ */
+static gf_boolean_t
+rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
+{
+ if ((ctx->state & RDA_FD_EOD) || (ctx->state & RDA_FD_ERROR) ||
+ (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) ||
+ (request_size && ctx->cur_size >= request_size))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+rda_inode_ctx_get_iatt(inode_t *inode, xlator_t *this, struct iatt *attr)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+
+ if (!inode || !this || !attr)
+ goto out;
+
+ LOCK(&inode->lock);
+ {
+ ctx_p = __rda_inode_ctx_get(inode, this);
+ if (ctx_p) {
+ *attr = ctx_p->statbuf;
+ }
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return;
+}
+
+/*
+ * Serve a request from the fd dentry list based on the size of the request
+ * buffer. ctx must be locked.
+ */
+static int32_t
+__rda_fill_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
+ struct rda_fd_ctx *ctx)
+{
+ gf_dirent_t *dirent, *tmp;
+ size_t dirent_size, size = 0;
+ int32_t count = 0;
+ struct rda_priv *priv = NULL;
+ struct iatt tmp_stat = {
+ 0,
+ };
+
+ priv = this->private;
+
+ list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list)
+ {
+ dirent_size = gf_dirent_size(dirent->d_name);
+ if (size + dirent_size > request_size)
+ break;
+
+ memset(&tmp_stat, 0, sizeof(tmp_stat));
+
+ if (dirent->inode && (!((strcmp(dirent->d_name, ".") == 0) ||
+ (strcmp(dirent->d_name, "..") == 0)))) {
+ rda_inode_ctx_get_iatt(dirent->inode, this, &tmp_stat);
+ dirent->d_stat = tmp_stat;
+ }
+
+ size += dirent_size;
+ list_del_init(&dirent->list);
+ ctx->cur_size -= dirent_size;
+
+ GF_ATOMIC_SUB(priv->rda_cache_size, dirent_size);
+
+ list_add_tail(&dirent->list, &entries->list);
+ ctx->cur_offset = dirent->d_off;
+ count++;
+ }
+
+ if (ctx->cur_size <= priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+
+ return count;
+}
+
+static int32_t
+__rda_serve_readdirp(xlator_t *this, struct rda_fd_ctx *ctx, size_t size,
+ gf_dirent_t *entries, int *op_errno)
+{
+ int32_t ret = 0;
+
+ ret = __rda_fill_readdirp(this, entries, size, ctx);
+
+ if (!ret && (ctx->state & RDA_FD_ERROR)) {
+ ret = -1;
+ ctx->state &= ~RDA_FD_ERROR;
+
+ /*
+ * the preload has stopped running in the event of an error, so
+ * pass all future requests along
+ */
+ ctx->state |= RDA_FD_BYPASS;
+ }
+ /*
+ * Use the op_errno sent by lower layers as xlators above will check
+ * the op_errno for identifying whether readdir is completed or not.
+ */
+ *op_errno = ctx->op_errno;
+
+ return ret;
+}
+
+static int32_t
+rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ struct rda_fd_ctx *ctx = NULL;
+ int fill = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ int op_errno = 0;
+ gf_boolean_t serve = _gf_false;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ if (ctx->state & RDA_FD_BYPASS)
+ goto bypass;
+
+ INIT_LIST_HEAD(&entries.list);
+ LOCK(&ctx->lock);
+
+ /* recheck now that we have the lock */
+ if (ctx->state & RDA_FD_BYPASS) {
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If a new read comes in at offset 0 and the buffer has been
+ * completed, reset the context and kickstart the filler again.
+ */
+ if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
+ rda_reset_ctx(this, ctx);
+ /*
+ * Unref and discard the 'list of xattrs to be fetched'
+ * stored during opendir call. This is done above - inside
+ * rda_reset_ctx().
+ * Now, ref the xdata passed by md-cache in actual readdirp()
+ * call and use that for all subsequent internal readdirp()
+ * requests issued by this xlator.
+ */
+ ctx->xattrs = dict_ref(xdata);
+ fill = 1;
+ }
+
+ /*
+ * If a readdir occurs at an unexpected offset or we already have a
+ * request pending, admit defeat and just get out of the way.
+ */
+ if (off != ctx->cur_offset || ctx->stub) {
+ ctx->state |= RDA_FD_BYPASS;
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If we haven't bypassed the preload, this means we can either serve
+ * the request out of the preload or the request that enables us to do
+ * so is in flight...
+ */
+ if (rda_can_serve_readdirp(ctx, size)) {
+ ret = __rda_serve_readdirp(this, ctx, size, &entries, &op_errno);
+ serve = _gf_true;
+
+ if (op_errno == ENOENT &&
+ !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)))
+ op_errno = 0;
+ } else {
+ ctx->stub = fop_readdirp_stub(frame, NULL, fd, size, off, xdata);
+ if (!ctx->stub) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 1;
+ if (!ctx->xattrs)
+ ctx->xattrs = dict_ref(xdata);
+ ctx->state |= RDA_FD_RUNNING;
+ }
+ }
+
+ UNLOCK(&ctx->lock);
+
+ if (serve) {
+ STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
+ gf_dirent_free(&entries);
+ }
+
+ if (fill)
+ rda_fill_fd(frame, this, fd);
+
+ return 0;
+
+bypass:
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *dirent = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t serve_entries;
+ struct rda_local *local = frame->local;
+ struct rda_fd_ctx *ctx = local->ctx;
+ struct rda_priv *priv = this->private;
+ int fill = 1;
+ size_t dirent_size = 0;
+ int ret = 0;
+ gf_boolean_t serve = _gf_false;
+ call_stub_t *stub = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ uint64_t generation = 0;
+ call_frame_t *fill_frame = NULL;
+
+ INIT_LIST_HEAD(&serve_entries.list);
+ LOCK(&ctx->lock);
+
+ /* Verify that the preload buffer is still pending on this data. */
+ if (ctx->next_offset != local->offset) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, READDIR_AHEAD_MSG_OUT_OF_SEQUENCE,
+ "Out of sequence directory preload.");
+ ctx->state |= (RDA_FD_BYPASS | RDA_FD_ERROR);
+ ctx->op_errno = EUCLEAN;
+
+ goto out;
+ }
+
+ if (entries) {
+ list_for_each_entry_safe(dirent, tmp, &entries->list, list)
+ {
+ list_del_init(&dirent->list);
+
+ /* must preserve entry order */
+ list_add_tail(&dirent->list, &ctx->entries.list);
+ if (dirent->inode) {
+ /* If the cached stat has been invalidated, don't
+ * update it with dirent->d_stat, as we don't have
+ * the inode's generation number from when the
+ * readdirp request was initiated; in that case 0
+ * is passed as the generation number.
+ */
+
+ generation = -1;
+ if (ctx->writes_during_prefetch) {
+ memset(gfid, 0, sizeof(gfid));
+ uuid_utoa_r(dirent->inode->gfid, gfid);
+ if (dict_get(ctx->writes_during_prefetch, gfid))
+ generation = 0;
+ }
+
+ if (!((strcmp(dirent->d_name, ".") == 0) ||
+ (strcmp(dirent->d_name, "..") == 0))) {
+ rda_inode_ctx_update_iatts(dirent->inode, this,
+ &dirent->d_stat, &dirent->d_stat,
+ generation);
+ }
+ }
+
+ dirent_size = gf_dirent_size(dirent->d_name);
+
+ ctx->cur_size += dirent_size;
+
+ GF_ATOMIC_ADD(priv->rda_cache_size, dirent_size);
+
+ ctx->next_offset = dirent->d_off;
+ }
+ }
+
+ if (ctx->writes_during_prefetch) {
+ dict_unref(ctx->writes_during_prefetch);
+ ctx->writes_during_prefetch = NULL;
+ }
+
+ GF_ATOMIC_DEC(ctx->prefetching);
+
+ if (ctx->cur_size >= priv->rda_high_wmark)
+ ctx->state &= ~RDA_FD_PLUGGED;
+
+ if (!op_ret || op_errno == ENOENT) {
+ /* we've hit eod */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_EOD;
+ ctx->op_errno = op_errno;
+ } else if (op_ret == -1) {
+ /* kill the preload and pend the error */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_ERROR;
+ ctx->op_errno = op_errno;
+ }
+
+ /*
+ * NOTE: The strict bypass logic in readdirp() means a pending request
+ * is always based on ctx->cur_offset.
+ */
+ if (ctx->stub && rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
+ ret = __rda_serve_readdirp(this, ctx, ctx->stub->args.size,
+ &serve_entries, &op_errno);
+ serve = _gf_true;
+ stub = ctx->stub;
+ ctx->stub = NULL;
+ }
+
+out:
+ /*
+ * If we have been marked for bypass and have no pending stub, clear the
+ * run state so we stop preloading the context with entries.
+ */
+ if (!ctx->stub &&
+ ((ctx->state & RDA_FD_BYPASS) ||
+ GF_ATOMIC_GET(priv->rda_cache_size) > priv->rda_cache_limit))
+ ctx->state &= ~RDA_FD_RUNNING;
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 0;
+ if (ctx->xattrs) {
+ /*
+ * fill = 0 and hence rda_fill_fd() won't be invoked.
+ * unref for ref taken in rda_fill_fd()
+ */
+ dict_unref(ctx->xattrs);
+ ctx->xattrs = NULL;
+ }
+
+ fill_frame = ctx->fill_frame;
+ ctx->fill_frame = NULL;
+ }
+
+ if (op_errno == ENOENT &&
+ !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)))
+ op_errno = 0;
+
+ UNLOCK(&ctx->lock);
+ if (fill_frame) {
+ rda_local_wipe(fill_frame->local);
+ STACK_DESTROY(fill_frame->root);
+ }
+
+ if (serve) {
+ STACK_UNWIND_STRICT(readdirp, stub->frame, ret, op_errno,
+ &serve_entries, xdata);
+ gf_dirent_free(&serve_entries);
+ call_stub_destroy(stub);
+ }
+
+ if (fill)
+ rda_fill_fd(frame, this, local->fd);
+
+ return 0;
+}
+
+/*
+ * Start prepopulating the fd context with directory entries.
+ */
+static int
+rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ call_frame_t *nframe = NULL;
+ struct rda_local *local = NULL;
+ struct rda_local *orig_local = frame->local;
+ struct rda_fd_ctx *ctx;
+ off_t offset;
+ struct rda_priv *priv = this->private;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ LOCK(&ctx->lock);
+
+ if (ctx->state & RDA_FD_NEW) {
+ ctx->state &= ~RDA_FD_NEW;
+ ctx->state |= RDA_FD_RUNNING;
+ if (priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+ }
+
+ offset = ctx->next_offset;
+
+ if (!ctx->fill_frame) {
+ nframe = copy_frame(frame);
+ if (!nframe) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local->ctx = ctx;
+ local->fd = fd_ref(fd);
+ nframe->local = local;
+
+ ctx->fill_frame = nframe;
+
+ if (!ctx->xattrs && orig_local && orig_local->xattrs) {
+ /* when this function is invoked by rda_opendir_cbk */
+ ctx->xattrs = dict_ref(orig_local->xattrs);
+ }
+ } else {
+ nframe = ctx->fill_frame;
+ local = nframe->local;
+ }
+
+ local->offset = offset;
+ GF_ATOMIC_INC(ctx->prefetching);
+
+ UNLOCK(&ctx->lock);
+
+ STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size,
+ offset, ctx->xattrs);
+
+ return 0;
+
+err:
+ if (nframe) {
+ rda_local_wipe(nframe->local);
+ FRAME_DESTROY(nframe);
+ }
+
+ return -1;
+}
+
+static int32_t
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ if (!op_ret)
+ rda_fill_fd(frame, this, fd);
+
+ RDA_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ struct rda_local *local = NULL;
+
+ if (xdata) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /*
+ * Retrieve list of keys set by md-cache xlator and store it
+ * in local to be consumed in rda_opendir_cbk
+ */
+ local->xattrs = dict_copy_with_ref(xdata, NULL);
+ frame->local = local;
+ }
+
+ STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+
+ rda_mark_inode_dirty(this, local->inode);
+
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(writev, frame, this, fd->inode, xdata, fd,
+ vector, count, off, flags, iobref);
+ return 0;
+}
+
+static int32_t
+rda_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fallocate, frame, this, fd->inode, xdata, fd,
+ keep_size, offset, len);
+ return 0;
+}
+
+static int32_t
+rda_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(zerofill, frame, this, fd->inode, xdata, fd,
+ offset, len);
+ return 0;
+}
+
+static int32_t
+rda_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(discard, frame, this, fd->inode, xdata, fd,
+ offset, len);
+ return 0;
+}
+
+static int32_t
+rda_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(ftruncate, frame, this, fd->inode, xdata, fd,
+ offset);
+ return 0;
+}
+
+static int32_t
+rda_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(truncate, frame, this, loc->inode, xdata, loc,
+ offset);
+ return 0;
+}
+
+static int32_t
+rda_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(setxattr, frame, this, loc->inode, xdata, loc,
+ dict, flags);
+ return 0;
+}
+
+static int32_t
+rda_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fsetxattr, frame, this, fd->inode, xdata, fd,
+ dict, flags);
+ return 0;
+}
+
+static int32_t
+rda_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(setattr, frame, this, loc->inode, xdata, loc,
+ stbuf, valid);
+ return 0;
+}
+
+static int32_t
+rda_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fsetattr, frame, this, fd->inode, xdata, fd,
+ stbuf, valid);
+ return 0;
+}
+
+static int32_t
+rda_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(removexattr, frame, this, loc->inode, xdata,
+ loc, name);
+ return 0;
+}
+
+static int32_t
+rda_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fremovexattr, frame, this, fd->inode, xdata, fd,
+ name);
+ return 0;
+}
+
+static int32_t
+rda_releasedir(xlator_t *this, fd_t *fd)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ if (fd_ctx_del(fd, this, &val) < 0)
+ return -1;
+
+ ctx = (struct rda_fd_ctx *)(uintptr_t)val;
+ if (!ctx)
+ return 0;
+
+ rda_reset_ctx(this, ctx);
+
+ if (ctx->fill_frame)
+ STACK_DESTROY(ctx->fill_frame->root);
+
+ if (ctx->stub)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+ "released a directory with a pending stub");
+
+ GF_FREE(ctx);
+ return 0;
+}
+
+static int
+rda_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_uint = 0;
+ rda_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del1(inode, this, &ctx_uint);
+ if (!ctx_uint)
+ return 0;
+
+ ctx = (rda_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ GF_FREE(ctx);
+
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
+
+ if (ret != 0)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READDIR_AHEAD_MSG_NO_MEMORY,
+ "Memory accounting init"
+ "failed");
+
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ struct rda_priv *priv = this->private;
+
+ GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64,
+ err);
+ GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("parallel-readdir", priv->parallel_readdir, options, bool,
+ err);
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, err);
+
+ return 0;
+err:
+ return -1;
+}
+
+int
+init(xlator_t *this)
+{
+ struct rda_priv *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: readdir-ahead not configured with exactly one"
+ " child");
+ goto err;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ READDIR_AHEAD_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
+ if (!priv)
+ goto err;
+ this->private = priv;
+
+ GF_ATOMIC_INIT(priv->rda_cache_size, 0);
+
+ this->local_pool = mem_pool_new(struct rda_local, 32);
+ if (!this->local_pool)
+ goto err;
+
+ GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64, err);
+ GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
+ GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err);
+ GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64, err);
+ GF_OPTION_INIT("parallel-readdir", priv->parallel_readdir, bool, err);
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, err);
+
+ return 0;
+
+err:
+ if (this->local_pool)
+ mem_pool_destroy(this->local_pool);
+ if (priv)
+ GF_FREE(priv);
+
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, out);
+
+ GF_FREE(this->private);
+
+out:
+ return;
+}
+
+struct xlator_fops fops = {
+ .opendir = rda_opendir,
+ .readdirp = rda_readdirp,
+ /* inode write */
+ /* TODO: invalidate a dentry's stats if it points to a directory
+ * when entry operations happen in that directory
+ */
+ .writev = rda_writev,
+ .truncate = rda_truncate,
+ .ftruncate = rda_ftruncate,
+ .fallocate = rda_fallocate,
+ .discard = rda_discard,
+ .zerofill = rda_zerofill,
+ /* metadata write */
+ .setxattr = rda_setxattr,
+ .fsetxattr = rda_fsetxattr,
+ .setattr = rda_setattr,
+ .fsetattr = rda_fsetattr,
+ .removexattr = rda_removexattr,
+ .fremovexattr = rda_fremovexattr,
+};
+
+struct xlator_cbks cbks = {
+ .releasedir = rda_releasedir,
+ .forget = rda_forget,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"readdir-ahead"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable readdir-ahead",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"rda-request-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 131072,
+ .default_value = "131072",
+ .description = "size of buffer in readdirp calls initiated by "
+ "readdir-ahead ",
+ },
+ {
+ .key = {"rda-low-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 10 * GF_UNIT_MB,
+ .default_value = "4096",
+ .description = "the value under which readdir-ahead plugs",
+ },
+ {
+ .key = {"rda-high-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 100 * GF_UNIT_MB,
+ .default_value = "128KB",
+ .description = "the value over which readdir-ahead unplugs",
+ },
+ {
+ .key = {"rda-cache-limit"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = INFINITY,
+ .default_value = "10MB",
+ .description = "maximum size of cache consumed by readdir-ahead "
+ "xlator. This value is global and total memory "
+ "consumption by readdir-ahead is capped by this "
+ "value, irrespective of the number/size of "
+ "directories cached",
+ },
+ {.key = {"parallel-readdir"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .default_value = "off",
+ .description = "If this option is enabled, the readdir operation "
+ "is performed in parallel on all the bricks, thus "
+ "improving the performance of readdir. Note that "
+ "the performance improvement is higher in large "
+ "clusters"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"readdir-ahead"},
+ .description = "Enable/Disable readdir ahead translator"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "readdir-ahead",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
new file mode 100644
index 00000000000..619c41059ff
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READDIR_AHEAD_H
+#define __READDIR_AHEAD_H
+
+/* state flags */
+#define RDA_FD_NEW (1 << 0)
+#define RDA_FD_RUNNING (1 << 1)
+#define RDA_FD_EOD (1 << 2)
+#define RDA_FD_ERROR (1 << 3)
+#define RDA_FD_BYPASS (1 << 4)
+#define RDA_FD_PLUGGED (1 << 5)
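+
+/*
+ * Rough flag lifecycle (see readdir-ahead.c): a new or reset fd context
+ * starts as RDA_FD_NEW; the first preload switches it to RDA_FD_RUNNING,
+ * with RDA_FD_PLUGGED set while the buffered data is below rda-low-wmark
+ * and cleared once it crosses rda-high-wmark. RDA_FD_EOD or RDA_FD_ERROR
+ * is set when the preload reaches end-of-directory or fails, and
+ * RDA_FD_BYPASS is set on a non-sequential read, after which requests are
+ * simply passed through to the child.
+ */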
+
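+/*
+ * Common boilerplate for the write-type fops in readdir-ahead.c
+ * (rda_writev, rda_ftruncate, rda_setattr, ...): allocate an rda_local,
+ * ref the inode and record its current generation number, then wind to
+ * the child with the given arguments plus xdata. The matching _cbk uses
+ * local->generation to decide whether the cached iatt may still be
+ * updated.
+ */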
+#define RDA_COMMON_MODIFICATION_FOP(name, frame, this, __inode, __xdata, \
+ args...) \
+ do { \
+ struct rda_local *__local = NULL; \
+ rda_inode_ctx_t *ctx_p = NULL; \
+ \
+ __local = mem_get0(this->local_pool); \
+ __local->inode = inode_ref(__inode); \
+ LOCK(&__inode->lock); \
+ { \
+ ctx_p = __rda_inode_ctx_get(__inode, this); \
+ } \
+ UNLOCK(&__inode->lock); \
+ __local->generation = GF_ATOMIC_GET(ctx_p->generation); \
+ \
+ frame->local = __local; \
+ if (__xdata) \
+ __local->xattrs = dict_ref(__xdata); \
+ \
+ STACK_WIND(frame, rda_##name##_cbk, FIRST_CHILD(this), \
+ FIRST_CHILD(this)->fops->name, args, __xdata); \
+ } while (0)
+
+#define RDA_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ struct rda_local *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local) { \
+ rda_local_wipe(__local); \
+ mem_put(__local); \
+ } \
+ } while (0)
+
+struct rda_fd_ctx {
+ off_t cur_offset; /* current head of the ctx */
+ size_t cur_size; /* current size of the preload */
+ off_t next_offset; /* tail of the ctx */
+ uint32_t state;
+ gf_lock_t lock;
+ gf_dirent_t entries;
+ call_frame_t *fill_frame;
+ call_stub_t *stub;
+ int op_errno;
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ dict_t *writes_during_prefetch;
+ gf_atomic_t prefetching;
+};
+
+struct rda_local {
+ struct rda_fd_ctx *ctx;
+ fd_t *fd;
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ inode_t *inode;
+ off_t offset;
+ uint64_t generation;
+ int32_t skip_dir;
+};
+
+struct rda_priv {
+ uint64_t rda_req_size;
+ uint64_t rda_low_wmark;
+ uint64_t rda_high_wmark;
+ uint64_t rda_cache_limit;
+ gf_atomic_t rda_cache_size;
+ gf_boolean_t parallel_readdir;
+};
+
+typedef struct rda_inode_ctx {
+ struct iatt statbuf;
+ gf_atomic_t generation;
+} rda_inode_ctx_t;
+
+#endif /* __READDIR_AHEAD_H */
diff --git a/xlators/performance/stat-prefetch/src/Makefile.am b/xlators/performance/stat-prefetch/src/Makefile.am
deleted file mode 100644
index cbea4bf5256..00000000000
--- a/xlators/performance/stat-prefetch/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-xlator_LTLIBRARIES = stat-prefetch.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-
-stat_prefetch_la_LDFLAGS = -module -avoidversion
-stat_prefetch_la_SOURCES = stat-prefetch.c
-noinst_HEADERS = stat-prefetch.h
-
-stat_prefetch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.c b/xlators/performance/stat-prefetch/src/stat-prefetch.c
deleted file mode 100644
index 341583b4683..00000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch.c
+++ /dev/null
@@ -1,3895 +0,0 @@
-/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "stat-prefetch.h"
-
-#define GF_SP_CACHE_BUCKETS 1
-#define GF_SP_CACHE_ENTRIES_EXPECTED 1048576
-
-typedef enum {
- SP_EXPECT,
- SP_DONT_EXPECT,
- SP_DONT_CARE
-}sp_expect_t;
-
-
-void
-sp_inode_ctx_free (xlator_t *this, sp_inode_ctx_t *ctx)
-{
- call_stub_t *stub = NULL, *tmp = NULL;
-
- if (ctx == NULL) {
- goto out;
- }
-
- LOCK (&ctx->lock);
- {
- if (!list_empty (&ctx->waiting_ops)) {
- gf_log (this->name, GF_LOG_CRITICAL, "inode ctx is "
- "being freed even when there are file "
- "operations waiting for lookup-behind to "
- "complete. The operations in the waiting list "
- "are:");
- list_for_each_entry_safe (stub, tmp, &ctx->waiting_ops,
- list) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "OP (%d)", stub->fop);
-
- list_del_init (&stub->list);
- call_stub_destroy (stub);
- }
- }
- }
- UNLOCK (&ctx->lock);
-
- LOCK_DESTROY (&ctx->lock);
- FREE (ctx);
-
-out:
- return;
-}
-
-
-sp_inode_ctx_t *
-sp_inode_ctx_init ()
-{
- sp_inode_ctx_t *inode_ctx = NULL;
-
- inode_ctx = CALLOC (1, sizeof (*inode_ctx));
- if (inode_ctx == NULL) {
- goto out;
- }
-
- LOCK_INIT (&inode_ctx->lock);
- INIT_LIST_HEAD (&inode_ctx->waiting_ops);
-
-out:
- return inode_ctx;
-}
-
-
-int
-sp_update_inode_ctx (xlator_t *this, inode_t *inode, int32_t *op_ret,
- int32_t *op_errno, char *lookup_in_progress,
- char *looked_up, struct stat *stbuf,
- struct list_head *waiting_ops, int32_t *error)
-{
- int32_t ret = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- uint64_t value = 0;
-
- ret = inode_ctx_get (inode, this, &value);
- if (ret == 0) {
- inode_ctx = (sp_inode_ctx_t *)(long)value;
- }
-
- if (inode_ctx == NULL) {
- ret = -1;
- if (error != NULL) {
- *error = EINVAL;
- }
-
- goto out;
- }
-
- LOCK (&inode_ctx->lock);
- {
- if (op_ret != NULL) {
- inode_ctx->op_ret = *op_ret;
- }
-
- if (op_errno != NULL) {
- inode_ctx->op_errno = *op_errno;
- }
-
- if (looked_up != NULL) {
- inode_ctx->looked_up = *looked_up;
- }
-
- if (lookup_in_progress != NULL) {
- inode_ctx->lookup_in_progress = *lookup_in_progress;
- }
-
- if ((op_ret == 0) && (stbuf != NULL)
- && S_ISDIR (stbuf->st_mode)) {
- memcpy (&inode_ctx->stbuf, stbuf,
- sizeof (*stbuf));
- }
-
- if (waiting_ops != NULL) {
- list_splice_init (&inode_ctx->waiting_ops,
- waiting_ops);
- }
- }
- UNLOCK (&inode_ctx->lock);
-
-out:
- return ret;
-}
-
-
-sp_inode_ctx_t *
-sp_check_and_create_inode_ctx (xlator_t *this, inode_t *inode,
- sp_expect_t expect, glusterfs_fop_t caller)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0;
-
- if ((this == NULL) || (inode == NULL)) {
- goto out;
- }
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &value);
- if (ret == 0) {
- if (expect == SP_DONT_EXPECT) {
- gf_log (this->name, GF_LOG_DEBUG, "inode_ctx "
- "is not NULL (caller %d)", caller);
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long)value;
- } else {
- if (expect == SP_EXPECT) {
- gf_log (this->name, GF_LOG_DEBUG, "inode_ctx is"
- " NULL (caller %d)", caller);
- }
-
- inode_ctx = sp_inode_ctx_init ();
- if (inode_ctx != NULL) {
- ret = __inode_ctx_put (inode, this,
- (long)inode_ctx);
- if (ret == -1) {
- sp_inode_ctx_free (this, inode_ctx);
- inode_ctx = NULL;
- }
- }
- }
- }
- UNLOCK (&inode->lock);
-
-out:
- return inode_ctx;
-}
-
-
-sp_cache_t *
-sp_cache_ref (sp_cache_t *cache)
-{
- if (cache == NULL) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- cache->ref++;
- }
- UNLOCK (&cache->lock);
-
-out:
- return cache;;
-}
-
-
-void
-sp_cache_unref (sp_cache_t *cache)
-{
- int refcount = 0;
- if (cache == NULL) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- refcount = --cache->ref;
- }
- UNLOCK (&cache->lock);
-
- if (refcount == 0) {
- rbthash_table_destroy (cache->table);
- FREE (cache);
- }
-
-out:
- return;
-}
-
-
-int32_t
-sp_process_inode_ctx (call_frame_t *frame, xlator_t *this, loc_t *loc,
- call_stub_t *stub, char *need_unwind, char *need_lookup,
- char *can_wind, int32_t *error, glusterfs_fop_t caller)
-{
- int32_t ret = -1, op_errno = -1;
- sp_local_t *local = NULL;
- sp_inode_ctx_t *inode_ctx = NULL;
- uint64_t value = 0;
-
- if (need_unwind != NULL) {
- *need_unwind = 1;
- }
-
- if ((this == NULL) || (loc == NULL) || (loc->inode == NULL)
- || (need_unwind == NULL) || (need_lookup == NULL)
- || (can_wind == NULL)) {
- op_errno = EINVAL;
- goto out;
- }
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p) (caller %d)", loc->inode, caller);
- *can_wind = 1;
- *need_unwind = 0;
- op_errno = 0;
- ret = 0;
- goto out;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, out, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- if (!(inode_ctx->looked_up || inode_ctx->lookup_in_progress)) {
- if (frame->local == NULL) {
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name,
- local,
- unlock,
- op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s",
- strerror (op_errno));
- goto unlock;
- }
- }
-
- *need_lookup = 1;
- inode_ctx->lookup_in_progress = 1;
- }
-
- if (inode_ctx->looked_up) {
- *can_wind = 1;
- } else {
- list_add_tail (&stub->list, &inode_ctx->waiting_ops);
- stub = NULL;
- }
-
- *need_unwind = 0;
- ret = 0;
- }
-unlock:
- UNLOCK (&inode_ctx->lock);
-
-out:
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- if (error != NULL) {
- *error = op_errno;
- }
-
- return ret;
-}
-
-
-inline uint32_t
-sp_hashfn (void *data, int len)
-{
- return gf_dm_hashfn ((const char *)data, len);
-}
-
-sp_cache_t *
-sp_cache_init (xlator_t *this)
-{
- sp_cache_t *cache = NULL;
- sp_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!priv)
- goto out;
-
- if (!priv->mem_pool)
- goto out;
-
- cache = CALLOC (1, sizeof (*cache));
- if (cache) {
- cache->table =
- rbthash_table_init (GF_SP_CACHE_BUCKETS,
- sp_hashfn, free,
- 0, priv->mem_pool);
- if (cache->table == NULL) {
- FREE (cache);
- cache = NULL;
- goto out;
- }
-
- LOCK_INIT (&cache->lock);
- cache->this = this;
- }
-
-out:
- return cache;
-}
-
-
-void
-sp_local_free (sp_local_t *local)
-{
- if (local) {
- loc_wipe (&local->loc);
- FREE (local);
- }
-}
-
-
-int32_t
-sp_cache_remove_entry (sp_cache_t *cache, char *name, char remove_all)
-{
- int32_t ret = -1;
- rbthash_table_t *table = NULL;
- xlator_t *this;
- sp_private_t *priv = NULL;
- void *data = NULL;
-
- if ((cache == NULL) || ((name == NULL) && !remove_all)) {
- goto out;
- }
-
- this = cache->this;
-
- if (this == NULL)
- goto out;
-
- if (this->private == NULL)
- goto out;
-
- priv = this->private;
-
- LOCK (&cache->lock);
- {
- if (remove_all) {
- table = cache->table;
- cache->table = rbthash_table_init (GF_SP_CACHE_BUCKETS,
- sp_hashfn,
- free,
- 0,
- priv->mem_pool);
- if (cache->table == NULL) {
- cache->table = table;
- } else {
- rbthash_table_destroy (table);
- ret = 0;
- }
- } else {
- data = rbthash_remove (cache->table, name,
- strlen (name));
- FREE (data);
- ret = 0;
- }
- }
- UNLOCK (&cache->lock);
-
-out:
- return ret;
-}
-
-
-int32_t
-sp_cache_get_entry (sp_cache_t *cache, char *name, gf_dirent_t **entry)
-{
- int32_t ret = -1;
- gf_dirent_t *tmp = NULL, *new = NULL;
-
- if ((cache == NULL) || (name == NULL) || (entry == NULL)) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- tmp = rbthash_get (cache->table, name, strlen (name));
- if (tmp != NULL) {
- new = gf_dirent_for_name (tmp->d_name);
- if (new == NULL) {
- goto unlock;
- }
-
- new->d_ino = tmp->d_ino;
- new->d_off = tmp->d_off;
- new->d_len = tmp->d_len;
- new->d_type = tmp->d_type;
- new->d_stat = tmp->d_stat;
-
- *entry = new;
- ret = 0;
- }
- }
-unlock:
- UNLOCK (&cache->lock);
-
-out:
- return ret;
-}
-
-
-void
-sp_cache_free (sp_cache_t *cache)
-{
- sp_cache_remove_entry (cache, NULL, 1);
- sp_cache_unref (cache);
-}
-
-
-sp_cache_t *
-__sp_get_cache_fd (xlator_t *this, fd_t *fd)
-{
- int32_t ret = -1;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- ret = __fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- goto out;
- }
-
- fd_ctx = (void *)(long) value;
-
- cache = fd_ctx->cache;
-
-out:
- return cache;
-}
-
-
-sp_cache_t *
-sp_get_cache_fd (xlator_t *this, fd_t *fd)
-{
- sp_cache_t *cache = NULL;
-
- if (fd == NULL) {
- goto out;
- }
-
- LOCK (&fd->lock);
- {
- cache = __sp_get_cache_fd (this, fd);
- if (cache != NULL) {
- sp_cache_ref (cache);
- }
- }
- UNLOCK (&fd->lock);
-
-out:
- return cache;
-}
-
-
-void
-sp_fd_ctx_free (sp_fd_ctx_t *fd_ctx)
-{
- if (fd_ctx == NULL) {
- goto out;
- }
-
- if (fd_ctx->parent_inode) {
- inode_unref (fd_ctx->parent_inode);
- fd_ctx->parent_inode = NULL;
- }
-
- if (fd_ctx->name) {
- FREE (fd_ctx->name);
- fd_ctx->name = NULL;
- }
-
- if (fd_ctx->cache) {
- sp_cache_free (fd_ctx->cache);
- }
-
- FREE (fd_ctx);
-out:
- return;
-}
-
-
-inline sp_fd_ctx_t *
-sp_fd_ctx_init (void)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = CALLOC (1, sizeof (*fd_ctx));
-
- return fd_ctx;
-}
-
-
-sp_fd_ctx_t *
-sp_fd_ctx_new (xlator_t *this, inode_t *parent, char *name, sp_cache_t *cache)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = sp_fd_ctx_init ();
- if (fd_ctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (parent) {
- fd_ctx->parent_inode = inode_ref (parent);
- }
-
- if (name) {
- fd_ctx->name = strdup (name);
- if (fd_ctx->name == NULL) {
- sp_fd_ctx_free (fd_ctx);
- fd_ctx = NULL;
- }
- }
-
- fd_ctx->cache = cache;
-
-out:
- return fd_ctx;
-}
-
-
-sp_cache_t *
-sp_del_cache_fd (xlator_t *this, fd_t *fd)
-{
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (fd == NULL) {
- goto out;
- }
-
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- fd_ctx = (void *)(long) value;
- cache = fd_ctx->cache;
- fd_ctx->cache = NULL;
- }
- }
- UNLOCK (&fd->lock);
-
-out:
- return cache;
-}
-
-
-sp_cache_t *
-sp_get_cache_inode (xlator_t *this, inode_t *inode, int32_t pid)
-{
- fd_t *fd = NULL;
- sp_cache_t *cache = NULL;
-
- if (inode == NULL) {
- goto out;
- }
-
- fd = fd_lookup (inode, pid);
- if (fd == NULL) {
- goto out;
- }
-
- cache = sp_get_cache_fd (this, fd);
-
- fd_unref (fd);
-out:
- return cache;
-}
-
-
-inline int32_t
-__sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- int32_t ret = -1;
- uint64_t value = 0;
-
- ret = __fd_ctx_get (fd, this, &value);
- if (!ret) {
- fd_ctx = (void *)(long)value;
- } else {
- fd_ctx = sp_fd_ctx_init ();
- if (fd_ctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- ret = -1;
- goto out;
- }
-
- ret = __fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- goto out;
- }
- }
-
- if (fd_ctx->cache) {
- sp_cache_free (fd_ctx->cache);
- }
-
- fd_ctx->cache = cache;
-
-out:
- return ret;
-}
-
-
-inline int32_t
-sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache)
-{
- int32_t ret = -1;
-
- if (fd != NULL) {
- LOCK (&fd->lock);
- {
- ret = __sp_put_cache (this, fd, cache);
- }
- UNLOCK (&fd->lock);
- }
-
- return ret;
-}
-
-
-int32_t
-sp_cache_add_entries (sp_cache_t *cache, gf_dirent_t *entries)
-{
- gf_dirent_t *entry = NULL, *new = NULL;
- int32_t ret = -1;
- uint64_t expected_offset = 0;
-
- LOCK (&cache->lock);
- {
- list_for_each_entry (entry, &entries->list, list) {
- if (S_ISDIR (entry->d_stat.st_mode)) {
- continue;
- }
-
- new = gf_dirent_for_name (entry->d_name);
- if (new == NULL) {
- goto unlock;
- }
-
- new->d_ino = entry->d_ino;
- new->d_off = entry->d_off;
- new->d_len = entry->d_len;
- new->d_type = entry->d_type;
- new->d_stat = entry->d_stat;
-
- ret = rbthash_insert (cache->table, new, new->d_name,
- strlen (new->d_name));
- if (ret == -1) {
- FREE (new);
- continue;
- }
-
- expected_offset = new->d_off;
- }
-
- cache->expected_offset = expected_offset;
-
- ret = 0;
- }
-unlock:
- UNLOCK (&cache->lock);
-
- return ret;
-}
-
-
-int32_t
-sp_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict, struct stat *postparent)
-{
- int ret = 0;
- struct list_head waiting_ops = {0, };
- call_stub_t *stub = NULL, *tmp = NULL;
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
- int need_unwind = 0;
- char looked_up = 0, lookup_in_progress = 0;
-
- INIT_LIST_HEAD (&waiting_ops);
-
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG, "local is NULL, but it is "
- "needed to find and resume operations waiting on "
- "lookup");
- goto out;
- }
- if (op_ret == -1) {
- cache = sp_get_cache_inode (this, local->loc.parent,
- frame->root->pid);
-
- if (cache) {
- sp_cache_remove_entry (cache, (char *)local->loc.name,
- 0);
- sp_cache_unref (cache);
- }
- }
-
- if (local->is_lookup)
- need_unwind = 1;
-
- lookup_in_progress = 0;
- looked_up = 1;
- ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret, &op_errno,
- &lookup_in_progress, &looked_up, buf,
- &waiting_ops, &op_errno);
-
- list_for_each_entry_safe (stub, tmp, &waiting_ops, list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
-
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
- dict, postparent);
- }
-
- return 0;
-}
-
-
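-/*
- * Fill *parent with dirname(path) and *grand_parent with the dirname of
- * that, each built from a strdup'd copy that the caller frees; stops early
- * once "/" is reached.
- */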
-int32_t
-sp_get_ancestors (char *path, char **parent, char **grand_parent)
-{
- int32_t ret = -1, i = 0;
- char *cpy = NULL;
-
- if (!path || !parent || !grand_parent) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; i < 2; i++) {
- if (!strcmp (path, "/")) {
- break;
- }
-
- cpy = strdup (path);
- if (cpy == NULL) {
- goto out;
- }
-
- path = dirname (cpy);
- switch (i)
- {
- case 0:
- *parent = path;
- break;
- case 1:
- *grand_parent = path;
- break;
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
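-/*
- * Remove the entry for path's parent directory from the cache of the
- * grand-parent directory, so that a later lookup of the parent is not
- * served from stale prefetched data.
- */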
-int32_t
-sp_cache_remove_parent_entry (call_frame_t *frame, xlator_t *this,
- inode_table_t *itable, char *path)
-{
- char *parent = NULL, *grand_parent = NULL, *cpy = NULL;
- inode_t *inode_gp = NULL;
- sp_cache_t *cache_gp = NULL;
- int32_t ret = -1;
-
- ret = sp_get_ancestors (path, &parent, &grand_parent);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (grand_parent && strcmp (grand_parent, "/")) {
- inode_gp = inode_from_path (itable, grand_parent);
- if (inode_gp) {
- cache_gp = sp_get_cache_inode (this, inode_gp,
- frame->root->pid);
- if (cache_gp) {
- cpy = strdup (parent);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name,
- cpy, out, errno,
- ENOMEM);
- path = basename (cpy);
- sp_cache_remove_entry (cache_gp, path, 0);
- FREE (cpy);
-
- sp_cache_unref (cache_gp);
- }
- inode_unref (inode_gp);
- }
- }
-
- ret = 0;
-out:
- if (parent) {
- FREE (parent);
- }
-
- if (grand_parent) {
- FREE (grand_parent);
- }
-
- return ret;
-}
-
-
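-/* dict_foreach hook: clears the flag pointed to by 'data' as soon as any
- * key is visited, i.e. the dict is not empty.
- */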
-void
-sp_is_empty (dict_t *this, char *key, data_t *value, void *data)
-{
- char *ptr = data;
-
- if (ptr && *ptr) {
- *ptr = 0;
- }
-}
-
-
-int32_t
-sp_lookup_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- stub = fop_lookup_stub (frame, sp_lookup_helper, loc,
- xattr_req);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, stub, unwind,
- op_errno, ENOMEM);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- if (op_ret == 0) {
- if (!inode_ctx->lookup_in_progress) {
- inode_ctx->lookup_in_progress = 1;
- can_wind = 1;
- } else {
- list_add_tail (&stub->list,
- &inode_ctx->waiting_ops);
- stub = NULL;
- }
- }
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- if (can_wind) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc,
- xattr_req);
- }
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- return 0;
-}
-
-
-/*
- * TODO: send a lookup for every fop done on this path. As of now, a lookup
- * on the path is sent only for the first fop done on this path.
- */
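-/*
- * Try to serve the lookup from the parent directory's cache populated by
- * readdirp. On a miss the lookup is wound downwards; lookups arriving while
- * one is already in progress on the same inode are queued and resumed from
- * sp_lookup_cbk.
- */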
-int32_t
-sp_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- gf_dirent_t *dirent = NULL;
- char entry_cached = 0;
- uint64_t value = 0;
- char xattr_req_empty = 1, can_wind = 0;
- sp_cache_t *cache = NULL;
- struct stat postparent = {0, }, buf = {0, };
- int32_t ret = -1, op_ret = -1, op_errno = EINVAL;
- sp_inode_ctx_t *inode_ctx = NULL, *parent_inode_ctx = NULL;
- sp_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- if (loc == NULL || loc->inode == NULL) {
- goto unwind;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_CARE, GF_FOP_LOOKUP);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if ((loc->parent == NULL) || (loc->name == NULL)) {
- goto wind;
- }
-
- if (xattr_req != NULL) {
- dict_foreach (xattr_req, sp_is_empty, &xattr_req_empty);
- }
-
- if (!xattr_req_empty) {
- goto wind;
- }
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- ret = sp_cache_get_entry (cache, (char *)loc->name, &dirent);
- if (ret == 0) {
- ret = inode_ctx_get (loc->parent, this, &value);
- if ((ret == 0) && (value != 0)) {
- parent_inode_ctx = (void *)(long)value;
- postparent = parent_inode_ctx->stbuf;
- buf = dirent->d_stat;
- op_ret = 0;
- op_errno = 0;
- entry_cached = 1;
- }
-
- FREE (dirent);
- }
- } else if (S_ISDIR (loc->inode->st_mode)) {
- cache = sp_get_cache_inode (this, loc->inode, frame->root->pid);
- if (cache) {
- ret = sp_cache_get_entry (cache, ".", &dirent);
- if (ret == 0) {
- ret = inode_ctx_get (loc->parent, this, &value);
- if ((ret == 0) && (value != 0)) {
- parent_inode_ctx = (void *)(long)value;
- postparent = parent_inode_ctx->stbuf;
- buf = dirent->d_stat;
- op_ret = 0;
- op_errno = 0;
- entry_cached = 1;
- }
-
- FREE (dirent);
- }
- }
- }
-
-wind:
- if (entry_cached) {
- if (cache) {
- cache->hits++;
- sp_cache_unref (cache);
- }
- } else {
- if (cache) {
- cache->miss++;
- sp_cache_unref (cache);
- }
-
- stub = fop_lookup_stub (frame, sp_lookup_helper, loc,
- xattr_req);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, stub, unwind,
- op_errno, ENOMEM);
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind,
- op_errno, ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s",
- strerror (op_errno));
- goto unwind;
- }
-
- local->is_lookup = 1;
-
- LOCK (&inode_ctx->lock);
- {
- if (inode_ctx->lookup_in_progress) {
- list_add_tail (&stub->list,
- &inode_ctx->waiting_ops);
- stub = NULL;
- } else {
- can_wind = 1;
- inode_ctx->lookup_in_progress = 1;
- }
- }
- UNLOCK (&inode_ctx->lock);
-
- if (can_wind) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc,
- xattr_req);
- }
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- return 0;
- }
-
-unwind:
- SP_STACK_UNWIND (lookup, frame, op_ret, op_errno, loc->inode, &buf,
- NULL, &postparent);
-
- return 0;
-}
-
-
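-/*
- * readdirp callback: create the cache on the fd if it does not exist yet
- * and populate it with the returned entries before unwinding.
- */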
-int32_t
-sp_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
- fd_t *fd = NULL;
- int32_t ret = 0;
- char was_present = 1;
- sp_private_t *priv = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- if (!this->private) {
- goto out;
- }
-
- local = frame->local;
- if (local == NULL) {
- goto out;
- }
-
- fd = local->fd;
-
- priv = this->private;
-
- LOCK (&priv->lock);
- {
- if (!priv->mem_pool)
- priv->mem_pool = mem_pool_new (rbthash_entry_t,
- GF_SP_CACHE_ENTRIES_EXPECTED);
- }
- UNLOCK (&priv->lock);
-
- if (!priv->mem_pool)
- goto out;
-
- LOCK (&fd->lock);
- {
- cache = __sp_get_cache_fd (this, fd);
- if (cache == NULL) {
- was_present = 0;
- cache = sp_cache_init (this);
- if (cache == NULL) {
- goto unlock;
- }
-
- ret = __sp_put_cache (this, fd, cache);
- if (ret == -1) {
- sp_cache_free (cache);
- goto unlock;
- }
- }
-
- sp_cache_ref (cache);
- }
-unlock:
- UNLOCK (&fd->lock);
-
- if (cache != NULL) {
- sp_cache_add_entries (cache, entries);
- if (was_present) {
- sp_cache_unref (cache);
- }
- }
-
-out:
- SP_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
- return 0;
-}
-
-
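-/*
- * readdir is wound as readdirp so that the entries returned carry the stat
- * data needed for caching; a seek away from the expected offset flushes the
- * cache built so far.
- */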
-int32_t
-sp_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- sp_cache_t *cache = NULL;
- sp_local_t *local = NULL;
- char *path = NULL;
- int32_t ret = -1;
-
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- if (off != cache->expected_offset) {
- sp_cache_remove_entry (cache, NULL, 1);
- }
-
- sp_cache_unref (cache);
- }
-
- ret = inode_path (fd->inode, NULL, &path);
- if (ret == -1) {
- goto unwind;
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, fd->inode->table,
- path);
-
- FREE (path);
-
- if (ret < 0) {
- errno = -ret;
- goto unwind;
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local) {
- local->fd = fd;
- frame->local = local;
- }
-
- STACK_WIND (frame, sp_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (readdir, frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
-{
- SP_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-
-int32_t
-sp_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf,
- struct stat *preoldparent, struct stat *postoldparent,
- struct stat *prenewparent, struct stat *postnewparent)
-{
- SP_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- return 0;
-}
-
-
-int32_t
-sp_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
-{
- sp_local_t *local = NULL;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- EINVAL);
-
- fd_ctx = sp_fd_ctx_new (this, local->loc.parent,
- (char *)local->loc.name, NULL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd_ctx, out, op_errno,
- ENOMEM);
-
- op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (op_ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- op_errno = ENOMEM;
- }
-
-out:
- SP_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_open_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if ((op_ret == -1)
- && !((op_errno == ENOENT) && (flags & O_CREAT))) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (open, frame, -1, op_errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int wbflags)
-{
- call_stub_t *stub = NULL;
- sp_local_t *local = NULL;
- int32_t op_errno = -1, ret = -1;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (errno));
- goto out;
- }
-
- stub = fop_open_stub (frame, sp_open_helper, loc, flags, fd, wbflags);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno, GF_FOP_OPEN);
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (open, frame, -1, op_errno, fd);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd,
- wbflags);
- }
-
- return 0;
-}
-
-
-static int32_t
-sp_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
-{
- sp_local_t *local = NULL;
- sp_fd_ctx_t *fd_ctx = NULL;
- char lookup_in_progress = 0, looked_up = 0;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- EINVAL);
-
- looked_up = 1;
- op_ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret,
- &op_errno, &lookup_in_progress,
- &looked_up, buf, NULL, &op_errno);
- if (op_ret == -1) {
- goto out;
- }
-
- sp_update_inode_ctx (this, local->loc.parent, NULL, NULL, NULL,
- NULL, postparent, NULL, NULL);
-
- fd_ctx = sp_fd_ctx_new (this, local->loc.parent,
- (char *)local->loc.name, NULL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd_ctx, out, op_errno,
- ENOMEM);
-
- op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (op_ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- op_errno = ENOMEM;
- }
-
-out:
- SP_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-
-int32_t
-sp_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- sp_local_t *local = NULL;
- int32_t op_errno = -1, ret = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_CREATE);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- } else {
- STACK_WIND (frame, sp_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags,
- mode, fd);
- }
- return 0;
-}
-
-
-int32_t
-sp_opendir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- sp_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int32_t op_errno = -1, ret = -1;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- goto out;
- }
-
- stub = fop_opendir_stub (frame, sp_opendir_helper, loc, fd);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_OPENDIR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_new_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
-{
- sp_local_t *local = NULL;
- char lookup_in_progress = 0, looked_up = 0;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- if (local == NULL) {
- op_errno = EINVAL;
- goto out;
- }
-
- looked_up = 1;
- op_ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret,
- &op_errno, &lookup_in_progress,
- &looked_up, buf, NULL, &op_errno);
- if (op_ret == -1) {
- goto out;
- }
-
- sp_update_inode_ctx (this, local->loc.parent, NULL, NULL, NULL,
- NULL, postparent, NULL, NULL);
-
-out:
- SP_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf, preparent,
- postparent);
- return 0;
-}
-
-
-int32_t
-sp_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- int32_t ret = -1, op_errno = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
- sp_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_MKDIR);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else {
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t op_errno = -1, ret = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
- sp_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_MKNOD);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else {
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
-{
- int32_t ret = -1, op_errno = -1;
- char need_unwind = 1;
- sp_inode_ctx_t *inode_ctx = NULL;
- sp_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "%s", strerror (op_errno));
- goto out;
- }
-
- inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode,
- SP_DONT_EXPECT,
- GF_FOP_SYMLINK);
- if (inode_ctx == NULL) {
- op_errno = ENOMEM;
- goto out;
- }
-
- need_unwind = 0;
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else {
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
-{
- SP_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, preparent,
- postparent);
- return 0;
-}
-
-int32_t
-sp_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (oldloc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", oldloc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- call_stub_t *stub = NULL;
- sp_cache_t *cache = NULL;
- int32_t ret = 0, op_errno = -1;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->path, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->name, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->inode, out,
- op_errno, EINVAL);
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->name, out,
- op_errno, EINVAL);
-
- ret = sp_cache_remove_parent_entry (frame, this, newloc->parent->table,
- (char *)newloc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- cache = sp_get_cache_inode (this, oldloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)oldloc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_link_stub (frame, sp_link_helper, oldloc, newloc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, oldloc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno, GF_FOP_LINK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, oldloc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_truncate_stub (frame, sp_truncate_helper, loc, offset);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_TRUNCATE);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (ftruncate, frame, -1, errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *prestat, struct stat *poststat)
-{
- SP_STACK_UNWIND (setattr, frame, op_ret, op_errno, prestat, poststat);
- return 0;
-}
-
-
-int
-sp_setattr_helper (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct stat *buf, int32_t valid)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, buf, valid);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int
-sp_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct stat *buf, int32_t valid)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_setattr_stub (frame, sp_setattr_helper, loc, buf, valid);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_SETATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, buf, valid);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct stat *buf)
-{
- SP_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf);
- return 0;
-}
-
-
-int32_t
-sp_readlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out,
- op_errno, EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_readlink_stub (frame, sp_readlink_helper, loc, size);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_READLINK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *preparent,
- struct stat *postparent)
-{
- SP_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
- postparent);
- return 0;
-}
-
-
-
-int32_t
-sp_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- SP_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_unlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->parent->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- stub = fop_unlink_stub (frame, sp_unlink_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_UNLINK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
- }
-
- return 0;
-}
-
-
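-/* Flush the cache of every fd currently open on 'inode'. */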
-void
-sp_remove_caches_from_all_fds_opened (xlator_t *this, inode_t *inode)
-{
- fd_t *fd = NULL;
- sp_cache_t *cache = NULL;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (fd, &inode->fd_list, inode_list) {
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- sp_cache_remove_entry (cache, NULL, 1);
- sp_cache_unref (cache);
- }
- }
- }
- UNLOCK (&inode->lock);
-}
-
-
-int32_t
-sp_rmdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- sp_remove_caches_from_all_fds_opened (this, loc->inode);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table,
- (char *)loc->path);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- stub = fop_rmdir_stub (frame, sp_rmdir_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_RMDIR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
-{
- SP_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
- return 0;
-}
-
-
-int32_t
-sp_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (readv, frame, -1, errno, NULL, -1, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- iobref);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (writev, frame, -1, errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (fsync, frame, -1, errno, NULL, NULL);
- return 0;
-}
-
-
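-/*
- * Resumed once the lookups triggered by sp_rename complete: the rename is
- * wound only if the lookup on oldloc succeeded and the lookup on newloc
- * (when newloc->inode exists) either succeeded or failed with ENOENT.
- */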
-int32_t
-sp_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- uint64_t value = 0;
- char need_unwind = 0;
- char can_wind = 0;
- int32_t ret = 0, op_errno = -1;
- int32_t old_op_ret = -1, old_op_errno = -1;
- int32_t new_op_ret = -1, new_op_errno = -1;
- char old_inode_looked_up = 0, new_inode_looked_up = 0;
- sp_inode_ctx_t *old_inode_ctx = NULL, *new_inode_ctx = NULL;
-
- ret = inode_ctx_get (oldloc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", oldloc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- old_inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, old_inode_ctx, unwind,
- op_errno, EINVAL);
-
- LOCK (&old_inode_ctx->lock);
- {
- old_inode_looked_up = old_inode_ctx->looked_up;
- old_op_ret = old_inode_ctx->op_ret;
- old_op_errno = old_inode_ctx->op_errno;
- need_unwind = old_inode_ctx->need_unwind;
- }
- UNLOCK (&old_inode_ctx->lock);
-
- if (need_unwind) {
- /* there was an error while queuing up lookup stub for newloc */
- goto unwind;
- }
-
- if (newloc->inode != NULL) {
- ret = inode_ctx_get (newloc->inode, this, &value);
- if (ret == 0) {
- new_inode_ctx = (sp_inode_ctx_t *)(long)value;
- if (new_inode_ctx != NULL) {
- LOCK (&new_inode_ctx->lock);
- {
- new_inode_looked_up = new_inode_ctx->looked_up;
- new_op_ret = new_inode_ctx->op_ret;
- new_op_errno = new_inode_ctx->op_errno;
- }
- UNLOCK (&new_inode_ctx->lock);
- }
- }
- }
-
- if (new_inode_ctx == NULL) {
- if (old_op_ret == -1) {
- op_errno = old_op_errno;
- goto unwind;
- } else {
- can_wind = 1;
- }
- } else {
- if (new_inode_looked_up && old_inode_looked_up) {
- if ((old_op_ret == -1)
- || ((new_op_ret == -1)
- && (new_op_errno != ENOENT))) {
- if (old_op_ret == -1) {
- op_errno = old_op_errno;
- } else {
- op_errno = new_op_errno;
- }
-
- goto unwind;
- } else {
- can_wind = 1;
- }
- }
- }
-
- if (can_wind) {
- STACK_WIND (frame, sp_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
- }
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-
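-/*
- * Invalidate oldloc and newloc in their parents' caches (and the related
- * ancestor entries), flush the fd caches when a directory is being renamed,
- * and hold the rename back until both inodes have been looked up.
- */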
-int32_t
-sp_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- char need_unwind = 1;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- sp_cache_t *cache = NULL;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = -1, op_errno = -1;
- char old_inode_can_wind = 0, new_inode_can_wind = 0;
- char old_inode_need_lookup = 0, new_inode_need_lookup = 0;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->path, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->name, out,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, oldloc->inode, out,
- op_errno, EINVAL);
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, newloc->path, out,
- op_errno, EINVAL);
-
- cache = sp_get_cache_inode (this, oldloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)oldloc->name, 0);
- sp_cache_unref (cache);
- }
-
- cache = sp_get_cache_inode (this, newloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)newloc->name, 0);
- sp_cache_unref (cache);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, oldloc->parent->table,
- (char *)oldloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, newloc->parent->table,
- (char *)newloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (S_ISDIR (oldloc->inode->st_mode)) {
- sp_remove_caches_from_all_fds_opened (this, oldloc->inode);
- }
-
- stub = fop_rename_stub (frame, sp_rename_helper, oldloc, newloc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- ret = sp_process_inode_ctx (frame, this, oldloc, stub, &need_unwind,
- &old_inode_need_lookup, &old_inode_can_wind,
- &op_errno, GF_FOP_RENAME);
- if (ret == -1) {
- goto out;
- }
-
- if (newloc->inode != NULL) {
- stub = fop_rename_stub (frame, sp_rename_helper, oldloc,
- newloc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- ret = sp_process_inode_ctx (frame, this, newloc, stub,
- &need_unwind,
- &new_inode_need_lookup,
- &new_inode_can_wind, &op_errno,
- GF_FOP_RENAME);
- if (ret == -1) {
- ret = inode_ctx_get (oldloc->inode, this, &value);
- if (ret == -1) {
- goto out;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long)value;
- if (inode_ctx == NULL) {
- goto out;
- }
-
- LOCK (&inode_ctx->lock);
- {
- if (!inode_ctx->looked_up) {
- /* unwind in sp_rename_helper */
- need_unwind = 0;
- inode_ctx->need_unwind = 1;
- }
- }
- UNLOCK (&inode_ctx->lock);
- }
-
- } else {
- new_inode_can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- } else if (old_inode_need_lookup || new_inode_need_lookup) {
- if (old_inode_need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, oldloc,
- NULL);
- }
-
- if (new_inode_need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, newloc,
- NULL);
- }
- } else if (old_inode_can_wind && new_inode_can_wind) {
- STACK_WIND (frame, sp_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_setxattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict,
- flags);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (setxattr, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_setxattr_stub (frame, sp_setxattr_helper, loc, dict, flags);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_SETXATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (setxattr, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict,
- flags);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_removexattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (removexattr, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_removexattr_stub (frame, sp_removexattr_helper, loc, name);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_REMOVEXATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (removexattr, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- SP_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int32_t
-sp_getxattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_getxattr_stub (frame, sp_getxattr_helper, loc, name);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_GETXATTR);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dir_entry_t *entries, int32_t count)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
- dir_entry_t *trav = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- for (trav = entries->next; trav; trav = trav->next) {
- sp_cache_remove_entry (cache, trav->name, 0);
- }
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents, fd, flags, entries,
- count);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (setdents, frame, -1, errno);
- return 0;
-}
-
-
-int32_t
-sp_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dir_entry_t *entries,
- int32_t count)
-{
- dir_entry_t *trav = NULL;
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- if ((local == NULL) || (local->fd == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- cache = sp_get_cache_fd (this, local->fd);
- if (cache) {
- for (trav = entries->next; trav; trav = trav->next) {
- if (S_ISLNK (trav->buf.st_mode)) {
- sp_cache_remove_entry (cache, trav->name, 0);
- }
- }
-
- sp_cache_unref (cache);
- }
-
-out:
- SP_STACK_UNWIND (getdents, frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-
-int32_t
-sp_getdents (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, int32_t flags)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
- sp_local_t *local = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- local->fd = fd;
- frame->local = local;
-
- STACK_WIND (frame, sp_getdents_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents, fd, size, offset, flags);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (getdents, frame, -1, errno, NULL, -1);
- return 0;
-}
-
-
-int32_t
-sp_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- SP_STACK_UNWIND (checksum, frame, op_ret, op_errno, file_checksum,
- dir_checksum);
- return 0;
-}
-
-
-int32_t
-sp_checksum_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_checksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum, loc, flag);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (checksum, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out,
- op_errno, EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_checksum_stub (frame, sp_checksum_helper, loc, flag);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_CHECKSUM);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (checksum, frame, -1, op_errno, NULL, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_checksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum, loc, flag);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- SP_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int32_t
-sp_xattrop_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- sp_cache_t *cache = NULL;
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno,
- EINVAL);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- sp_cache_unref (cache);
- }
-
- stub = fop_xattrop_stub (frame, sp_xattrop_helper, loc, flags, dict);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_XATTROP);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- sp_cache_unref (cache);
- }
-
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (xattrop, frame, -1, errno, NULL);
- return 0;
-}
-
-int32_t
-sp_stbuf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int32_t
-sp_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_stat_stub (frame, sp_stat_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_STAT);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_access_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (access, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_access_stub (frame, sp_access_helper, loc, mask);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_ACCESS);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (access, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
- }
-
- return 0;
-}
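/*
 * Editor's sketch (not part of the patch): every fop removed in this file
 * follows the same wind-or-defer shape seen in sp_stat()/sp_access() above
 * and repeated below. The fop wraps itself in a call stub, then either
 * unwinds with an error (need_unwind), issues a lookup first (need_lookup),
 * or winds immediately (can_wind); its _helper later re-reads the op_ret /
 * op_errno that the lookup cached in the inode context and winds only on
 * success. The standalone function below restates just that helper decision
 * with invented names; it is not GlusterFS code.
 */
#include <stdio.h>

/* mirrors the LOCK/UNLOCK block in sp_<fop>_helper(): the cached lookup
 * result alone decides whether the real fop is forwarded */
static const char *
sketch_helper_decision(int cached_op_ret, int cached_op_errno)
{
    if (cached_op_ret == -1) {
        printf("lookup had failed: unwind with errno %d\n", cached_op_errno);
        return "unwind";
    }

    printf("lookup succeeded: wind to FIRST_CHILD\n");
    return "wind";
}

int main(void)
{
    sketch_helper_decision(0, 0);  /* cached lookup ok      -> wind   */
    sketch_helper_decision(-1, 2); /* cached ENOENT (2)     -> unwind */
    return 0;
}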
-
-
-int32_t
-sp_inodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *lock)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, lock);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (inodelk, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
- int32_t cmd, struct flock *lock)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_inodelk_stub (frame, sp_inodelk_helper, volume, loc, cmd,
- lock);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_INODELK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (inodelk, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd,
- lock);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_entrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
-{
- uint64_t value = 0;
- sp_inode_ctx_t *inode_ctx = NULL;
- int32_t ret = 0, op_ret = -1, op_errno = -1;
-
- ret = inode_ctx_get (loc->inode, this, &value);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "context not set in inode "
- "(%p)", loc->inode);
- op_errno = EINVAL;
- goto unwind;
- }
-
- inode_ctx = (sp_inode_ctx_t *)(long) value;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno,
- EINVAL);
-
- LOCK (&inode_ctx->lock);
- {
- op_ret = inode_ctx->op_ret;
- op_errno = inode_ctx->op_errno;
- }
- UNLOCK (&inode_ctx->lock);
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk, volume, loc, basename,
- cmd, type);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (entrylk, frame, -1, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_errno = -1;
- call_stub_t *stub = NULL;
- char can_wind = 0, need_lookup = 0, need_unwind = 1;
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out,
- op_errno, EINVAL);
-
- stub = fop_entrylk_stub (frame, sp_entrylk_helper, volume, loc,
- basename, cmd, type);
- if (stub == NULL) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- sp_process_inode_ctx (frame, this, loc, stub, &need_unwind,
- &need_lookup, &can_wind, &op_errno,
- GF_FOP_ENTRYLK);
-
-out:
- if (need_unwind) {
- SP_STACK_UNWIND (entrylk, frame, -1, op_errno);
- } else if (need_lookup) {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk, volume, loc,
- basename, cmd, type);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_forget (xlator_t *this, inode_t *inode)
-{
- struct stat *buf = NULL;
- uint64_t value = 0;
-
- inode_ctx_del (inode, this, &value);
-
- if (value) {
- buf = (void *)(long)value;
- FREE (buf);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_release (xlator_t *this, fd_t *fd)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- sp_cache_t *cache = NULL;
-
- ret = fd_ctx_del (fd, this, &value);
- if (!ret) {
- fd_ctx = (void *)(long) value;
- cache = fd_ctx->cache;
- if (cache) {
- gf_log (this->name, GF_LOG_DEBUG, "cache hits: %lu, "
- "cache miss: %lu", cache->hits, cache->miss);
- }
-
- sp_fd_ctx_free (fd_ctx);
- }
-
- return 0;
-}
-
-
-
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = -1;
- sp_private_t *priv = NULL;
-
- if (!this->children || this->children->next) {
- gf_log ("stat-prefetch",
- GF_LOG_ERROR,
- "FATAL: translator %s does not have exactly one child "
- "node", this->name);
- goto out;
- }
-
- priv = CALLOC (1, sizeof(sp_private_t));
- LOCK_INIT (&priv->lock);
-
- this->private = priv;
-
- ret = 0;
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = sp_lookup,
- .readdir = sp_readdir,
- .readdirp = sp_readdir,
- .open = sp_open,
- .create = sp_create,
- .opendir = sp_opendir,
- .mkdir = sp_mkdir,
- .mknod = sp_mknod,
- .symlink = sp_symlink,
- .link = sp_link,
- .truncate = sp_truncate,
- .ftruncate = sp_ftruncate,
- .readlink = sp_readlink,
- .unlink = sp_unlink,
- .rmdir = sp_rmdir,
- .readv = sp_readv,
- .writev = sp_writev,
- .fsync = sp_fsync,
- .rename = sp_rename,
- .setxattr = sp_setxattr,
- .removexattr = sp_removexattr,
- .setdents = sp_setdents,
- .getdents = sp_getdents,
- .checksum = sp_checksum,
- .xattrop = sp_xattrop,
- .fxattrop = sp_fxattrop,
- .setattr = sp_setattr,
- .stat = sp_stat,
- .access = sp_access,
- .getxattr = sp_getxattr,
- .inodelk = sp_inodelk,
- .entrylk = sp_entrylk,
-};
-
-struct xlator_mops mops = {
-};
-
-struct xlator_cbks cbks = {
- .forget = sp_forget,
- .release = sp_release,
- .releasedir = sp_release
-};
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.h b/xlators/performance/stat-prefetch/src/stat-prefetch.h
deleted file mode 100644
index 3fcf4a0a63e..00000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _STAT_PREFETCH_H
-#define _STAT_PREFETCH_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "locking.h"
-#include "inode.h"
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "rbthash.h"
-#include "hashfn.h"
-#include "call-stub.h"
-#include <libgen.h>
-
-struct sp_cache {
- rbthash_table_t *table;
- xlator_t *this;
- uint64_t expected_offset; /* Offset where the next read will
- * happen.
- */
- gf_lock_t lock;
- unsigned long miss;
- unsigned long hits;
- uint32_t ref;
-};
-typedef struct sp_cache sp_cache_t;
-
-struct sp_fd_ctx {
- sp_cache_t *cache;
- inode_t *parent_inode; /*
- * inode corresponding to dirname (path)
- */
- char *name; /*
- * basename of path on which this fd is
- * opened
- */
-};
-typedef struct sp_fd_ctx sp_fd_ctx_t;
-
-struct sp_local {
- loc_t loc;
- fd_t *fd;
- char is_lookup;
-};
-typedef struct sp_local sp_local_t;
-
-struct sp_inode_ctx {
- char looked_up;
- char lookup_in_progress;
- char need_unwind;
- int32_t op_ret;
- int32_t op_errno;
- struct stat stbuf;
- gf_lock_t lock;
- struct list_head waiting_ops;
-};
-typedef struct sp_inode_ctx sp_inode_ctx_t;
-
-struct sp_private {
- struct mem_pool *mem_pool;
- gf_lock_t lock;
-};
-typedef struct sp_private sp_private_t;
-
-void sp_local_free (sp_local_t *local);
-
-#define SP_STACK_UNWIND(op, frame, params ...) do { \
- sp_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND_STRICT (op, frame, params); \
- sp_local_free (__local); \
-} while (0)
-
-#define SP_STACK_DESTROY(frame) do { \
- sp_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- sp_local_free (__local); \
-} while (0)
-
-#endif /* #ifndef _STAT_PREFETCH_H */
diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am
deleted file mode 100644
index 06e85fc9216..00000000000
--- a/xlators/performance/symlink-cache/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-xlator_LTLIBRARIES = symlink-cache.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance
-
-symlink_cache_la_LDFLAGS = -module -avoidversion
-
-symlink_cache_la_SOURCES = symlink-cache.c
-symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c
deleted file mode 100644
index 22b1c5482ea..00000000000
--- a/xlators/performance/symlink-cache/src/symlink-cache.c
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "list.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "common-utils.h"
-
-struct symlink_cache {
- time_t ctime;
- char *readlink;
-};
-
-
-static int
-symlink_inode_ctx_get (inode_t *inode, xlator_t *this, void **ctx)
-{
- int ret = 0;
- uint64_t tmp_ctx = 0;
- ret = inode_ctx_get (inode, this, &tmp_ctx);
- if (-1 == ret)
- gf_log (this->name, GF_LOG_ERROR, "dict get failed");
- else
- *ctx = (void *)(long)tmp_ctx;
-
- return 0;
-}
-
-
-static int
-symlink_inode_ctx_set (inode_t *inode, xlator_t *this, void *ctx)
-{
- int ret = 0;
- ret = inode_ctx_put (inode, this, (uint64_t)(long) ctx);
- if (-1 == ret)
- gf_log (this->name, GF_LOG_ERROR, "dict set failed");
-
- return 0;
-}
-
-
-int
-sc_cache_update (xlator_t *this, inode_t *inode, const char *link)
-{
- struct symlink_cache *sc = NULL;
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
- if (!sc)
- return 0;
-
- if (!sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "updating cache: %s", link);
-
- sc->readlink = strdup (link);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "not updating existing cache: %s with %s",
- sc->readlink, link);
- }
-
- return 0;
-}
-
-
-int
-sc_cache_set (xlator_t *this, inode_t *inode, struct stat *buf,
- const char *link)
-{
- struct symlink_cache *sc = NULL;
- int ret = -1;
- int need_set = 0;
-
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
- if (!sc) {
- need_set = 1;
- sc = CALLOC (1, sizeof (*sc));
- if (!sc) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto err;
- }
- }
-
- if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "replacing old cache: %s with new cache: %s",
- sc->readlink, link);
- FREE (sc->readlink);
- sc->readlink = NULL;
- }
-
- if (link) {
- sc->readlink = strdup (link);
- if (!sc->readlink) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto err;
- }
- }
-
- sc->ctime = buf->st_ctime;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting symlink cache: %s", link);
-
- if (need_set) {
- ret = symlink_inode_ctx_set (inode, this, sc);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set inode context (%s)",
- strerror (-ret));
- goto err;
- }
- }
-
- return 0;
-err:
-
- if (sc) {
- if (sc->readlink)
- FREE (sc->readlink);
- sc->readlink = NULL;
- FREE (sc);
- }
-
- return -1;
-}
-
-
-int
-sc_cache_flush (xlator_t *this, inode_t *inode)
-{
- struct symlink_cache *sc = NULL;
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
- if (!sc)
- return 0;
-
- if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flushing cache: %s", sc->readlink);
-
- FREE (sc->readlink);
- sc->readlink = NULL;
- }
-
- FREE (sc);
-
- return 0;
-}
-
-
-int
-sc_cache_validate (xlator_t *this, inode_t *inode, struct stat *buf)
-{
- struct symlink_cache *sc = NULL;
- uint64_t tmp_sc = 0;
-
- if (!S_ISLNK (buf->st_mode)) {
- sc_cache_flush (this, inode);
- return 0;
- }
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
-
- if (!sc) {
- sc_cache_set (this, inode, buf, NULL);
- inode_ctx_get (inode, this, &tmp_sc);
-
- if (!sc) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- return 0;
- }
- sc = (struct symlink_cache *)(long)tmp_sc;
- }
-
- if (sc->ctime == buf->st_ctime)
- return 0;
-
- /* STALE */
- if (sc->readlink) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flushing cache: %s", sc->readlink);
-
- FREE (sc->readlink);
- sc->readlink = NULL;
- }
-
- sc->ctime = buf->st_ctime;
-
- return 0;
-}
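/*
 * Editor's sketch (not part of the patch): the invalidation rule applied by
 * sc_cache_validate() above, restated on plain values. A cached readlink
 * result is reused only while the inode's st_ctime still matches the ctime
 * recorded when the target was cached; any ctime change flushes the cached
 * string and the new ctime is remembered. The timestamps below are invented.
 */
#include <stdio.h>
#include <time.h>

static int
sketch_cache_is_fresh(time_t cached_ctime, time_t current_ctime)
{
    return cached_ctime == current_ctime; /* stale as soon as ctime moves */
}

int main(void)
{
    printf("%d\n", sketch_cache_is_fresh(1234, 1234)); /* 1: reuse cache      */
    printf("%d\n", sketch_cache_is_fresh(1234, 1300)); /* 0: flush and refill */
    return 0;
}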
-
-
-
-int
-sc_cache_get (xlator_t *this, inode_t *inode, char **link)
-{
- struct symlink_cache *sc = NULL;
-
- symlink_inode_ctx_get (inode, this, VOID(&sc));
-
- if (!sc)
- return 0;
-
- if (link && sc->readlink)
- *link = strdup (sc->readlink);
- return 0;
-}
-
-
-int
-sc_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- const char *link, struct stat *sbuf)
-{
- if (op_ret > 0)
- sc_cache_update (this, frame->local, link);
-
- inode_unref (frame->local);
- frame->local = NULL;
-
- STACK_UNWIND (frame, op_ret, op_errno, link, sbuf);
- return 0;
-}
-
-
-int
-sc_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
-{
- char *link = NULL;
- struct stat buf = {0, };
-
- sc_cache_get (this, loc->inode, &link);
-
- if (link) {
- /* cache hit */
- gf_log (this->name, GF_LOG_DEBUG,
- "cache hit %s -> %s",
- loc->path, link);
-
- /*
- libglusterfsclient, nfs or any other translators
- using buf in readlink_cbk should be aware that @buf
- is 0 filled
- */
- STACK_UNWIND (frame, strlen (link), 0, link, &buf);
- FREE (link);
- return 0;
- }
-
- frame->local = inode_ref (loc->inode);
-
- STACK_WIND (frame, sc_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc, size);
-
- return 0;
-}
-
-
-int
-sc_symlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *buf, struct stat *preparent,
- struct stat *postparent)
-{
- if (op_ret == 0) {
- if (frame->local) {
- sc_cache_set (this, inode, buf, frame->local);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, preparent,
- postparent);
- return 0;
-}
-
-
-int
-sc_symlink (call_frame_t *frame, xlator_t *this,
- const char *dst, loc_t *src)
-{
- frame->local = strdup (dst);
-
- STACK_WIND (frame, sc_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- dst, src);
-
- return 0;
-}
-
-
-int
-sc_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr,
- struct stat *postparent)
-{
- if (op_ret == 0)
- sc_cache_validate (this, inode, buf);
- else
- sc_cache_flush (this, inode);
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr, postparent);
- return 0;
-}
-
-
-int
-sc_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- STACK_WIND (frame, sc_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
- return 0;
-}
-
-
-int
-sc_forget (xlator_t *this,
- inode_t *inode)
-{
- sc_cache_flush (this, inode);
-
- return 0;
-}
-
-
-int32_t
-init (xlator_t *this)
-{
-
- if (!this->children || this->children->next)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- return 0;
-}
-
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = sc_lookup,
- .symlink = sc_symlink,
- .readlink = sc_readlink,
-};
-
-struct xlator_mops mops = {
-};
-
-struct xlator_cbks cbks = {
- .forget = sc_forget,
-};
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am
index f800abad50d..a6a16fcc080 100644
--- a/xlators/performance/write-behind/src/Makefile.am
+++ b/xlators/performance/write-behind/src/Makefile.am
@@ -1,12 +1,16 @@
xlator_LTLIBRARIES = write-behind.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-write_behind_la_LDFLAGS = -module -avoidversion
+write_behind_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
write_behind_la_SOURCES = write-behind.c
write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = write-behind-mem-types.h write-behind-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h
new file mode 100644
index 00000000000..a0647299150
--- /dev/null
+++ b/xlators/performance/write-behind/src/write-behind-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __WB_MEM_TYPES_H__
+#define __WB_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_wb_mem_types_ {
+ gf_wb_mt_wb_file_t = gf_common_mt_end + 1,
+ gf_wb_mt_wb_request_t,
+ gf_wb_mt_iovec,
+ gf_wb_mt_wb_conf_t,
+ gf_wb_mt_wb_inode_t,
+ gf_wb_mt_end
+};
+#endif
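/*
 * Editor's note (illustrative, not part of the patch): the enum above starts
 * this translator's private allocation types at gf_common_mt_end + 1 so they
 * cannot collide with the common mem-types that libglusterfs defines; the new
 * code further down tags each allocation with one of them, e.g.
 * GF_CALLOC(1, sizeof(*req), gf_wb_mt_wb_request_t). The standalone check
 * below uses a made-up value for the common end marker purely to show the
 * numbering scheme.
 */
enum { sketch_common_mt_end = 158 }; /* stand-in for the real gf_common_mt_end */

enum sketch_wb_mem_types_ {
    sketch_wb_mt_wb_file_t = sketch_common_mt_end + 1,
    sketch_wb_mt_wb_request_t,
    sketch_wb_mt_iovec,
    sketch_wb_mt_wb_conf_t,
    sketch_wb_mt_wb_inode_t,
    sketch_wb_mt_end
};

_Static_assert(sketch_wb_mt_wb_inode_t == sketch_common_mt_end + 5,
               "private ids follow the common range without gaps");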
diff --git a/xlators/performance/write-behind/src/write-behind-messages.h b/xlators/performance/write-behind/src/write-behind-messages.h
new file mode 100644
index 00000000000..e9ea474879b
--- /dev/null
+++ b/xlators/performance/write-behind/src/write-behind-messages.h
@@ -0,0 +1,31 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _WRITE_BEHIND_MESSAGES_H_
+#define _WRITE_BEHIND_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(WRITE_BEHIND, WRITE_BEHIND_MSG_EXCEEDED_MAX_SIZE,
+ WRITE_BEHIND_MSG_INIT_FAILED, WRITE_BEHIND_MSG_INVALID_ARGUMENT,
+ WRITE_BEHIND_MSG_NO_MEMORY, WRITE_BEHIND_MSG_SIZE_NOT_SET,
+ WRITE_BEHIND_MSG_VOL_MISCONFIGURED,
+ WRITE_BEHIND_MSG_RES_UNAVAILABLE);
+
+#endif /* _WRITE_BEHIND_MESSAGES_H_ */
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 883d25e9666..00cfca016e6 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -1,2656 +1,3278 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*TODO: check for non null wb_file_data before getting wb_file */
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "list.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "common-utils.h"
-#include "call-stub.h"
-#include "statedump.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/defaults.h>
+#include "write-behind-mem-types.h"
+#include "write-behind-messages.h"
#define MAX_VECTOR_COUNT 8
#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
-#define WB_WINDOW_SIZE 1048576 /* 1MB */
-
+#define WB_WINDOW_SIZE 1048576 /* 1MB */
+
typedef struct list_head list_head_t;
struct wb_conf;
-struct wb_page;
-struct wb_file;
-
-
-typedef struct wb_file {
- int disabled;
- uint64_t disable_till;
- size_t window_conf;
- size_t window_current;
- size_t aggregate_current;
- int32_t refcount;
- int32_t op_ret;
- int32_t op_errno;
- list_head_t request;
- list_head_t passive_requests;
- fd_t *fd;
- gf_lock_t lock;
- xlator_t *this;
-}wb_file_t;
-
+struct wb_inode;
+
+typedef struct wb_inode {
+ ssize_t window_conf;
+ ssize_t window_current;
+ ssize_t transit; /* size of data stack_wound, and yet
+ to be fulfilled (wb_fulfill_cbk).
+ used for trickling_writes
+ */
+
+ list_head_t all; /* All requests, from enqueue() till destroy().
+ Used only for resetting generation
+ number when empty.
+ */
+ list_head_t todo; /* Work to do (i.e, STACK_WIND to server).
+ Once we STACK_WIND, the entry is taken
+ off the list. If it is non-sync write,
+ then we continue to track it via @liability
+ or @temptation depending on the status
+ of its writeback.
+ */
+ list_head_t liability; /* Non-sync writes which are lied
+ (STACK_UNWIND'ed to caller) but ack
+ from server not yet complete. This
+ is the "liability" which we hold, and
+ must guarantee that dependent operations
+ which arrive later (which overlap, etc.)
+ are issued only after their dependencies
+ in this list are "fulfilled".
+
+ Server acks for entries in this list
+ shrinks the window.
+
+ The sum total of all req->write_size
+ of entries in this list must be kept less
+ than the permitted window size.
+ */
+ list_head_t temptation; /* Operations for which we are tempted
+ to 'lie' (write-behind), but temporarily
+ holding off (because of insufficient
+ window capacity, etc.)
+
+ This is the list to look at to grow
+ the window (in __wb_pick_unwinds()).
+
+ Entries typically get chosen from
+ write-behind from this list, and therefore
+ get "upgraded" to the "liability" list.
+ */
+ list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC
+ which are currently STACK_WIND'ed towards the server.
+ This is for guaranteeing that no two overlapping
+ writes are in progress at the same time. Modules
+ like eager-lock in AFR depend on this behavior.
+ */
+ list_head_t invalidate_list; /* list of wb_inodes that were marked for
+ * iatt invalidation due to requests in
+ * liability queue fulfilled while there
+ * was a readdirp session on parent
+ * directory. For a directory inode, this
+ * list points to list of children.
+ */
+ uint64_t gen; /* Liability generation number. Represents
+ the current 'state' of liability. Every
+ new addition to the liability list bumps
+ the generation number.
+
+ a newly arrived request is only required
+ to perform causal checks against the entries
+ in the liability list which were present
+ at the time of its addition. the generation
+ number at the time of its addition is stored
+ in the request and used during checks.
+
+ the liability list can grow while the request
+ waits in the todo list waiting for its
+ dependent operations to complete. however
+ it is not of the request's concern to depend
+ itself on those new entries which arrived
+ after it arrived (i.e, those that have a
+ liability generation higher than itself)
+ */
+ size_t size; /* Size of the file to catch write after EOF. */
+ gf_lock_t lock;
+ xlator_t *this;
+ inode_t *inode;
+ int dontsync; /* If positive, don't pick lies for
+ * winding. This is needed to break infinite
+ * recursion during invocation of
+ * wb_process_queue from
+ * wb_fulfill_cbk in case of an
+ * error during fulfill.
+ */
+ gf_atomic_int32_t readdirps;
+ gf_atomic_int8_t invalidate;
+
+} wb_inode_t;
typedef struct wb_request {
- list_head_t list;
- list_head_t winds;
- list_head_t unwinds;
- list_head_t other_requests;
- call_stub_t *stub;
- size_t write_size;
- int32_t refcount;
- wb_file_t *file;
- union {
- struct {
- char write_behind;
- char stack_wound;
- char got_reply;
- char virgin;
- }write_request;
-
- struct {
- char marked_for_resume;
- }other_requests;
- }flags;
+ list_head_t all;
+ list_head_t todo;
+ list_head_t lie; /* either in @liability or @temptation */
+ list_head_t winds;
+ list_head_t unwinds;
+ list_head_t wip;
+
+ call_stub_t *stub;
+
+ ssize_t write_size; /* currently held size
+ (after collapsing) */
+ size_t orig_size; /* size which arrived with the request.
+ This is the size by which we grow
+ the window when unwinding the frame.
+ */
+ size_t total_size; /* valid only in @head in wb_fulfill().
+ This is the size with which we perform
+ STACK_WIND to server and therefore the
+ amount by which we shrink the window.
+ */
+
+ int op_ret;
+ int op_errno;
+
+ int32_t refcount;
+ wb_inode_t *wb_inode;
+ glusterfs_fop_t fop;
+ gf_lkowner_t lk_owner;
+ pid_t client_pid;
+ struct iobref *iobref;
+ uint64_t gen; /* inode liability state at the time of
+ request arrival */
+
+ fd_t *fd;
+ int wind_count; /* number of sync-attempts. Only
+ for debug purposes */
+ struct {
+ size_t size; /* 0 size == till infinity */
+ off_t off;
+ int append : 1; /* offset is invalid. only one
+ outstanding append at a time */
+ int tempted : 1; /* true only for non-sync writes */
+ int lied : 1; /* sin committed */
+ int fulfilled : 1; /* got server acknowledgement */
+ int go : 1; /* enough aggregating, good to go */
+ } ordering;
+
+ /* for debug purposes. A request might outlive the fop it is
+ * representing. So, preserve essential info for logging.
+ */
+ uint64_t unique;
+ uuid_t gfid;
} wb_request_t;
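/*
 * Editor's sketch (not part of the patch): a self-contained model of the life
 * cycle that the comments in wb_inode_t and wb_request_t above describe for a
 * non-sync write. The struct fields mirror the ordering flags above; the real
 * transitions happen in wb_enqueue_tempted(), __wb_pick_unwinds(),
 * wb_fulfill_cbk() and __wb_fulfill_request() further down in this hunk, and
 * the printed trace here is invented for illustration only.
 */
#include <stdio.h>

struct sketch_req {
    int tempted;   /* queued for write-behind (temptation list)              */
    int lied;      /* acknowledged to the application early (liability list) */
    int fulfilled; /* server ack received, liability discharged              */
};

static void
sketch_trace(const char *step, const struct sketch_req *r)
{
    printf("%-16s tempted=%d lied=%d fulfilled=%d\n", step, r->tempted,
           r->lied, r->fulfilled);
}

int main(void)
{
    struct sketch_req r = {0, 0, 0};

    r.tempted = 1;   /* enqueue: placed on the todo and temptation lists */
    sketch_trace("enqueue", &r);

    r.lied = 1;      /* pick-unwind: unwound to the caller, moved to liability */
    sketch_trace("unwind-early", &r);

    r.fulfilled = 1; /* fulfill: server acknowledged, window can shrink */
    sketch_trace("fulfill", &r);

    return 0;
}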
+typedef struct wb_conf {
+ uint64_t aggregate_size;
+ uint64_t page_size;
+ uint64_t window_size;
+ gf_boolean_t flush_behind;
+ gf_boolean_t trickling_writes;
+ gf_boolean_t strict_write_ordering;
+ gf_boolean_t strict_O_DIRECT;
+ gf_boolean_t resync_after_fsync;
+} wb_conf_t;
+
+wb_inode_t *
+__wb_inode_ctx_get(xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ wb_inode_t *wb_inode = NULL;
+ int ret = 0;
-struct wb_conf {
- uint64_t aggregate_size;
- uint64_t window_size;
- uint64_t disable_till;
- gf_boolean_t enable_O_SYNC;
- gf_boolean_t flush_behind;
- gf_boolean_t enable_trickling_writes;
-};
+ ret = __inode_ctx_get(inode, this, &value);
+ if (ret)
+ return NULL;
+ wb_inode = (wb_inode_t *)(unsigned long)value;
-typedef struct wb_local {
- list_head_t winds;
- int32_t flags;
- int32_t wbflags;
- struct wb_file *file;
- wb_request_t *request;
- int op_ret;
- int op_errno;
- call_frame_t *frame;
- int32_t reply_count;
-} wb_local_t;
+ return wb_inode;
+}
+
+wb_inode_t *
+wb_inode_ctx_get(xlator_t *this, inode_t *inode)
+{
+ wb_inode_t *wb_inode = NULL;
+ GF_VALIDATE_OR_GOTO("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
-typedef struct wb_conf wb_conf_t;
-typedef struct wb_page wb_page_t;
+ LOCK(&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get(this, inode);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return wb_inode;
+}
+static void
+wb_set_invalidate(wb_inode_t *wb_inode)
+{
+ int readdirps = 0;
+ inode_t *parent_inode = NULL;
+ wb_inode_t *wb_parent_inode = NULL;
-int32_t
-wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all);
+ parent_inode = inode_parent(wb_inode->inode, NULL, NULL);
+ if (parent_inode)
+ wb_parent_inode = wb_inode_ctx_get(wb_inode->this, parent_inode);
-ssize_t
-wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds);
+ if (wb_parent_inode) {
+ LOCK(&wb_parent_inode->lock);
+ {
+ readdirps = GF_ATOMIC_GET(wb_parent_inode->readdirps);
+ if (readdirps && list_empty(&wb_inode->invalidate_list)) {
+ inode_ref(wb_inode->inode);
+ GF_ATOMIC_INIT(wb_inode->invalidate, 1);
+ list_add(&wb_inode->invalidate_list,
+ &wb_parent_inode->invalidate_list);
+ }
+ }
+ UNLOCK(&wb_parent_inode->lock);
+ } else {
+ GF_ATOMIC_INIT(wb_inode->invalidate, 0);
+ }
+
+ if (parent_inode)
+ inode_unref(parent_inode);
+
+ return;
+}
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size,
- char wind_all, char enable_trickling_writes);
+void
+wb_process_queue(wb_inode_t *wb_inode);
+/*
+ Below is a succinct explanation of the code deciding whether two regions
+ overlap, from Pavan <tcp@gluster.com>.
-static void
-__wb_request_unref (wb_request_t *this)
+ For any two ranges to be non-overlapping, either the end of the first
+ range is lesser than the start of the second, or vice versa. Example -
+
+ <---------> <-------------->
+ p q x y
+
+ ( q < x ) or (y < p) = > No overlap.
+
+ To check for *overlap*, we can negate this (using de morgan's laws), and
+ it becomes -
+
+ (q >= x ) and (y >= p)
+
+ Either that, or you write the negation using -
+
+ if (! ((q < x) or (y < p)) ) {
+ "Overlap"
+ }
+*/
+
+gf_boolean_t
+wb_requests_overlap(wb_request_t *req1, wb_request_t *req2)
{
- if (this->refcount <= 0) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "refcount(%d) is <= 0", this->refcount);
- return;
- }
+ uint64_t r1_start = 0;
+ uint64_t r1_end = 0;
+ uint64_t r2_start = 0;
+ uint64_t r2_end = 0;
+ gf_boolean_t do_overlap = _gf_false;
+
+ r1_start = req1->ordering.off;
+ if (req1->ordering.size)
+ r1_end = r1_start + req1->ordering.size - 1;
+ else
+ r1_end = ULLONG_MAX;
+
+ r2_start = req2->ordering.off;
+ if (req2->ordering.size)
+ r2_end = r2_start + req2->ordering.size - 1;
+ else
+ r2_end = ULLONG_MAX;
+
+ do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start));
+
+ return do_overlap;
+}
- this->refcount--;
- if (this->refcount == 0) {
- list_del_init (&this->list);
- if (this->stub && this->stub->fop == GF_FOP_WRITE) {
- call_stub_destroy (this->stub);
- }
+gf_boolean_t
+wb_requests_conflict(wb_request_t *lie, wb_request_t *req)
+{
+ wb_conf_t *conf = NULL;
- FREE (this);
- }
+ conf = req->wb_inode->this->private;
+
+ if (lie == req)
+ /* request cannot conflict with itself */
+ return _gf_false;
+
+ if (lie->gen >= req->gen)
+ /* this liability entry was behind
+ us in the todo list */
+ return _gf_false;
+
+ if (lie->ordering.append)
+ /* all modifications wait for the completion
+ of outstanding append */
+ return _gf_true;
+
+ if (conf->strict_write_ordering)
+ /* We are sure (lie->gen < req->gen) by now. So
+ skip overlap check if strict write ordering is
+ requested and always return "conflict" against a
+ lower generation lie. */
+ return _gf_true;
+
+ return wb_requests_overlap(lie, req);
}
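/*
 * Editor's sketch (not part of the patch): the same interval test that
 * wb_requests_overlap() applies, written out on plain offsets so the
 * "(q >= x) and (y >= p)" form from the comment above can be checked by hand.
 * A size of 0 stands for "till infinity", exactly as in the function above;
 * wb_requests_conflict() additionally skips liability entries whose
 * generation is not older than the incoming request. The sample offsets
 * below are invented.
 */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

static int
sketch_overlap(uint64_t off1, uint64_t size1, uint64_t off2, uint64_t size2)
{
    uint64_t end1 = size1 ? off1 + size1 - 1 : ULLONG_MAX;
    uint64_t end2 = size2 ? off2 + size2 - 1 : ULLONG_MAX;

    /* negation of "end1 < off2 || end2 < off1" (de Morgan) */
    return (end1 >= off2) && (end2 >= off1);
}

int main(void)
{
    /* [0,4095] vs [4096,8191]: adjacent but disjoint -> 0 */
    printf("%d\n", sketch_overlap(0, 4096, 4096, 4096));
    /* [0,8191] vs [4096,4097]: contained -> 1 */
    printf("%d\n", sketch_overlap(0, 8192, 4096, 2));
    /* truncate at 4096 (size 0 == till infinity) vs write at 1MB -> 1 */
    printf("%d\n", sketch_overlap(4096, 0, 1048576, 131072));
    return 0;
}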
+wb_request_t *
+wb_liability_has_conflict(wb_inode_t *wb_inode, wb_request_t *req)
+{
+ wb_request_t *each = NULL;
+
+ list_for_each_entry(each, &wb_inode->liability, lie)
+ {
+ if (wb_requests_conflict(each, req) && (!each->ordering.fulfilled))
+ /* A fulfilled request shouldn't block another
+ * request (even a dependent one) from winding.
+ */
+ return each;
+ }
+
+ return NULL;
+}
-static void
-wb_request_unref (wb_request_t *this)
+wb_request_t *
+wb_wip_has_conflict(wb_inode_t *wb_inode, wb_request_t *req)
{
- wb_file_t *file = NULL;
- if (this == NULL) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "request is NULL");
- return;
+ wb_request_t *each = NULL;
+
+ if (req->stub->fop != GF_FOP_WRITE)
+ /* non-writes fundamentally never conflict with WIP requests */
+ return NULL;
+
+ list_for_each_entry(each, &wb_inode->wip, wip)
+ {
+ if (each == req)
+ /* request never conflicts with itself,
+ though this condition should never occur.
+ */
+ continue;
+
+ if (wb_requests_overlap(each, req))
+ return each;
+ }
+
+ return NULL;
+}
+
+static int
+__wb_request_unref(wb_request_t *req)
+{
+ int ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ wb_inode = req->wb_inode;
+
+ if (req->refcount <= 0) {
+ uuid_utoa_r(req->gfid, gfid);
+
+ gf_msg(
+ "wb-request", GF_LOG_WARNING, 0, WRITE_BEHIND_MSG_RES_UNAVAILABLE,
+ "(unique=%" PRIu64 ", fop=%s, gfid=%s, gen=%" PRIu64
+ "): "
+ "refcount(%d) is <= 0 ",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen, req->refcount);
+ goto out;
+ }
+
+ ret = --req->refcount;
+ if (req->refcount == 0) {
+ uuid_utoa_r(req->gfid, gfid);
+
+ gf_log_callingfn(wb_inode->this->name, GF_LOG_DEBUG,
+ "(unique = %" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): destroying request, "
+ "removing from all queues",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen);
+
+ list_del_init(&req->todo);
+ list_del_init(&req->lie);
+ list_del_init(&req->wip);
+
+ list_del_init(&req->all);
+ if (list_empty(&wb_inode->all)) {
+ wb_inode->gen = 0;
+ /* in case of accounting errors? */
+ wb_inode->window_current = 0;
}
-
- file = this->file;
- LOCK (&file->lock);
- {
- __wb_request_unref (this);
+
+ list_del_init(&req->winds);
+ list_del_init(&req->unwinds);
+
+ if (req->stub) {
+ call_stub_destroy(req->stub);
+ req->stub = NULL;
}
- UNLOCK (&file->lock);
+
+ if (req->iobref)
+ iobref_unref(req->iobref);
+
+ if (req->fd)
+ fd_unref(req->fd);
+
+ GF_FREE(req);
+ }
+out:
+ return ret;
}
+static int
+wb_request_unref(wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("write-behind", req, out);
+
+ wb_inode = req->wb_inode;
+
+ LOCK(&wb_inode->lock);
+ {
+ ret = __wb_request_unref(req);
+ }
+ UNLOCK(&wb_inode->lock);
+
+out:
+ return ret;
+}
static wb_request_t *
-__wb_request_ref (wb_request_t *this)
+__wb_request_ref(wb_request_t *req)
{
- if (this->refcount < 0) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "refcount(%d) is < 0", this->refcount);
- return NULL;
- }
+ GF_VALIDATE_OR_GOTO("write-behind", req, out);
- this->refcount++;
- return this;
-}
+ if (req->refcount < 0) {
+ gf_msg("wb-request", GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_RES_UNAVAILABLE, "refcount(%d) is < 0",
+ req->refcount);
+ req = NULL;
+ goto out;
+ }
+
+ req->refcount++;
+out:
+ return req;
+}
wb_request_t *
-wb_request_ref (wb_request_t *this)
+wb_request_ref(wb_request_t *req)
{
- wb_file_t *file = NULL;
- if (this == NULL) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "request is NULL");
- return NULL;
- }
+ wb_inode_t *wb_inode = NULL;
- file = this->file;
- LOCK (&file->lock);
- {
- this = __wb_request_ref (this);
- }
- UNLOCK (&file->lock);
+ GF_VALIDATE_OR_GOTO("write-behind", req, out);
+
+ wb_inode = req->wb_inode;
+ LOCK(&wb_inode->lock);
+ {
+ req = __wb_request_ref(req);
+ }
+ UNLOCK(&wb_inode->lock);
- return this;
+out:
+ return req;
}
+gf_boolean_t
+wb_enqueue_common(wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
+{
+ wb_request_t *req = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("write-behind", wb_inode, out);
+ GF_VALIDATE_OR_GOTO(wb_inode->this->name, stub, out);
+
+ req = GF_CALLOC(1, sizeof(*req), gf_wb_mt_wb_request_t);
+ if (!req)
+ goto out;
+
+ INIT_LIST_HEAD(&req->all);
+ INIT_LIST_HEAD(&req->todo);
+ INIT_LIST_HEAD(&req->lie);
+ INIT_LIST_HEAD(&req->winds);
+ INIT_LIST_HEAD(&req->unwinds);
+ INIT_LIST_HEAD(&req->wip);
+
+ req->stub = stub;
+ req->wb_inode = wb_inode;
+ req->fop = stub->fop;
+ req->ordering.tempted = tempted;
+ req->unique = stub->frame->root->unique;
+
+ inode = ((stub->args.fd != NULL) ? stub->args.fd->inode
+ : stub->args.loc.inode);
+
+ if (inode)
+ gf_uuid_copy(req->gfid, inode->gfid);
+
+ if (stub->fop == GF_FOP_WRITE) {
+ req->write_size = iov_length(stub->args.vector, stub->args.count);
+
+ /* req->write_size can change as we collapse
+ small writes. But the window needs to grow
+ only by how much we acknowledge the app. so
+ copy the original size in orig_size for the
+ purpose of accounting.
+ */
+ req->orig_size = req->write_size;
-wb_request_t *
-wb_enqueue (wb_file_t *file, call_stub_t *stub)
-{
- wb_request_t *request = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- int32_t count = 0;
-
- request = CALLOC (1, sizeof (*request));
- if (request == NULL) {
- goto out;
- }
+ /* Let's be optimistic that we can
+ lie about it
+ */
+ req->op_ret = req->write_size;
+ req->op_errno = 0;
+
+ if (stub->args.fd && (stub->args.fd->flags & O_APPEND))
+ req->ordering.append = 1;
+ }
+
+ req->lk_owner = stub->frame->root->lk_owner;
+ req->client_pid = stub->frame->root->pid;
+
+ switch (stub->fop) {
+ case GF_FOP_WRITE:
+ LOCK(&wb_inode->lock);
+ {
+ if (wb_inode->size < stub->args.offset) {
+ req->ordering.off = wb_inode->size;
+ req->ordering.size = stub->args.offset + req->write_size -
+ wb_inode->size;
+ } else {
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = req->write_size;
+ }
- INIT_LIST_HEAD (&request->list);
- INIT_LIST_HEAD (&request->winds);
- INIT_LIST_HEAD (&request->unwinds);
- INIT_LIST_HEAD (&request->other_requests);
+ if (wb_inode->size < stub->args.offset + req->write_size)
+ wb_inode->size = stub->args.offset + req->write_size;
+ }
+ UNLOCK(&wb_inode->lock);
- request->stub = stub;
- request->file = file;
+ req->fd = fd_ref(stub->args.fd);
- frame = stub->frame;
- local = frame->local;
- if (local) {
- local->request = request;
- }
+ break;
+ case GF_FOP_READ:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = stub->args.size;
- if (stub->fop == GF_FOP_WRITE) {
- vector = stub->args.writev.vector;
- count = stub->args.writev.count;
+ req->fd = fd_ref(stub->args.fd);
- frame = stub->frame;
- local = frame->local;
- request->write_size = iov_length (vector, count);
- local->op_ret = request->write_size;
- local->op_errno = 0;
+ break;
+ case GF_FOP_TRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->size = req->ordering.off;
+ }
+ UNLOCK(&wb_inode->lock);
+ break;
+ case GF_FOP_FTRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->size = req->ordering.off;
+ }
+ UNLOCK(&wb_inode->lock);
- request->flags.write_request.virgin = 1;
- }
+ req->fd = fd_ref(stub->args.fd);
- LOCK (&file->lock);
- {
- list_add_tail (&request->list, &file->request);
- if (stub->fop == GF_FOP_WRITE) {
- /* reference for stack winding */
- __wb_request_ref (request);
+ break;
+ default:
+ if (stub && stub->args.fd)
+ req->fd = fd_ref(stub->args.fd);
- /* reference for stack unwinding */
- __wb_request_ref (request);
+ break;
+ }
- file->aggregate_current += request->write_size;
- } else {
- /*reference for resuming */
- __wb_request_ref (request);
- }
+ LOCK(&wb_inode->lock);
+ {
+ list_add_tail(&req->all, &wb_inode->all);
+
+ req->gen = wb_inode->gen;
+
+ list_add_tail(&req->todo, &wb_inode->todo);
+ __wb_request_ref(req); /* for wind */
+
+ if (req->ordering.tempted) {
+ list_add_tail(&req->lie, &wb_inode->temptation);
+ __wb_request_ref(req); /* for unwind */
}
- UNLOCK (&file->lock);
+ }
+ UNLOCK(&wb_inode->lock);
out:
- return request;
+ if (!req)
+ return _gf_false;
+
+ return _gf_true;
}
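/*
 * Editor's sketch (not part of the patch): how the GF_FOP_WRITE arm of
 * wb_enqueue_common() above picks the request's ordering range. When a write
 * starts beyond the currently known file size, the range is stretched back to
 * that size so it also covers the hole the write implicitly creates, and later
 * requests touching that hole are ordered behind it. The helper and the
 * offsets in main() are invented for illustration.
 */
#include <stdio.h>
#include <stdint.h>

struct sketch_range {
    uint64_t off;
    uint64_t size;
};

static struct sketch_range
sketch_write_range(uint64_t cur_size, uint64_t offset, uint64_t write_size)
{
    struct sketch_range r;

    if (cur_size < offset) {
        r.off = cur_size;                        /* start at the old EOF */
        r.size = offset + write_size - cur_size; /* hole plus the new data */
    } else {
        r.off = offset;
        r.size = write_size;
    }

    return r;
}

int main(void)
{
    /* file is 4096 bytes, 512-byte write at offset 1 MiB:
       the range starts at 4096 and spans the hole plus the data */
    struct sketch_range r = sketch_write_range(4096, 1048576, 512);

    printf("off=%llu size=%llu\n", (unsigned long long)r.off,
           (unsigned long long)r.size);
    return 0;
}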
+gf_boolean_t
+wb_enqueue(wb_inode_t *wb_inode, call_stub_t *stub)
+{
+ return wb_enqueue_common(wb_inode, stub, 0);
+}
-wb_file_t *
-wb_file_create (xlator_t *this, fd_t *fd)
+gf_boolean_t
+wb_enqueue_tempted(wb_inode_t *wb_inode, call_stub_t *stub)
{
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
+ return wb_enqueue_common(wb_inode, stub, 1);
+}
- file = CALLOC (1, sizeof (*file));
- if (file == NULL) {
- goto out;
- }
+wb_inode_t *
+__wb_inode_create(xlator_t *this, inode_t *inode)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
- INIT_LIST_HEAD (&file->request);
- INIT_LIST_HEAD (&file->passive_requests);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- /*
- fd_ref() not required, file should never decide the existance of
- an fd
- */
- file->fd= fd;
- file->disable_till = conf->disable_till;
- file->this = this;
- file->refcount = 1;
- file->window_conf = conf->window_size;
+ conf = this->private;
+
+ wb_inode = GF_CALLOC(1, sizeof(*wb_inode), gf_wb_mt_wb_inode_t);
+ if (!wb_inode)
+ goto out;
+
+ INIT_LIST_HEAD(&wb_inode->all);
+ INIT_LIST_HEAD(&wb_inode->todo);
+ INIT_LIST_HEAD(&wb_inode->liability);
+ INIT_LIST_HEAD(&wb_inode->temptation);
+ INIT_LIST_HEAD(&wb_inode->wip);
+ INIT_LIST_HEAD(&wb_inode->invalidate_list);
+
+ wb_inode->this = this;
+
+ wb_inode->window_conf = conf->window_size;
+ wb_inode->inode = inode;
- fd_ctx_set (fd, this, (uint64_t)(long)file);
+ LOCK_INIT(&wb_inode->lock);
+ GF_ATOMIC_INIT(wb_inode->invalidate, 0);
+ GF_ATOMIC_INIT(wb_inode->readdirps, 0);
+
+ ret = __inode_ctx_put(inode, this, (uint64_t)(unsigned long)wb_inode);
+ if (ret) {
+ GF_FREE(wb_inode);
+ wb_inode = NULL;
+ }
out:
- return file;
+ return wb_inode;
}
-void
-wb_file_destroy (wb_file_t *file)
+wb_inode_t *
+wb_inode_create(xlator_t *this, inode_t *inode)
{
- int32_t refcount = 0;
+ wb_inode_t *wb_inode = NULL;
- LOCK (&file->lock);
- {
- refcount = --file->refcount;
- }
- UNLOCK (&file->lock);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (!refcount){
- LOCK_DESTROY (&file->lock);
- FREE (file);
- }
+ LOCK(&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get(this, inode);
+ if (!wb_inode)
+ wb_inode = __wb_inode_create(this, inode);
+ }
+ UNLOCK(&inode->lock);
- return;
+out:
+ return wb_inode;
}
+void
+wb_inode_destroy(wb_inode_t *wb_inode)
+{
+ GF_VALIDATE_OR_GOTO("write-behind", wb_inode, out);
+
+ GF_ASSERT(list_empty(&wb_inode->todo));
+ GF_ASSERT(list_empty(&wb_inode->liability));
+ GF_ASSERT(list_empty(&wb_inode->temptation));
-int32_t
-wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *prebuf, struct stat *postbuf)
+ LOCK_DESTROY(&wb_inode->lock);
+ GF_FREE(wb_inode);
+out:
+ return;
+}
+
+void
+__wb_fulfill_request(wb_request_t *req)
{
- wb_local_t *local = NULL;
- list_head_t *winds = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL, *dummy = NULL;
- wb_local_t *per_request_local = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
+ wb_inode_t *wb_inode = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ wb_inode = req->wb_inode;
+
+ req->ordering.fulfilled = 1;
+ wb_inode->window_current -= req->total_size;
+ wb_inode->transit -= req->total_size;
+
+ uuid_utoa_r(req->gfid, gfid);
+
+ gf_log_callingfn(wb_inode->this->name, GF_LOG_DEBUG,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): request fulfilled. "
+ "removing the request from liability queue? = %s",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen,
+ req->ordering.lied ? "yes" : "no");
+
+ if (req->ordering.lied) {
+ /* 1. If yes, request is in liability queue and hence can be
+ safely removed from list.
+ 2. If no, request is in temptation queue and hence should be
+ left in the queue so that wb_pick_unwinds picks it up
+ */
+ list_del_init(&req->lie);
+ } else {
+ /* TODO: fail the req->frame with error if
+ necessary
+ */
+ }
+ list_del_init(&req->wip);
+ __wb_request_unref(req);
+}
- local = frame->local;
- winds = &local->winds;
- file = local->file;
+/* get a flush/fsync waiting on req */
+wb_request_t *
+__wb_request_waiting_on(wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *trav = NULL;
- LOCK (&file->lock);
- {
- list_for_each_entry_safe (request, dummy, winds, winds) {
- request->flags.write_request.got_reply = 1;
+ wb_inode = req->wb_inode;
- if (!request->flags.write_request.write_behind
- && (op_ret == -1)) {
- per_request_local = request->stub->frame->local;
- per_request_local->op_ret = op_ret;
- per_request_local->op_errno = op_errno;
- }
+ list_for_each_entry(trav, &wb_inode->todo, todo)
+ {
+ if (((trav->stub->fop == GF_FOP_FLUSH) ||
+ (trav->stub->fop == GF_FOP_FSYNC)) &&
+ (trav->gen >= req->gen))
+ return trav;
+ }
- if (request->flags.write_request.write_behind) {
- file->window_current -= request->write_size;
- }
+ return NULL;
+}
- __wb_request_unref (request);
- }
-
- if (op_ret == -1) {
- file->op_ret = op_ret;
- file->op_errno = op_errno;
- }
- fd = file->fd;
- }
- UNLOCK (&file->lock);
-
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+void
+__wb_add_request_for_retry(wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
- /* safe place to do fd_unref */
- fd_unref (fd);
+ if (!req)
+ goto out;
- STACK_DESTROY (frame->root);
+ wb_inode = req->wb_inode;
- return 0;
+ /* The response was unwound and there is no waiter on this request; retry
+ it till a flush or fsync (subject to conf->resync_after_fsync).
+ */
+ wb_inode->transit -= req->total_size;
+
+ req->total_size = 0;
+
+ list_del_init(&req->winds);
+ list_del_init(&req->todo);
+ list_del_init(&req->wip);
+
+ /* sanitize ordering flags to retry */
+ req->ordering.go = 0;
+
+ /* Add back to todo list to retry */
+ list_add(&req->todo, &wb_inode->todo);
+
+out:
+ return;
}
+void
+__wb_add_head_for_retry(wb_request_t *head)
+{
+ wb_request_t *req = NULL, *tmp = NULL;
-ssize_t
-wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
-{
- wb_request_t *dummy = NULL, *request = NULL;
- wb_request_t *first_request = NULL, *next = NULL;
- size_t total_count = 0, count = 0;
- size_t copied = 0;
- call_frame_t *sync_frame = NULL;
- struct iobref *iobref = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- ssize_t current_size = 0, bytes = 0;
- size_t bytecount = 0;
- wb_conf_t *conf = NULL;
- fd_t *fd = NULL;
-
- conf = file->this->private;
- list_for_each_entry (request, winds, winds) {
- total_count += request->stub->args.writev.count;
- if (total_count > 0) {
- break;
- }
- }
+ if (!head)
+ goto out;
- if (total_count == 0) {
- goto out;
- }
-
- list_for_each_entry_safe (request, dummy, winds, winds) {
- if (!vector) {
- vector = MALLOC (VECTORSIZE (MAX_VECTOR_COUNT));
- if (vector == NULL) {
- bytes = -1;
- goto out;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- bytes = -1;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- bytes = -1;
- goto out;
- }
-
- INIT_LIST_HEAD (&local->winds);
-
- first_request = request;
- current_size = 0;
- }
+ list_for_each_entry_safe_reverse(req, tmp, &head->winds, winds)
+ {
+ __wb_add_request_for_retry(req);
+ }
- count += request->stub->args.writev.count;
- bytecount = VECTORSIZE (request->stub->args.writev.count);
- memcpy (((char *)vector)+copied,
- request->stub->args.writev.vector,
- bytecount);
- copied += bytecount;
-
- current_size += request->write_size;
-
- if (request->stub->args.writev.iobref) {
- iobref_merge (iobref,
- request->stub->args.writev.iobref);
- }
+ __wb_add_request_for_retry(head);
- next = NULL;
- if (request->winds.next != winds) {
- next = list_entry (request->winds.next,
- wb_request_t, winds);
- }
+out:
+ return;
+}
- list_del_init (&request->winds);
- list_add_tail (&request->winds, &local->winds);
-
- if ((!next)
- || ((count + next->stub->args.writev.count)
- > MAX_VECTOR_COUNT)
- || ((current_size + next->write_size)
- > conf->aggregate_size))
- {
- sync_frame = copy_frame (frame);
- if (sync_frame == NULL) {
- bytes = -1;
- goto out;
- }
-
- sync_frame->local = local;
- local->file = file;
-
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
-
- fd_ref (fd);
-
- bytes += current_size;
- STACK_WIND (sync_frame,
- wb_sync_cbk,
- FIRST_CHILD(sync_frame->this),
- FIRST_CHILD(sync_frame->this)->fops->writev,
- fd, vector,
- count,
- first_request->stub->args.writev.off,
- iobref);
-
- iobref_unref (iobref);
- FREE (vector);
- first_request = NULL;
- iobref = NULL;
- vector = NULL;
- sync_frame = NULL;
- local = NULL;
- copied = count = 0;
- }
- }
+void
+wb_add_head_for_retry(wb_request_t *head)
+{
+ if (!head)
+ goto out;
+
+ LOCK(&head->wb_inode->lock);
+ {
+ __wb_add_head_for_retry(head);
+ }
+ UNLOCK(&head->wb_inode->lock);
out:
- if (sync_frame != NULL) {
- sync_frame->local = NULL;
- STACK_DESTROY (sync_frame->root);
- }
+ return;
+}
- if (local != NULL) {
- FREE (local);
- }
+void
+__wb_fulfill_request_err(wb_request_t *req, int32_t op_errno)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *waiter = NULL;
+ wb_conf_t *conf = NULL;
+
+ wb_inode = req->wb_inode;
+
+ conf = wb_inode->this->private;
- if (iobref != NULL) {
- iobref_unref (iobref);
+ req->op_ret = -1;
+ req->op_errno = op_errno;
+
+ if (req->ordering.lied)
+ waiter = __wb_request_waiting_on(req);
+
+ if (!req->ordering.lied || waiter) {
+ if (!req->ordering.lied) {
+ /* response to app is still pending, send failure in
+ * response.
+ */
+ } else {
+ /* response was sent, store the error in a
+ * waiter (either an fsync or flush).
+ */
+ waiter->op_ret = -1;
+ waiter->op_errno = op_errno;
}
- if (vector != NULL) {
- FREE (vector);
+ if (!req->ordering.lied || (waiter->stub->fop == GF_FOP_FLUSH) ||
+ ((waiter->stub->fop == GF_FOP_FSYNC) &&
+ !conf->resync_after_fsync)) {
+ /* No retry needed, forget the request */
+ __wb_fulfill_request(req);
+ return;
}
+ }
- return bytes;
-}
+ __wb_add_request_for_retry(req);
+ return;
+}
-int32_t
-wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
+void
+wb_head_done(wb_request_t *head)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_inode_t *wb_inode = NULL;
- request = local->request;
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ wb_inode = head->wb_inode;
+
+ LOCK(&wb_inode->lock);
+ {
+ list_for_each_entry_safe(req, tmp, &head->winds, winds)
+ {
+ __wb_fulfill_request(req);
}
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ __wb_fulfill_request(head);
+ }
+ UNLOCK(&wb_inode->lock);
+}
- if (request != NULL) {
- wb_request_unref (request);
- }
+void
+__wb_fulfill_err(wb_request_t *head, int op_errno)
+{
+ wb_request_t *req = NULL, *tmp = NULL;
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ if (!head)
+ goto out;
- STACK_DESTROY (process_frame->root);
- }
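+ /* Bump dontsync so that __wb_preprocess_winds won't immediately re-pick
+ * these failed lies for winding. */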
+ head->wb_inode->dontsync++;
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ list_for_each_entry_safe_reverse(req, tmp, &head->winds, winds)
+ {
+ __wb_fulfill_request_err(req, op_errno);
+ }
- fd_unref (fd);
- }
+ __wb_fulfill_request_err(head, op_errno);
- return 0;
+out:
+ return;
}
+void
+wb_fulfill_err(wb_request_t *head, int op_errno)
+{
+ wb_inode_t *wb_inode = NULL;
+
+ wb_inode = head->wb_inode;
+
+ LOCK(&wb_inode->lock);
+ {
+ __wb_fulfill_err(head, op_errno);
+ }
+ UNLOCK(&wb_inode->lock);
+}
-static int32_t
-wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+void
+__wb_modify_write_request(wb_request_t *req, int synced_size)
{
- STACK_WIND (frame, wb_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ struct iovec *vector = NULL;
+ int count = 0;
+
+ if (!req || synced_size == 0)
+ goto out;
+
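+ /* A short write synced only part of this request: shrink it, advance its
+ * offset and drop the iovec entries already covered. */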
+ req->write_size -= synced_size;
+ req->stub->args.offset += synced_size;
+
+ vector = req->stub->args.vector;
+ count = req->stub->args.count;
+
+ req->stub->args.count = iov_skip(vector, count, synced_size);
+
+out:
+ return;
}
+int
+__wb_fulfill_short_write(wb_request_t *req, int size, gf_boolean_t *fulfilled)
+{
+ int accounted_size = 0;
-int32_t
-wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
-
- if (loc->inode) {
- /* FIXME: fd_lookup extends life of fd till stat returns */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- iter_fd = NULL;
- }
- }
- }
+ if (req == NULL)
+ goto out;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (req->write_size <= size) {
+ accounted_size = req->write_size;
+ __wb_fulfill_request(req);
+ *fulfilled = 1;
+ } else {
+ accounted_size = size;
+ __wb_modify_write_request(req, size);
+ *fulfilled = 0;
+ }
- local->file = file;
+out:
+ return accounted_size;
+}
- frame->local = local;
+void
+wb_fulfill_short_write(wb_request_t *head, int size)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *req = NULL, *next = NULL;
+ int accounted_size = 0;
+ gf_boolean_t fulfilled = _gf_false;
- if (file) {
- stub = fop_stat_stub (frame, wb_stat_helper, loc);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (!head)
+ goto out;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode = head->wb_inode;
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ req = head;
- } else {
- STACK_WIND (frame, wb_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- }
- return 0;
+ LOCK(&wb_inode->lock);
+ {
+ /* hold a reference to head so that __wb_fulfill_short_write
+ * won't free it. We need head for a cleaner list traversal as
+ * list_for_each_entry_safe doesn't iterate over "head" member.
+ * So, if we pass "next->winds" as head to list_for_each_entry,
+ * "next" is skipped. For a simpler logic we need to traverse
+ * the list in the order. So, we start traversal from
+ * "head->winds" and hence we want head to be alive.
+ */
+ __wb_request_ref(head);
-unwind:
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
+ next = list_entry(head->winds.next, wb_request_t, winds);
- if (stub) {
- call_stub_destroy (stub);
- }
-
- if (iter_fd != NULL) {
- fd_unref (iter_fd);
- }
+ accounted_size = __wb_fulfill_short_write(head, size, &fulfilled);
- return 0;
-}
+ size -= accounted_size;
+ if (size == 0) {
+ if (fulfilled && (next != head))
+ req = next;
-int32_t
-wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
-
- local = frame->local;
- file = local->file;
+ goto done;
+ }
- request = local->request;
- if ((file != NULL) && (request != NULL)) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ list_for_each_entry_safe(req, next, &head->winds, winds)
+ {
+ accounted_size = __wb_fulfill_short_write(req, size, &fulfilled);
+ size -= accounted_size;
+
+ if (size == 0) {
+ if (fulfilled && (next != head))
+ req = next;
+ break;
+ }
}
+ done:
+ __wb_request_unref(head);
+ }
+ UNLOCK(&wb_inode->lock);
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ wb_add_head_for_retry(req);
+out:
+ return;
+}
- return 0;
+int
+wb_fulfill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *head = NULL;
+
+ head = frame->local;
+ frame->local = NULL;
+
+ wb_inode = head->wb_inode;
+
+ /* There could be a readdirp session in progress. Since wb_fulfill_cbk
+ * can potentially remove a request from liability queue,
+ * wb_readdirp_cbk will miss writes on this inode (as it invalidates
+ * stats only if the liability queue is not empty). Hence we mark the inode
+ * for invalidation of stats in the readdirp response. Specifically this
+ * code fixes the following race mentioned in wb_readdirp_cbk:
+ */
+
+ /* <removed comment from wb_readdirp_cbk>
+ * We cannot guarantee integrity of entry->d_stat as there are cached
+ * writes. The stat is most likely stale as it doesn't account for the
+ * cached writes. However, checking for a non-empty liability list here
+ * is not a fool-proof solution as there can be races like:
+ * 1. readdirp is successful on posix
+ * 2. sync of cached write is successful on posix
+ * 3. write-behind received sync response and removed the request from
+ * liability queue
+ * 4. readdirp response is processed at write-behind
+ *
+ * In the above scenario, stat for the file is sent back in readdirp
+ * response but it is stale.
+ * </comment> */
+ wb_set_invalidate(wb_inode);
+
+ if (op_ret == -1) {
+ wb_fulfill_err(head, op_errno);
+ } else if (op_ret < head->total_size) {
+ wb_fulfill_short_write(head, op_ret);
+ } else {
+ wb_head_done(head);
+ }
+
+ wb_process_queue(wb_inode);
+
+ STACK_DESTROY(frame->root);
+
+ return 0;
}
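+ /* Append a request's io-vector to the aggregate vector being built and
+ * account its bytes towards head->total_size. */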
+#define WB_IOV_LOAD(vec, cnt, req, head) \
+ do { \
+ memcpy(&vec[cnt], req->stub->args.vector, \
+ (req->stub->args.count * sizeof(vec[0]))); \
+ cnt += req->stub->args.count; \
+ head->total_size += req->write_size; \
+ } while (0)
-int32_t
-wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+int
+wb_fulfill_head(wb_inode_t *wb_inode, wb_request_t *head)
{
- STACK_WIND (frame,
- wb_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ struct iovec vector[MAX_VECTOR_COUNT];
+ int count = 0;
+ wb_request_t *req = NULL;
+ call_frame_t *frame = NULL;
+
+ /* make sure head->total_size is updated before we run into any
+ * errors
+ */
+
+ WB_IOV_LOAD(vector, count, head, head);
+
+ list_for_each_entry(req, &head->winds, winds)
+ {
+ WB_IOV_LOAD(vector, count, req, head);
+
+ if (iobref_merge(head->stub->args.iobref, req->stub->args.iobref))
+ goto err;
+ }
+
+ frame = create_frame(wb_inode->this, wb_inode->this->ctx->pool);
+ if (!frame)
+ goto err;
+
+ frame->root->lk_owner = head->lk_owner;
+ frame->root->pid = head->client_pid;
+ frame->local = head;
+
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->transit += head->total_size;
+ }
+ UNLOCK(&wb_inode->lock);
+
+ STACK_WIND(frame, wb_fulfill_cbk, FIRST_CHILD(frame->this),
+ FIRST_CHILD(frame->this)->fops->writev, head->fd, vector, count,
+ head->stub->args.offset, head->stub->args.flags,
+ head->stub->args.iobref, NULL);
+
+ return 0;
+err:
+ /* frame creation failure */
+ wb_fulfill_err(head, ENOMEM);
+
+ return ENOMEM;
}
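+ /* Wind the batch aggregated so far (if any) through wb_fulfill_head and
+ * start a fresh batch headed by req. */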
+#define NEXT_HEAD(head, req) \
+ do { \
+ if (head) \
+ ret |= wb_fulfill_head(wb_inode, head); \
+ head = req; \
+ expected_offset = req->stub->args.offset + req->write_size; \
+ curr_aggregate = 0; \
+ vector_count = 0; \
+ } while (0)
-int32_t
-wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+int
+wb_fulfill(wb_inode_t *wb_inode, list_head_t *liabilities)
{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int op_errno = EINVAL;
+ wb_request_t *req = NULL;
+ wb_request_t *head = NULL;
+ wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
+ off_t expected_offset = 0;
+ size_t curr_aggregate = 0;
+ size_t vector_count = 0;
+ int ret = 0;
+
+ conf = wb_inode->this->private;
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
+ list_for_each_entry_safe(req, tmp, liabilities, winds)
+ {
+ list_del_init(&req->winds);
- STACK_UNWIND_STRICT (fstat, frame, -1, EBADFD, NULL);
- return 0;
+ if (!head) {
+ NEXT_HEAD(head, req);
+ continue;
}
- file = (wb_file_t *)(long)tmp_file;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL);
- return 0;
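+ /* Start a new batch whenever req cannot be appended to the current one:
+ * different fd or lock owner, a gap in offsets, or aggregate-size and
+ * vector-count limits reached. */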
+ if (req->fd != head->fd) {
+ NEXT_HEAD(head, req);
+ continue;
}
- local->file = file;
+ if (!is_same_lkowner(&req->lk_owner, &head->lk_owner)) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
- frame->local = local;
+ if (expected_offset != req->stub->args.offset) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
- if (file) {
- stub = fop_fstat_stub (frame, wb_fstat_helper, fd);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if ((curr_aggregate + req->write_size) > conf->aggregate_size) {
+ NEXT_HEAD(head, req);
+ continue;
+ }
- /*
- FIXME:should the request queue be emptied in case of error?
- */
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
- } else {
- STACK_WIND (frame,
- wb_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
+ if (vector_count + req->stub->args.count > MAX_VECTOR_COUNT) {
+ NEXT_HEAD(head, req);
+ continue;
}
- return 0;
-unwind:
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
+ list_add_tail(&req->winds, &head->winds);
+ curr_aggregate += req->write_size;
+ vector_count += req->stub->args.count;
+ }
- if (stub) {
- call_stub_destroy (stub);
- }
+ if (head)
+ ret |= wb_fulfill_head(wb_inode, head);
- return 0;
+ return ret;
}
+void
+wb_do_unwinds(wb_inode_t *wb_inode, list_head_t *lies)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ call_frame_t *frame = NULL;
+ struct iatt buf = {
+ 0,
+ };
+
+ list_for_each_entry_safe(req, tmp, lies, unwinds)
+ {
+ frame = req->stub->frame;
+
+ STACK_UNWIND_STRICT(writev, frame, req->op_ret, req->op_errno, &buf,
+ &buf, NULL); /* :O */
+ req->stub->frame = NULL;
+
+ list_del_init(&req->unwinds);
+ wb_request_unref(req);
+ }
+
+ return;
+}
-int32_t
-wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- call_frame_t *process_frame = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if ((request != NULL) && (file != NULL)) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+void
+__wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ list_for_each_entry_safe(req, tmp, &wb_inode->temptation, lie)
+ {
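+ /* Unwind (lie about) a write only while the write-behind window has room;
+ * requests that are already fulfilled are unwound regardless. */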
+ if (!req->ordering.fulfilled &&
+ wb_inode->window_current > wb_inode->window_conf)
+ continue;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+ list_del_init(&req->lie);
+ list_move_tail(&req->unwinds, lies);
- if (request) {
- wb_request_unref (request);
+ wb_inode->window_current += req->orig_size;
+
+ wb_inode->gen++;
+
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail(&req->lie, &wb_inode->liability);
+
+ req->ordering.lied = 1;
+
+ uuid_utoa_r(req->gfid, gfid);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): added req to liability "
+ "queue. inode-generation-number=%" PRIu64,
+ req->stub->frame->root->unique, gf_fop_list[req->fop],
+ gfid, req->gen, wb_inode->gen);
}
+ }
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ return;
+}
- STACK_DESTROY (process_frame->root);
+int
+__wb_collapse_small_writes(wb_conf_t *conf, wb_request_t *holder,
+ wb_request_t *req)
+{
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int ret = -1;
+ ssize_t required_size = 0;
+ size_t holder_len = 0;
+ size_t req_len = 0;
+
+ if (!holder->iobref) {
+ holder_len = iov_length(holder->stub->args.vector,
+ holder->stub->args.count);
+ req_len = iov_length(req->stub->args.vector, req->stub->args.count);
+
+ required_size = max((conf->page_size), (holder_len + req_len));
+ iobuf = iobuf_get2(req->wb_inode->this->ctx->iobuf_pool, required_size);
+ if (iobuf == NULL) {
+ goto out;
}
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ iobuf_unref(iobuf);
+ goto out;
+ }
- fd_unref (fd);
+ ret = iobref_add(iobref, iobuf);
+ if (ret != 0) {
+ gf_msg(req->wb_inode->this->name, GF_LOG_WARNING, -ret,
+ WRITE_BEHIND_MSG_INVALID_ARGUMENT,
+ "cannot add iobuf (%p) into iobref (%p)", iobuf, iobref);
+ iobuf_unref(iobuf);
+ iobref_unref(iobref);
+ goto out;
}
- return 0;
-}
+ iov_unload(iobuf->ptr, holder->stub->args.vector,
+ holder->stub->args.count);
+ holder->stub->args.vector[0].iov_base = iobuf->ptr;
+ holder->stub->args.count = 1;
+ iobref_unref(holder->stub->args.iobref);
+ holder->stub->args.iobref = iobref;
-static int32_t
-wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- STACK_WIND (frame,
- wb_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
+ iobuf_unref(iobuf);
- return 0;
+ holder->iobref = iobref_ref(iobref);
+ }
+
+ ptr = holder->stub->args.vector[0].iov_base + holder->write_size;
+
+ iov_unload(ptr, req->stub->args.vector, req->stub->args.count);
+
+ holder->stub->args.vector[0].iov_len += req->write_size;
+ holder->write_size += req->write_size;
+ holder->ordering.size += req->write_size;
+
+ ret = 0;
+out:
+ return ret;
}
+void
+__wb_preprocess_winds(wb_inode_t *wb_inode)
+{
+ off_t offset_expected = 0;
+ ssize_t space_left = 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *holder = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
+ ssize_t page_size = 0;
+ char gfid[64] = {
+ 0,
+ };
+
+ /* With asynchronous IO from a VM guest (as a file), there
+ can be two sequential writes happening in two regions
+ of the file. But individual (broken down) IO requests
+ can arrive interleaved.
+
+ TODO: cycle for each such sequence sifting
+ through the interleaved ops
+ */
+
+ conf = wb_inode->this->private;
+ page_size = conf->page_size;
+
+ list_for_each_entry_safe(req, tmp, &wb_inode->todo, todo)
+ {
+ if (wb_inode->dontsync && req->ordering.lied) {
+ /* sync has failed. Don't pick lies _again_ for winding
+ * as winding these lies again will trigger an infinite
+ * recursion of wb_process_queue being called from a
+ * failed fulfill. However, pick non-lied requests for
+ * winding so that the application won't block indefinitely
+ * waiting for the write result.
+ */
+
+ uuid_utoa_r(req->gfid, gfid);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): not setting ordering.go"
+ "as dontsync is set",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen);
+
+ continue;
+ }
+
+ if (!req->ordering.tempted) {
+ if (holder) {
+ if (wb_requests_conflict(holder, req))
+ /* do not hold the write if a
+ dependent write is in the queue */
+ holder->ordering.go = 1;
+ }
+ /* collapse only non-sync writes */
+ continue;
+ } else if (!holder) {
+ /* holder is always a non-sync write */
+ holder = req;
+ continue;
+ }
+
+ offset_expected = holder->stub->args.offset + holder->write_size;
+
+ if (req->stub->args.offset != offset_expected) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ if (!is_same_lkowner(&req->lk_owner, &holder->lk_owner)) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ if (req->fd != holder->fd) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
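+ /* Collapse req into holder only if the combined write still fits in a
+ * single page-sized buffer. */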
+ space_left = page_size - holder->write_size;
+
+ if (space_left < req->write_size) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ ret = __wb_collapse_small_writes(conf, holder, req);
+ if (ret)
+ continue;
+
+ /* collapsed request is as good as wound
+ (from its p.o.v)
+ */
+ list_del_init(&req->todo);
+ __wb_fulfill_request(req);
+
+ /* Only the last @holder in queue which
+
+ - does not have any non-buffered-writes following it
+ - has not yet filled its capacity
+
+ does not get its 'go' set, in anticipation of the arrival
+ of consecutive smaller writes.
+ */
+ }
+
+ /* but if trickling writes are enabled, then do not hold back
+ writes if there are no outstanding requests
+ */
-int32_t
-wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+ if (conf->trickling_writes && !wb_inode->transit && holder)
+ holder->ordering.go = 1;
+
+ if (wb_inode->dontsync > 0)
+ wb_inode->dontsync--;
+
+ return;
+}
+
+int
+__wb_handle_failed_conflict(wb_request_t *req, wb_request_t *conflict,
+ list_head_t *tasks)
{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = ENOMEM;
+ wb_conf_t *conf = NULL;
+ char gfid[64] = {
+ 0,
+ };
+
+ conf = req->wb_inode->this->private;
+
+ uuid_utoa_r(req->gfid, gfid);
+
+ if ((req->stub->fop != GF_FOP_FLUSH) &&
+ ((req->stub->fop != GF_FOP_FSYNC) || conf->resync_after_fsync)) {
+ if (!req->ordering.lied && list_empty(&conflict->wip)) {
+ /* If request itself is in liability queue,
+ * 1. We cannot unwind as the response has already been
+ * sent.
+ * 2. We cannot wind till conflict clears up.
+ * 3. So, skip the request for now.
+ * 4. Otherwise, resume (unwind) it with error.
+ */
+ req->op_ret = -1;
+ req->op_errno = conflict->op_errno;
+ if ((req->stub->fop == GF_FOP_TRUNCATE) ||
+ (req->stub->fop == GF_FOP_FTRUNCATE)) {
+ req->stub->frame->local = NULL;
+ }
+
+ list_del_init(&req->todo);
+ list_add_tail(&req->winds, tasks);
+
+ gf_msg_debug(req->wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): A conflicting write "
+ "request in liability queue has failed "
+ "to sync (error = \"%s\"), "
+ "unwinding this request as a failure",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen,
+ strerror(req->op_errno));
+
+ if (req->ordering.tempted) {
+ /* make sure that it won't be unwound in
+ * wb_do_unwinds too. Otherwise there'll be
+ * a double unwind.
+ */
+ list_del_init(&req->lie);
+
+ gf_msg_debug(req->wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, "
+ "gfid=%s, gen=%" PRIu64
+ "): "
+ "removed from liability queue",
+ req->unique, gf_fop_list[req->fop], gfid,
+ req->gen);
+
+ __wb_fulfill_request(req);
+ }
+ }
+ } else {
+ gf_msg_debug(req->wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): A conflicting write request "
+ "in liability queue has failed to sync "
+ "(error = \"%s\"). This is an "
+ "FSYNC/FLUSH and we need to maintain ordering "
+ "guarantees with other writes in TODO queue. "
+ "Hence doing nothing now",
+ req->unique, gf_fop_list[req->fop], gfid, req->gen,
+ strerror(conflict->op_errno));
+
+ /* flush and fsync (without conf->resync_after_fsync) act as
+ barriers. We cannot unwind them out of
+ order when there are earlier-generation writes, just because
+ there is a conflicting liability with an error. So, wait for
+ our turn till there are no conflicting liabilities.
+
+ This situation can arise when there are liabilities spread across
+ multiple generations. For example, consider two writes with the
+ following characteristics:
+
+ 1. they belong to different generations gen1, gen2 and
+ (gen1 > gen2).
+ 2. they overlap.
+ 3. both are liabilities.
+ 4. gen1 write was attempted to sync, but the attempt failed.
+ 5. there was no attempt to sync gen2 write yet.
+ 6. A flush (as part of close) is issued and gets a gen no
+ gen3.
+
+ In the above scenario, if flush is unwound without waiting
+ for gen1 and gen2 writes either to be successfully synced or
+ purged, we end up with these two writes in wb_inode->todo
+ list forever as there will be no attempt to process the queue
+ as flush is the last operation.
+ */
+ }
- if (loc->inode)
- {
- /*
- FIXME: fd_lookup extends life of fd till the execution of
- truncate_cbk
- */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)){
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- }
- }
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ return 0;
+}
- local->file = file;
-
- frame->local = local;
- if (file) {
- stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
- offset);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+int
+__wb_pick_winds(wb_inode_t *wb_inode, list_head_t *tasks,
+ list_head_t *liabilities)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *conflict = NULL;
+ char req_gfid[64] =
+ {
+ 0,
+ },
+ conflict_gfid[64] = {
+ 0,
+ };
+
+ list_for_each_entry_safe(req, tmp, &wb_inode->todo, todo)
+ {
+ uuid_utoa_r(req->gfid, req_gfid);
+
+ conflict = wb_liability_has_conflict(wb_inode, req);
+ if (conflict) {
+ uuid_utoa_r(conflict->gfid, conflict_gfid);
+
+ gf_msg_debug(wb_inode->this->name, 0,
+ "Not winding request due to a "
+ "conflicting write in liability queue. "
+ "REQ: unique=%" PRIu64
+ ", fop=%s, "
+ "gen=%" PRIu64
+ ", gfid=%s. "
+ "CONFLICT: unique=%" PRIu64
+ ", fop=%s, "
+ "gen=%" PRIu64
+ ", gfid=%s, "
+ "conflicts-sync-failed?=%s, "
+ "conflicts-error=%s",
+ req->unique, gf_fop_list[req->fop], req->gen, req_gfid,
+ conflict->unique, gf_fop_list[conflict->fop],
+ conflict->gen, conflict_gfid,
+ (conflict->op_ret == -1) ? "yes" : "no",
+ strerror(conflict->op_errno));
+
+ if (conflict->op_ret == -1) {
+ /* There is a conflicting liability which failed
+ * to sync in previous attempts, resume the req
+ * and fail, unless its an fsync/flush.
+ */
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ __wb_handle_failed_conflict(req, conflict, tasks);
+ } else {
+ /* There is a conflicting liability which was
+ * not attempted to sync even once. Wait till
+ * at least one attempt to sync is made.
+ */
+ }
+
+ continue;
+ }
+
+ if (req->ordering.tempted && !req->ordering.go) {
+ /* wait some more */
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64 ", fop=%s, gen=%" PRIu64
+ ", gfid=%s): ordering.go is not set, "
+ "hence not winding",
+ req->unique, gf_fop_list[req->fop], req->gen,
+ req_gfid);
+ continue;
+ }
+
+ if (req->stub->fop == GF_FOP_WRITE) {
+ conflict = wb_wip_has_conflict(wb_inode, req);
+
+ if (conflict) {
+ uuid_utoa_r(conflict->gfid, conflict_gfid);
+
+ gf_msg_debug(wb_inode->this->name, 0,
+ "Not winding write request as "
+ "a conflicting write is being "
+ "synced to backend. "
+ "REQ: unique=%" PRIu64
+ " fop=%s,"
+ " gen=%" PRIu64
+ ", gfid=%s. "
+ "CONFLICT: unique=%" PRIu64
+ " "
+ "fop=%s, gen=%" PRIu64
+ ", "
+ "gfid=%s",
+ req->unique, gf_fop_list[req->fop], req->gen,
+ req_gfid, conflict->unique,
+ gf_fop_list[conflict->fop], conflict->gen,
+ conflict_gfid);
+ continue;
+ }
+
+ list_add_tail(&req->wip, &wb_inode->wip);
+ req->wind_count++;
+
+ if (!req->ordering.tempted)
+ /* unrefed in wb_writev_cbk */
+ req->stub->frame->local = __wb_request_ref(req);
+ }
+
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+ ", fop=%s, gfid=%s, "
+ "gen=%" PRIu64
+ "): picking the request for "
+ "winding",
+ req->unique, gf_fop_list[req->fop], req_gfid, req->gen);
+
+ list_del_init(&req->todo);
+
+ if (req->ordering.tempted) {
+ list_add_tail(&req->winds, liabilities);
} else {
- STACK_WIND (frame,
- wb_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
+ list_add_tail(&req->winds, tasks);
}
+ }
- return 0;
+ return 0;
+}
-unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL);
+void
+wb_do_winds(wb_inode_t *wb_inode, list_head_t *tasks)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+
+ list_for_each_entry_safe(req, tmp, tasks, winds)
+ {
+ list_del_init(&req->winds);
- if (stub) {
- call_stub_destroy (stub);
+ if (req->op_ret == -1) {
+ call_unwind_error_keep_stub(req->stub, req->op_ret, req->op_errno);
+ } else {
+ call_resume_keep_stub(req->stub);
}
- return 0;
+ wb_request_unref(req);
+ }
}
+void
+wb_process_queue(wb_inode_t *wb_inode)
+{
+ list_head_t tasks;
+ list_head_t lies;
+ list_head_t liabilities;
+ int wind_failure = 0;
-int32_t
-wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if ((request != NULL) && (file != NULL)) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ INIT_LIST_HEAD(&tasks);
+ INIT_LIST_HEAD(&lies);
+ INIT_LIST_HEAD(&liabilities);
+
+ do {
+ gf_log_callingfn(wb_inode->this->name, GF_LOG_DEBUG,
+ "processing queues");
+
+ LOCK(&wb_inode->lock);
+ {
+ __wb_preprocess_winds(wb_inode);
+
+ __wb_pick_winds(wb_inode, &tasks, &liabilities);
+
+ __wb_pick_unwinds(wb_inode, &lies);
}
+ UNLOCK(&wb_inode->lock);
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf);
+ if (!list_empty(&lies))
+ wb_do_unwinds(wb_inode, &lies);
- return 0;
-}
+ if (!list_empty(&tasks))
+ wb_do_winds(wb_inode, &tasks);
+ /* If there is an error in wb_fulfill before winding write
+ * requests, we would miss invocation of wb_process_queue
+ * from wb_fulfill_cbk. So, retry processing again.
+ */
+ if (!list_empty(&liabilities))
+ wind_failure = wb_fulfill(wb_inode, &liabilities);
+ } while (wind_failure);
-static int32_t
-wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame,
- wb_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
+ return;
}
-
-int32_t
-wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int op_errno = EINVAL;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (ftruncate, frame, -1, EBADFD,
- NULL, NULL);
- return 0;
- }
+void
+wb_set_inode_size(wb_inode_t *wb_inode, struct iatt *postbuf)
+{
+ GF_ASSERT(wb_inode);
+ GF_ASSERT(postbuf);
+
+ LOCK(&wb_inode->lock);
+ {
+ wb_inode->size = postbuf->ia_size;
+ }
+ UNLOCK(&wb_inode->lock);
+}
- file = (wb_file_t *)(long)tmp_file;
+int
+wb_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ wb_request_t *req = NULL;
+ wb_inode_t *wb_inode;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM,
- NULL, NULL);
- return 0;
- }
+ req = frame->local;
+ frame->local = NULL;
+ wb_inode = req->wb_inode;
- local->file = file;
+ LOCK(&req->wb_inode->lock);
+ {
+ list_del_init(&req->wip);
+ }
+ UNLOCK(&req->wb_inode->lock);
- frame->local = local;
+ wb_request_unref(req);
- if (file) {
- stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
- offset);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ /* requests could be pending while this was in progress */
+ wb_process_queue(wb_inode);
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
- } else {
- STACK_WIND (frame,
- wb_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- }
+int
+wb_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_WIND(frame, wb_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
- return 0;
+int
+wb_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ gf_boolean_t wb_disabled = 0;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int32_t op_errno = EINVAL;
+ int o_direct = O_DIRECT;
+
+ conf = this->private;
+
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!conf->strict_O_DIRECT)
+ o_direct = 0;
+
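+ /* Caching is bypassed for sync (and, with strict-O_DIRECT, direct) writes:
+ * they are still queued for ordering but are not acknowledged before
+ * reaching the backend. */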
+ if (fd->flags & (O_SYNC | O_DSYNC | o_direct))
+ wb_disabled = 1;
+
+ if (flags & (O_SYNC | O_DSYNC | o_direct))
+ wb_disabled = 1;
+
+ if (wb_disabled)
+ stub = fop_writev_stub(frame, wb_writev_helper, fd, vector, count,
+ offset, flags, iobref, xdata);
+ else
+ stub = fop_writev_stub(frame, NULL, fd, vector, count, offset, flags,
+ iobref, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (wb_disabled)
+ ret = wb_enqueue(wb_inode, stub);
+ else
+ ret = wb_enqueue_tempted(wb_inode, stub);
+
+ if (!ret) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ wb_process_queue(wb_inode);
+
+ return 0;
unwind:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, NULL);
- if (stub) {
- call_stub_destroy (stub);
- }
+ if (stub)
+ call_stub_destroy(stub);
- return 0;
+ return 0;
}
+int
+wb_readv_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
-int32_t
-wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *statpre, struct stat *statpost)
+int
+wb_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
- request = local->request;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost);
+ stub = fop_readv_stub(frame, wb_readv_helper, fd, size, offset, flags,
+ xdata);
+ if (!stub)
+ goto unwind;
- if (request) {
- wb_request_unref (request);
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- if (request && (process_frame != NULL)) {
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ wb_process_queue(wb_inode);
- STACK_DESTROY (process_frame->root);
- }
+ return 0;
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+unwind:
+ STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, NULL);
- fd_unref (fd);
- }
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
- return 0;
+noqueue:
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
}
+int
+wb_flush_bg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
+}
-static int32_t
-wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct stat *stbuf, int32_t valid)
+int
+wb_flush_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame,
- wb_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc,
- stbuf,
- valid);
+ wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_frame_t *bg_frame = NULL;
+ int32_t op_errno = 0;
+ int op_ret = 0;
+
+ conf = this->private;
+
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ if (conf->flush_behind)
+ goto flushbehind;
+
+ STACK_WIND(frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+
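+ /* flush-behind: wind the flush from a background frame and acknowledge
+ * the application immediately. */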
+flushbehind:
+ bg_frame = copy_frame(frame);
+ if (!bg_frame) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ STACK_WIND(bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ /* fall through */
+unwind:
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL);
- return 0;
+ return 0;
}
-
-int32_t
-wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct stat *stbuf, int32_t valid)
+int
+wb_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
+ stub = fop_flush_stub(frame, wb_flush_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- frame->local = local;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- if (!(valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME))) {
- STACK_WIND (frame,
- wb_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
- goto out;
- }
+ wb_process_queue(wb_inode);
- if (loc->inode) {
- /*
- FIXME: fd_lookup extends life of fd till the execution
- of wb_utimens_cbk
- */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- }
- }
+ return 0;
- }
+unwind:
+ STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL);
- local->file = file;
+ if (stub)
+ call_stub_destroy(stub);
- if (file) {
- stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, valid);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ return 0;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+noqueue:
+ STACK_WIND(frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
- } else {
- STACK_WIND (frame,
- wb_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc,
- stbuf, valid);
- }
+int
+wb_fsync_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
+}
+
+int
+wb_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = EINVAL;
+
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
+
+ stub = fop_fsync_stub(frame, wb_fsync_helper, fd, datasync, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue(wb_inode);
+
+ return 0;
- return 0;
unwind:
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno,
- NULL, NULL);
+ STACK_UNWIND_STRICT(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- if (stub) {
- call_stub_destroy (stub);
- }
-out:
- return 0;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-int32_t
-wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+int
+wb_stat_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t wbflags = 0, flags = 0;
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
- wb_local_t *local = NULL;
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
- conf = this->private;
+int
+wb_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ wb_inode = wb_inode_ctx_get(this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- flags = local->flags;
- wbflags = local->wbflags;
+ stub = fop_stat_stub(frame, wb_stat_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
- if (op_ret != -1) {
- file = wb_file_create (this, fd);
- if (file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- /*
- If mandatory locking has been enabled on this file,
- we disable caching on it
- */
-
- if ((fd->inode->st_mode & S_ISGID)
- && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT then, we disable chaching */
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && conf->enable_O_SYNC == _gf_true)) {
- file->window_conf = 0;
- }
+ wb_process_queue(wb_inode);
- if (wbflags & GF_OPEN_NOWB) {
- file->disabled = 1;
- }
-
- LOCK_INIT (&file->lock);
- }
-
-out:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, -1, ENOMEM, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
+int
+wb_fstat_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
-int32_t
-wb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+int
+wb_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- local->flags = flags;
- local->wbflags = wbflags;
-
- frame->local = local;
+ stub = fop_fstat_stub(frame, wb_fstat_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- STACK_WIND (frame,
- wb_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
-unwind:
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
- return 0;
-}
+ wb_process_queue(wb_inode);
+ return 0;
-int32_t
-wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
-{
- long flags = 0;
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
-
- if (op_ret != -1) {
- file = wb_file_create (this, fd);
- if (file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- /*
- * If mandatory locking has been enabled on this file,
- * we disable caching on it
- */
- if ((fd->inode->st_mode & S_ISGID)
- && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT then, we disable chaching */
- if (frame->local) {
- flags = (long)frame->local;
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && (conf->enable_O_SYNC == _gf_true))) {
- file->window_conf = 0;
- }
- }
+unwind:
+ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, NULL, NULL);
- LOCK_INIT (&file->lock);
- }
-
- frame->local = NULL;
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
-out:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent,
- postparent);
- return 0;
+noqueue:
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
-
int32_t
-wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+wb_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- frame->local = (void *)(long)flags;
+ GF_ASSERT(frame->local);
- STACK_WIND (frame,
- wb_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ if (op_ret == 0)
+ wb_set_inode_size(frame->local, postbuf);
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+int
+wb_truncate_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND(frame, wb_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
-size_t
-__wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
+int
+wb_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- wb_request_t *request = NULL;
- size_t size = 0;
- char first_request = 1;
- off_t offset_expected = 0;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- break;
- }
+ wb_inode = wb_inode_create(this, loc->inode);
+ if (!wb_inode)
+ goto unwind;
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- offset_expected = request->stub->args.writev.off;
- }
-
- if (request->stub->args.writev.off != offset_expected) {
- break;
- }
-
- size += request->write_size;
- offset_expected += request->write_size;
- file->aggregate_current -= request->write_size;
-
- request->flags.write_request.stack_wound = 1;
- list_add_tail (&request->winds, winds);
- }
- }
-
- return size;
-}
+ frame->local = wb_inode;
+ stub = fop_truncate_stub(frame, wb_truncate_helper, loc, offset, xdata);
+ if (!stub)
+ goto unwind;
-void
-__wb_can_wind (list_head_t *list, char *other_fop_in_queue,
- char *non_contiguous_writes, char *incomplete_writes)
-{
- wb_request_t *request = NULL;
- char first_request = 1;
- off_t offset_expected = 0;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- if (request->stub && other_fop_in_queue) {
- *other_fop_in_queue = 1;
- }
- break;
- }
+ wb_process_queue(wb_inode);
- if (request->flags.write_request.stack_wound
- && !request->flags.write_request.got_reply
- && (incomplete_writes != NULL)) {
- *incomplete_writes = 1;
- break;
- }
+ return 0;
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- offset_expected = request->stub->args.writev.off;
- }
+unwind:
+ STACK_UNWIND_STRICT(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- if (offset_expected != request->stub->args.writev.off) {
- if (non_contiguous_writes) {
- *non_contiguous_writes = 1;
- }
- break;
- }
+ if (stub)
+ call_stub_destroy(stub);
- offset_expected += request->write_size;
- }
- }
+ return 0;
+}
- return;
+int32_t
+wb_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT(frame->local);
+
+ if (op_ret == 0)
+ wb_set_inode_size(frame->local, postbuf);
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+int
+wb_ftruncate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, wb_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
- char wind_all, char enable_trickling_writes)
+int
+wb_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- size_t size = 0;
- char other_fop_in_queue = 0;
- char incomplete_writes = 0;
- char non_contiguous_writes = 0;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = 0;
- if (list_empty (list)) {
- goto out;
- }
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- request = list_entry (list->next, typeof (*request), list);
- file = request->file;
+ frame->local = wb_inode;
- if (!wind_all && (file->aggregate_current < aggregate_conf)) {
- __wb_can_wind (list, &other_fop_in_queue,
- &non_contiguous_writes, &incomplete_writes);
- }
+ stub = fop_ftruncate_stub(frame, wb_ftruncate_helper, fd, offset, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if ((enable_trickling_writes && !incomplete_writes)
- || (wind_all) || (non_contiguous_writes)
- || (other_fop_in_queue)
- || (file->aggregate_current >= aggregate_conf)) {
- size = __wb_mark_wind_all (file, list, winds);
- }
+ if (!wb_enqueue(wb_inode, stub)) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
-out:
- return size;
+ wb_process_queue(wb_inode);
+
+ return 0;
+
+unwind:
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
}
+int
+wb_setattr_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
-size_t
-__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size)
+int
+wb_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- size_t written_behind = 0;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (list_empty (list)) {
- goto out;
- }
+ wb_inode = wb_inode_ctx_get(this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- request = list_entry (list->next, typeof (*request), list);
- file = request->file;
+ stub = fop_setattr_stub(frame, wb_setattr_helper, loc, stbuf, valid, xdata);
+ if (!stub)
+ goto unwind;
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- continue;
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- if (written_behind <= size) {
- if (!request->flags.write_request.write_behind) {
- written_behind += request->write_size;
- request->flags.write_request.write_behind = 1;
- list_add_tail (&request->unwinds, unwinds);
-
- if (!request->flags.write_request.got_reply) {
- file->window_current += request->write_size;
- }
- }
- } else {
- break;
- }
- }
+ wb_process_queue(wb_inode);
-out:
- return written_behind;
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
+
+noqueue:
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
}
+int
+wb_fsetattr_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
-void
-__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds)
+int
+wb_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (list_empty (list)) {
- goto out;
- }
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- request = list_entry (list->next, typeof (*request), list);
- file = request->file;
+ stub = fop_fsetattr_stub(frame, wb_fsetattr_helper, fd, stbuf, valid,
+ xdata);
+ if (!stub)
+ goto unwind;
- if (file->window_current <= file->window_conf) {
- __wb_mark_unwind_till (list, unwinds,
- file->window_conf - file->window_current);
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
-out:
- return;
-}
+ wb_process_queue(wb_inode);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
-uint32_t
-__wb_get_other_requests (list_head_t *list, list_head_t *other_requests)
-{
- wb_request_t *request = NULL;
- uint32_t count = 0;
- list_for_each_entry (request, list, list) {
- if ((request->stub == NULL)
- || (request->stub->fop == GF_FOP_WRITE)) {
- break;
- }
-
- if (!request->flags.other_requests.marked_for_resume) {
- request->flags.other_requests.marked_for_resume = 1;
- list_add_tail (&request->other_requests,
- other_requests);
- count++;
- }
- }
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
- return count;
+noqueue:
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
}
-
int32_t
-wb_stack_unwind (list_head_t *unwinds)
+wb_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- struct stat buf = {0,};
- wb_request_t *request = NULL, *dummy = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
+ wb_inode_t *wb_inode = NULL;
- list_for_each_entry_safe (request, dummy, unwinds, unwinds)
- {
- frame = request->stub->frame;
- local = frame->local;
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &buf,
- &buf);
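+    /* A writable open with O_TRUNC empties the file on the server side,
+     * so reset the size tracked in the wb context accordingly. */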
+ if (((flags & O_RDWR) || (flags & O_WRONLY)) && (flags & O_TRUNC))
+ wb_inode->size = 0;
- wb_request_unref (request);
- }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
- return 0;
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
-
int32_t
-wb_resume_other_requests (call_frame_t *frame, wb_file_t *file,
- list_head_t *other_requests)
+wb_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- int32_t ret = 0;
- wb_request_t *request = NULL, *dummy = NULL;
- int32_t fops_removed = 0;
- char wind = 0;
- call_stub_t *stub = NULL;
+ wb_inode_t *wb_inode = NULL;
- if (list_empty (other_requests)) {
- goto out;
- }
+ wb_inode = wb_inode_create(this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
- list_for_each_entry_safe (request, dummy, other_requests,
- other_requests) {
- wind = request->stub->wind;
- stub = request->stub;
-
- LOCK (&file->lock);
- {
- request->stub = NULL;
- }
- UNLOCK (&file->lock);
-
- if (!wind) {
- wb_request_unref (request);
- fops_removed++;
- }
-
- call_resume (stub);
- }
+ if (((flags & O_RDWR) || (flags & O_WRONLY)) && (flags & O_TRUNC))
+ wb_inode->size = 0;
- if (fops_removed > 0) {
- ret = wb_process_queue (frame, file, 0);
- }
-
-out:
- return ret;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+int32_t
+wb_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
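+    /* Refresh the file size tracked by write-behind from the iatt returned
+     * by the server on a successful lookup. */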
+ if (op_ret == 0) {
+ wb_inode_t *wb_inode = wb_inode_ctx_get(this, inode);
+ if (wb_inode)
+ wb_set_inode_size(wb_inode, buf);
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
}
+int
+wb_lookup_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND(frame, wb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
int32_t
-wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds,
- list_head_t *unwinds, list_head_t *other_requests)
+wb_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- ret = wb_stack_unwind (unwinds);
- if (ret == -1) {
- goto out;
- }
+ wb_inode = wb_inode_ctx_get(this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- ret = wb_sync (frame, file, winds);
- if (ret == -1) {
- goto out;
- }
+ stub = fop_lookup_stub(frame, wb_lookup_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
- ret = wb_resume_other_requests (frame, file, other_requests);
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
-out:
- return ret;
-}
+ wb_process_queue(wb_inode);
+ return 0;
-inline int
-__wb_copy_into_holder (wb_request_t *holder, wb_request_t *request)
-{
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int ret = -1;
+unwind:
+ if (stub)
+ call_stub_destroy(stub);
- if (holder->flags.write_request.virgin) {
- iobuf = iobuf_get (request->file->this->ctx->iobuf_pool);
- if (iobuf == NULL) {
- gf_log (request->file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
+ STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ return 0;
- iobref = iobref_new ();
- if (iobref == NULL) {
- iobuf_unref (iobuf);
- gf_log (request->file->this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
-
- ret = iobref_add (iobref, iobuf);
- if (ret != 0) {
- iobuf_unref (iobuf);
- iobref_unref (iobref);
- gf_log (request->file->this->name, GF_LOG_DEBUG,
- "cannot add iobuf (%p) into iobref (%p)",
- iobuf, iobref);
- goto out;
- }
-
- iov_unload (iobuf->ptr, holder->stub->args.writev.vector,
- holder->stub->args.writev.count);
- holder->stub->args.writev.vector[0].iov_base = iobuf->ptr;
-
- iobref_unref (holder->stub->args.writev.iobref);
- holder->stub->args.writev.iobref = iobref;
-
- iobuf_unref (iobuf);
-
- holder->flags.write_request.virgin = 0;
- }
+noqueue:
+ STACK_WIND(frame, wb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
- ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size;
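+/*
+ * readdirp bracketing: wb_mark_readdirp_start() counts readdirps in flight on
+ * a directory; wb_mark_readdirp_end() decrements that count and, once it
+ * drops to zero, releases the inodes parked on the directory's
+ * invalidate_list.
+ */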
+static void
+wb_mark_readdirp_start(xlator_t *this, inode_t *directory)
+{
+ wb_inode_t *wb_directory_inode = NULL;
- iov_unload (ptr,
- request->stub->args.writev.vector,
- request->stub->args.writev.count);
+ wb_directory_inode = wb_inode_create(this, directory);
- holder->stub->args.writev.vector[0].iov_len += request->write_size;
- holder->write_size += request->write_size;
+ if (!wb_directory_inode)
+ return;
- request->flags.write_request.stack_wound = 1;
- list_move_tail (&request->list, &request->file->passive_requests);
+ LOCK(&wb_directory_inode->lock);
+ {
+ GF_ATOMIC_INC(wb_directory_inode->readdirps);
+ }
+ UNLOCK(&wb_directory_inode->lock);
- ret = 0;
-out:
- return ret;
+ return;
}
+static void
+wb_mark_readdirp_end(xlator_t *this, inode_t *directory)
+{
+ wb_inode_t *wb_directory_inode = NULL, *wb_inode = NULL, *tmp = NULL;
+ int readdirps = 0;
-/* this procedure assumes that write requests have only one vector to write */
-void
-__wb_collapse_write_bufs (list_head_t *requests, size_t page_size)
-{
- off_t offset_expected = 0;
- size_t space_left = 0;
- wb_request_t *request = NULL, *tmp = NULL, *holder = NULL;
- int ret = 0;
-
- list_for_each_entry_safe (request, tmp, requests, list) {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)
- || (request->flags.write_request.stack_wound)) {
- holder = NULL;
- continue;
- }
+ wb_directory_inode = wb_inode_ctx_get(this, directory);
- if (request->flags.write_request.write_behind) {
- if (holder == NULL) {
- holder = request;
- continue;
- }
-
- offset_expected = holder->stub->args.writev.off
- + holder->write_size;
-
- if (request->stub->args.writev.off != offset_expected) {
- holder = request;
- continue;
- }
-
- space_left = page_size - holder->write_size;
-
- if (space_left >= request->write_size) {
- ret = __wb_copy_into_holder (holder, request);
- if (ret != 0) {
- break;
- }
-
- __wb_request_unref (request);
- } else {
- holder = request;
- }
- } else {
- break;
- }
+ if (!wb_directory_inode)
+ return;
+
+ LOCK(&wb_directory_inode->lock);
+ {
+ readdirps = GF_ATOMIC_DEC(wb_directory_inode->readdirps);
+ if (readdirps)
+ goto unlock;
+
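+        /* Last readdirp on this directory completed: clear each queued
+         * inode's invalidate flag and drop the reference held by the list. */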
+ list_for_each_entry_safe(wb_inode, tmp,
+ &wb_directory_inode->invalidate_list,
+ invalidate_list)
+ {
+ list_del_init(&wb_inode->invalidate_list);
+ GF_ATOMIC_INIT(wb_inode->invalidate, 0);
+ inode_unref(wb_inode->inode);
}
+ }
+unlock:
+ UNLOCK(&wb_directory_inode->lock);
- return;
+ return;
}
-
-int32_t
-wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all)
+int32_t
+wb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- list_head_t winds, unwinds, other_requests;
- size_t size = 0;
- wb_conf_t *conf = NULL;
- uint32_t count = 0;
- int32_t ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ gf_dirent_t *entry = NULL;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
- INIT_LIST_HEAD (&winds);
- INIT_LIST_HEAD (&unwinds);
- INIT_LIST_HEAD (&other_requests);
-
- if (file == NULL) {
- errno = EINVAL;
- goto out;
- }
+ fd = frame->local;
+ frame->local = NULL;
- conf = file->this->private;
- size = conf->aggregate_size;
- LOCK (&file->lock);
- {
- /*
- * make sure requests are marked for unwinding and adjacent
- * continguous write buffers (each of size less than that of
- * an iobuf) are packed properly so that iobufs are filled to
- * their maximum capacity, before calling __wb_mark_winds.
- */
- __wb_mark_unwinds (&file->request, &unwinds);
+ if (op_ret <= 0)
+ goto unwind;
- __wb_collapse_write_bufs (&file->request,
- file->this->ctx->page_size);
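+    /* Hide entries whose inodes still carry unsynced (liability) writes or a
+     * pending invalidate: zeroing the stat and dropping the inode prevents
+     * the client from caching attributes write-behind has not flushed yet. */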
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode || !IA_ISREG(entry->d_stat.ia_type))
+ continue;
- count = __wb_get_other_requests (&file->request,
- &other_requests);
+ wb_inode = wb_inode_ctx_get(this, entry->inode);
+ if (!wb_inode)
+ continue;
- if (count == 0) {
- __wb_mark_winds (&file->request, &winds, size,
- flush_all,
- conf->enable_trickling_writes);
- }
+ LOCK(&wb_inode->lock);
+ {
+ if (!list_empty(&wb_inode->liability) ||
+ GF_ATOMIC_GET(wb_inode->invalidate)) {
+ inode = entry->inode;
+ entry->inode = NULL;
+ memset(&entry->d_stat, 0, sizeof(entry->d_stat));
+ }
}
- UNLOCK (&file->lock);
+ UNLOCK(&wb_inode->lock);
- ret = wb_do_ops (frame, file, &winds, &unwinds, &other_requests);
+ if (inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ }
-out:
- return ret;
-}
+unwind:
+ wb_mark_readdirp_end(this, fd->inode);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
int32_t
-wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+wb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ wb_mark_readdirp_start(this, fd->inode);
+
+ frame->local = fd;
+
+ STACK_WIND(frame, wb_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
}
+int32_t
+wb_link_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
int32_t
-wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
-{
- wb_file_t *file = NULL;
- char wb_disabled = 0;
- call_frame_t *process_frame = NULL;
- size_t size = 0;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1, op_errno = EINVAL;
-
- if (vector != NULL)
- size = iov_length (vector, count);
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- op_errno = EBADFD;
- goto unwind;
- }
+wb_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- file = (wb_file_t *)(long)tmp_file;
- if ((!S_ISDIR (fd->inode->st_mode)) && (file == NULL)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wb_file not found for fd %p", fd);
- op_errno = EBADFD;
- goto unwind;
- }
+ wb_inode = wb_inode_ctx_get(this, oldloc->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (file != NULL) {
- LOCK (&file->lock);
- {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
-
- file->op_ret = 0;
-
- if ((op_ret == 0)
- && (file->disabled || file->disable_till)) {
- if (size > file->disable_till) {
- file->disable_till = 0;
- } else {
- file->disable_till -= size;
- }
- wb_disabled = 1;
- }
- }
- UNLOCK (&file->lock);
- } else {
- wb_disabled = 1;
- }
+ stub = fop_link_stub(frame, wb_link_helper, oldloc, newloc, xdata);
+ if (!stub)
+ goto unwind;
- if (op_ret == -1) {
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
- NULL, NULL);
- return 0;
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- if (wb_disabled) {
- STACK_WIND (frame, wb_writev_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->writev,
- fd, vector, count, offset, iobref);
- return 0;
- }
+ wb_process_queue(wb_inode);
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ return 0;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- return 0;
- }
+unwind:
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
- frame->local = local;
- local->file = file;
+ if (stub)
+ call_stub_destroy(stub);
- stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
- iobref);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ return 0;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_errno = ENOMEM;
- goto unwind;
- }
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
- STACK_DESTROY (process_frame->root);
+int32_t
+wb_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset,
+ len, xdata);
+ return 0;
+}
- return 0;
+int32_t
+wb_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
-unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (process_frame) {
- STACK_DESTROY (process_frame->root);
- }
+ stub = fop_fallocate_stub(frame, wb_fallocate_helper, fd, keep_size, offset,
+ len, xdata);
+ if (!stub)
+ goto unwind;
- if (stub) {
- call_stub_destroy (stub);
- }
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- return 0;
-}
+ wb_process_queue(wb_inode);
+ return 0;
-int32_t
-wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
-{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if ((request != NULL) && (file != NULL)) {
- wb_request_unref (request);
-
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+unwind:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ if (stub)
+ call_stub_destroy(stub);
- return 0;
-}
+ return 0;
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset,
+ len, xdata);
+ return 0;
+}
-static int32_t
-wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+int32_t
+wb_discard_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- STACK_WIND (frame,
- wb_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
-
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+ fd, offset, len, xdata);
+ return 0;
}
-
int32_t
-wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- int32_t ret = -1;
- wb_request_t *request = NULL;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (readv, frame, -1, EBADFD,
- NULL, 0, NULL, NULL);
- return 0;
- }
+wb_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- file = (wb_file_t *)(long)tmp_file;
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- return 0;
- }
+ stub = fop_discard_stub(frame, wb_discard_helper, fd, offset, len, xdata);
+ if (!stub)
+ goto unwind;
- local->file = file;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- frame->local = local;
- if (file) {
- stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
- offset);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- return 0;
- }
+ wb_process_queue(wb_inode);
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ return 0;
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+unwind:
+ STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
- } else {
- STACK_WIND (frame,
- wb_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- }
+ if (stub)
+ call_stub_destroy(stub);
+ return 0;
- return 0;
-}
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard,
+ fd, offset, len, xdata);
+ return 0;
+}
int32_t
-wb_ffr_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+wb_zerofill_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+ fd, offset, len, xdata);
+ return 0;
}
-
int32_t
-wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
- char unwind = 0;
- int32_t ret = -1;
- int disabled = 0;
- int64_t disable_till = 0;
-
- conf = this->private;
- local = frame->local;
-
- if ((local != NULL) && (local->file != NULL)) {
- file = local->file;
-
- LOCK (&file->lock);
- {
- disabled = file->disabled;
- disable_till = file->disable_till;
- }
- UNLOCK (&file->lock);
+wb_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- unwind = 1;
- } else {
- local->reply_count++;
- /*
- * without flush-behind, unwind should wait for replies
- * of writes queued before and the flush
- */
- if (local->reply_count == 2) {
- unwind = 1;
- }
- }
- } else {
- unwind = 1;
- }
+ wb_inode = wb_inode_ctx_get(this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (unwind) {
- if (file != NULL) {
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
-
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
-
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
-
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- }
+ stub = fop_zerofill_stub(frame, wb_zerofill_helper, fd, offset, len, xdata);
+ if (!stub)
+ goto unwind;
- return 0;
-}
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue(wb_inode);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy(stub);
+    return 0;
+
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill,
+ fd, offset, len, xdata);
+ return 0;
+}
int32_t
-wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- wb_conf_t *conf = NULL;
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- call_frame_t *process_frame = NULL;
- wb_local_t *tmp_local = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
- int disabled = 0;
- int64_t disable_till = 0;
-
- conf = this->private;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (flush, frame, -1, EBADFD);
- return 0;
- }
+wb_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- file = (wb_file_t *)(long)tmp_file;
+ wb_inode = wb_inode_ctx_get(this, oldloc->inode);
+ if (!wb_inode)
+ goto noqueue;
- if (file != NULL) {
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_rename_stub(frame, default_rename_resume, oldloc, newloc, xdata);
+ if (!stub)
+ goto unwind;
- local->file = file;
+ if (!wb_enqueue(wb_inode, stub))
+ goto unwind;
- frame->local = local;
- stub = fop_flush_cbk_stub (frame, wb_ffr_cbk, 0, 0);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- return 0;
- }
+ wb_process_queue(wb_inode);
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- call_stub_destroy (stub);
- return 0;
- }
+ return 0;
- LOCK (&file->lock);
- {
- disabled = file->disabled;
- disable_till = file->disable_till;
- }
- UNLOCK (&file->lock);
-
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- tmp_local = CALLOC (1, sizeof (*local));
- if (tmp_local == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
-
- STACK_DESTROY (process_frame->root);
- call_stub_destroy (stub);
- return 0;
- }
- tmp_local->file = file;
-
- process_frame->local = tmp_local;
- }
+unwind:
+ if (stub)
+ call_stub_destroy(stub);
- fd_ref (fd);
+ STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ return 0;
- fd_unref (fd);
- call_stub_destroy (stub);
- STACK_DESTROY (process_frame->root);
- return 0;
- }
+noqueue:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
- ret = wb_process_queue (process_frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
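+/* The wb context lives in the inode ctx, so it is destroyed from forget();
+ * release() just deletes whatever was stored in the fd ctx. */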
+int
+wb_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp = 0;
+ wb_inode_t *wb_inode = NULL;
- fd_unref (fd);
- call_stub_destroy (stub);
- STACK_DESTROY (process_frame->root);
- return 0;
- }
- }
-
- if ((file != NULL) && conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- STACK_WIND (process_frame,
- wb_ffr_bg_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- } else {
- STACK_WIND (frame,
- wb_ffr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
-
- if (process_frame != NULL) {
- STACK_DESTROY (process_frame->root);
- }
- }
+ inode_ctx_del(inode, this, &tmp);
-
- if (file != NULL) {
- fd_unref (fd);
- }
+ wb_inode = (wb_inode_t *)(long)tmp;
+ if (!wb_inode)
return 0;
+
+ wb_inode_destroy(wb_inode);
+
+ return 0;
}
+int
+wb_release(xlator_t *this, fd_t *fd)
+{
+ uint64_t tmp = 0;
+
+ (void)fd_ctx_del(fd, this, &tmp);
-static int32_t
-wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *prebuf, struct stat *postbuf)
+ return 0;
+}
+
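+/* Statedump hook for translator-wide options; the output appears under the
+ * xlator.performance.write-behind.priv section of the statedump. */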
+int
+wb_priv_dump(xlator_t *this)
{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
+ wb_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = -1;
- local = frame->local;
- file = local->file;
- request = local->request;
+ GF_VALIDATE_OR_GOTO("write-behind", this, out);
- if (file != NULL) {
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
-
- if (request) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.write-behind",
+ "priv");
- }
+ gf_proc_dump_add_section("%s", key_prefix);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
-
- return 0;
-}
+ gf_proc_dump_write("aggregate_size", "%" PRIu64, conf->aggregate_size);
+ gf_proc_dump_write("window_size", "%" PRIu64, conf->window_size);
+ gf_proc_dump_write("flush_behind", "%d", conf->flush_behind);
+ gf_proc_dump_write("trickling_writes", "%d", conf->trickling_writes);
+ ret = 0;
+out:
+ return ret;
+}
-static int32_t
-wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
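+/* Emits one statedump section per queued request, including the
+ * write-specific ordering flags (lied/append/fulfilled/go). */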
+void
+__wb_dump_requests(struct list_head *head, char *prefix)
{
- STACK_WIND (frame,
- wb_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, datasync);
- return 0;
-}
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] =
+ {
+ 0,
+ },
+ flag = 0;
+ wb_request_t *req = NULL;
+ list_for_each_entry(req, head, all)
+ {
+ gf_proc_dump_build_key(key_prefix, key, "%s",
+ (char *)gf_fop_list[req->fop]);
-int32_t
-wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
-{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND_STRICT (fsync, frame, -1, EBADFD, NULL, NULL);
- return 0;
- }
+ gf_proc_dump_add_section("%s", key_prefix);
- file = (wb_file_t *)(long)tmp_file;
+ gf_proc_dump_write("unique", "%" PRIu64, req->unique);
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ gf_proc_dump_write("refcount", "%d", req->refcount);
- local->file = file;
+ if (list_empty(&req->todo))
+ gf_proc_dump_write("wound", "yes");
+ else
+ gf_proc_dump_write("wound", "no");
- frame->local = local;
+ gf_proc_dump_write("generation-number", "%" PRIu64, req->gen);
- if (file) {
- stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
- return 0;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ gf_proc_dump_write("req->op_ret", "%d", req->op_ret);
+ gf_proc_dump_write("req->op_errno", "%d", req->op_errno);
+ gf_proc_dump_write("sync-attempts", "%d", req->wind_count);
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
- call_stub_destroy (stub);
- return 0;
- }
-
- } else {
- STACK_WIND (frame,
- wb_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, datasync);
- }
+ if (req->fop == GF_FOP_WRITE) {
+ if (list_empty(&req->wip))
+ gf_proc_dump_write("sync-in-progress", "no");
+ else
+ gf_proc_dump_write("sync-in-progress", "yes");
- return 0;
-}
+ gf_proc_dump_write("size", "%" GF_PRI_SIZET, req->write_size);
+ if (req->stub)
+ gf_proc_dump_write("offset", "%" PRId64,
+ req->stub->args.offset);
-int32_t
-wb_release (xlator_t *this, fd_t *fd)
-{
- uint64_t file_ptr = 0;
- wb_file_t *file = NULL;
+ flag = req->ordering.lied;
+ gf_proc_dump_write("lied", "%d", flag);
- fd_ctx_get (fd, this, &file_ptr);
- file = (wb_file_t *) (long) file_ptr;
+ flag = req->ordering.append;
+ gf_proc_dump_write("append", "%d", flag);
- if (file != NULL) {
- LOCK (&file->lock);
- {
- assert (list_empty (&file->request));
- }
- UNLOCK (&file->lock);
+ flag = req->ordering.fulfilled;
+ gf_proc_dump_write("fulfilled", "%d", flag);
- wb_file_destroy (file);
+ flag = req->ordering.go;
+ gf_proc_dump_write("go", "%d", flag);
}
-
- return 0;
+ }
}
int
-wb_priv_dump (xlator_t *this)
+wb_inode_dump(xlator_t *this, inode_t *inode)
{
- wb_conf_t *conf = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ wb_inode_t *wb_inode = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char uuid_str[64] = {
+ 0,
+ };
+
+ if ((inode == NULL) || (this == NULL)) {
+ ret = 0;
+ goto out;
+ }
- if (!this)
- return -1;
+ wb_inode = wb_inode_ctx_get(this, inode);
+ if (wb_inode == NULL) {
+ ret = 0;
+ goto out;
+ }
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
+ uuid_utoa_r(inode->gfid, uuid_str);
+
+ gf_proc_dump_build_key(key_prefix, "xlator.performance.write-behind",
+ "wb_inode");
+
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ __inode_path(inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write("path", "%s", path);
+ GF_FREE(path);
+ }
+
+ gf_proc_dump_write("inode", "%p", inode);
+
+ gf_proc_dump_write("gfid", "%s", uuid_str);
+
+ gf_proc_dump_write("window_conf", "%" GF_PRI_SIZET, wb_inode->window_conf);
+
+ gf_proc_dump_write("window_current", "%" GF_PRI_SIZET,
+ wb_inode->window_current);
+
+ gf_proc_dump_write("transit-size", "%" GF_PRI_SIZET, wb_inode->transit);
+
+ gf_proc_dump_write("dontsync", "%d", wb_inode->dontsync);
+
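+    /* Use TRY_LOCK so statedump never blocks behind the I/O path; if the
+     * lock is contended, the per-request dump is skipped and noted below. */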
+ ret = TRY_LOCK(&wb_inode->lock);
+ if (!ret) {
+ if (!list_empty(&wb_inode->all)) {
+ __wb_dump_requests(&wb_inode->all, key_prefix);
}
+ UNLOCK(&wb_inode->lock);
+ }
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.write-behind",
- "priv");
-
- gf_proc_dump_add_section (key_prefix);
-
- gf_proc_dump_build_key (key, key_prefix, "aggregate_size");
- gf_proc_dump_write (key, "%d", conf->aggregate_size);
- gf_proc_dump_build_key (key, key_prefix, "window_size");
- gf_proc_dump_write (key, "%d", conf->window_size);
- gf_proc_dump_build_key (key, key_prefix, "disable_till");
- gf_proc_dump_write (key, "%d", conf->disable_till);
- gf_proc_dump_build_key (key, key_prefix, "enable_O_SYNC");
- gf_proc_dump_write (key, "%d", conf->enable_O_SYNC);
- gf_proc_dump_build_key (key, key_prefix, "flush_behind");
- gf_proc_dump_write (key, "%d", conf->flush_behind);
- gf_proc_dump_build_key (key, key_prefix, "enable_trickling_writes");
- gf_proc_dump_write (key, "%d", conf->enable_trickling_writes);
+ if (ret && wb_inode)
+ gf_proc_dump_write("Unable to dump the inode information",
+ "(Lock acquisition failed) %p (gfid: %s)", wb_inode,
+ uuid_str);
- return 0;
+ ret = 0;
+out:
+ return ret;
}
-int32_t
-init (xlator_t *this)
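+/* Registers this xlator's memory types so allocations made with GF_CALLOC
+ * are tracked by gluster's memory accounting. */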
+int
+mem_acct_init(xlator_t *this)
{
- dict_t *options = NULL;
- wb_conf_t *conf = NULL;
- char *str = NULL;
- int32_t ret = -1;
+ int ret = -1;
- if ((this->children == NULL)
- || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: write-behind (%s) not configured with exactly "
- "one child",
- this->name);
- return -1;
- }
+ if (!this) {
+ goto out;
+ }
- if (this->parents == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- options = this->options;
-
- conf = CALLOC (1, sizeof (*conf));
- if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
- return -1;
- }
-
- conf->enable_O_SYNC = _gf_false;
- ret = dict_get_str (options, "enable-O_SYNC",
- &str);
- if (ret == 0) {
- ret = gf_string2boolean (str,
- &conf->enable_O_SYNC);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'enable-O_SYNC' takes only boolean arguments");
- return -1;
- }
- }
+ ret = xlator_mem_acct_init(this, gf_wb_mt_end + 1);
- /* configure 'options aggregate-size <size>' */
- conf->aggregate_size = WB_AGGREGATE_SIZE;
- conf->disable_till = 0;
- ret = dict_get_str (options, "disable-for-first-nbytes",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str,
- &conf->disable_till);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "disable-for-first-nbytes\"",
- str);
- return -1;
- }
- }
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, WRITE_BEHIND_MSG_NO_MEMORY,
+               "Memory accounting init "
+               "failed");
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "disabling write-behind for first %"PRIu64" bytes",
- conf->disable_till);
-
- /* configure 'option window-size <size>' */
- conf->window_size = WB_WINDOW_SIZE;
- ret = dict_get_str (options, "cache-size",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str,
- &conf->window_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "window-size\"",
- str);
- FREE (conf);
- return -1;
- }
- }
+out:
+ return ret;
+}
- if (!conf->window_size && conf->aggregate_size) {
- gf_log (this->name, GF_LOG_WARNING,
- "setting window-size to be equal to "
- "aggregate-size(%"PRIu64")",
- conf->aggregate_size);
- conf->window_size = conf->aggregate_size;
- }
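+/* Online option changes: GF_OPTION_RECONF refreshes the tunables held in
+ * wb_conf_t without disturbing per-inode state. */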
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ wb_conf_t *conf = NULL;
+ int ret = -1;
- if (conf->window_size < conf->aggregate_size) {
- gf_log (this->name, GF_LOG_ERROR,
- "aggregate-size(%"PRIu64") cannot be more than "
- "window-size"
- "(%"PRIu64")", conf->window_size, conf->aggregate_size);
- FREE (conf);
- return -1;
- }
+ conf = this->private;
- /* configure 'option flush-behind <on/off>' */
- conf->flush_behind = 1;
- ret = dict_get_str (options, "flush-behind",
- &str);
- if (ret == 0) {
- ret = gf_string2boolean (str,
- &conf->flush_behind);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'flush-behind' takes only boolean arguments");
- return -1;
- }
+ GF_OPTION_RECONF("cache-size", conf->window_size, options, size_uint64,
+ out);
- if (conf->flush_behind) {
- gf_log (this->name, GF_LOG_DEBUG,
- "enabling flush-behind");
- }
- }
+ GF_OPTION_RECONF("flush-behind", conf->flush_behind, options, bool, out);
- conf->enable_trickling_writes = _gf_true;
- ret = dict_get_str (options, "enable-trickling-writes",
- &str);
- if (ret == 0) {
- ret = gf_string2boolean (str,
- &conf->enable_trickling_writes);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'enable-trickling_writes' takes only boolean"
- " arguments");
- return -1;
- }
- }
+ GF_OPTION_RECONF("trickling-writes", conf->trickling_writes, options, bool,
+ out);
- this->private = conf;
- return 0;
+ GF_OPTION_RECONF("strict-O_DIRECT", conf->strict_O_DIRECT, options, bool,
+ out);
+
+ GF_OPTION_RECONF("strict-write-ordering", conf->strict_write_ordering,
+ options, bool, out);
+ GF_OPTION_RECONF("resync-failed-syncs-after-fsync",
+ conf->resync_after_fsync, options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
}
+int32_t
+init(xlator_t *this)
+{
+ wb_conf_t *conf = NULL;
+ int32_t ret = -1;
+
+ if ((this->children == NULL) || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, WRITE_BEHIND_MSG_INIT_FAILED,
+ "FATAL: write-behind (%s) not configured with exactly "
+ "one child",
+ this->name);
+ goto out;
+ }
+
+ if (this->parents == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ WRITE_BEHIND_MSG_VOL_MISCONFIGURED,
+           "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_wb_mt_wb_conf_t);
+ if (conf == NULL) {
+ goto out;
+ }
+
+ /* configure 'options aggregate-size <size>' */
+ GF_OPTION_INIT("aggregate-size", conf->aggregate_size, size_uint64, out);
+ conf->page_size = conf->aggregate_size;
+
+ /* configure 'option window-size <size>' */
+ GF_OPTION_INIT("cache-size", conf->window_size, size_uint64, out);
+
+ if (!conf->window_size && conf->aggregate_size) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, WRITE_BEHIND_MSG_SIZE_NOT_SET,
+ "setting window-size to be equal to "
+ "aggregate-size(%" PRIu64 ")",
+ conf->aggregate_size);
+ conf->window_size = conf->aggregate_size;
+ }
+
+ if (conf->window_size < conf->aggregate_size) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, WRITE_BEHIND_MSG_EXCEEDED_MAX_SIZE,
+ "aggregate-size(%" PRIu64
+ ") cannot be more than "
+ "window-size(%" PRIu64 ")",
+ conf->aggregate_size, conf->window_size);
+ goto out;
+ }
+
+ /* configure 'option flush-behind <on/off>' */
+ GF_OPTION_INIT("flush-behind", conf->flush_behind, bool, out);
+
+ GF_OPTION_INIT("trickling-writes", conf->trickling_writes, bool, out);
+
+ GF_OPTION_INIT("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out);
+
+ GF_OPTION_INIT("strict-write-ordering", conf->strict_write_ordering, bool,
+ out);
+
+ GF_OPTION_INIT("resync-failed-syncs-after-fsync", conf->resync_after_fsync,
+ bool, out);
+
+ this->private = conf;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE(conf);
+ }
+ return ret;
+}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- wb_conf_t *conf = this->private;
+ wb_conf_t *conf = NULL;
- FREE (conf);
- return;
-}
+ GF_VALIDATE_OR_GOTO("write-behind", this, out);
+ conf = this->private;
+ if (!conf) {
+ goto out;
+ }
-struct xlator_fops fops = {
- .writev = wb_writev,
- .open = wb_open,
- .create = wb_create,
- .readv = wb_readv,
- .flush = wb_flush,
- .fsync = wb_fsync,
- .stat = wb_stat,
- .fstat = wb_fstat,
- .truncate = wb_truncate,
- .ftruncate = wb_ftruncate,
- .setattr = wb_setattr,
-};
+ this->private = NULL;
+ GF_FREE(conf);
-struct xlator_mops mops = {
-};
+out:
+ return;
+}
-struct xlator_cbks cbks = {
- .release = wb_release
+struct xlator_fops fops = {
+ .writev = wb_writev,
+ .readv = wb_readv,
+ .flush = wb_flush,
+ .fsync = wb_fsync,
+ .stat = wb_stat,
+ .fstat = wb_fstat,
+ .truncate = wb_truncate,
+ .ftruncate = wb_ftruncate,
+ .setattr = wb_setattr,
+ .fsetattr = wb_fsetattr,
+ .lookup = wb_lookup,
+ .readdirp = wb_readdirp,
+ .link = wb_link,
+ .fallocate = wb_fallocate,
+ .discard = wb_discard,
+ .zerofill = wb_zerofill,
+ .rename = wb_rename,
};
+struct xlator_cbks cbks = {.forget = wb_forget, .release = wb_release};
+
struct xlator_dumpops dumpops = {
- .priv = wb_priv_dump,
+ .priv = wb_priv_dump,
+ .inodectx = wb_inode_dump,
};
struct volume_options options[] = {
- { .key = {"flush-behind"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"cache-size", "window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 512 * GF_UNIT_KB,
- .max = 1 * GF_UNIT_GB
- },
- { .key = {"disable-for-first-nbytes"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 1,
- .max = 1 * GF_UNIT_MB,
- },
- { .key = {"enable-O_SYNC"},
- .type = GF_OPTION_TYPE_BOOL,
- },
- { .key = {"enable-trickling-writes"},
- .type = GF_OPTION_TYPE_BOOL,
- },
- { .key = {NULL} },
+ {
+ .key = {"write-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable write-behind",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {.key = {"flush-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+     .description = "If this option is set ON, the write-behind translator "
+                    "performs the flush in the background, by returning "
+                    "success (or any error, if any of the previous writes "
+                    "failed) to the application even before the flush FOP "
+                    "is sent to the backend filesystem. "},
+ {.key = {"cache-size", "window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 512 * GF_UNIT_KB,
+ .max = 1 * GF_UNIT_GB,
+ .default_value = "1MB",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .description = "Size of the write-behind buffer for a single file "
+ "(inode)."},
+ {
+ .key = {"trickling-writes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {GD_OP_VERSION_3_13_1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .default_value = "on",
+ },
+ {.key = {"strict-O_DIRECT"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+     .description = "When this option is set to off, write-behind "
+                    "ignores the O_DIRECT flag."},
+ {
+ .key = {"strict-write-ordering"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .description = "Do not let later writes overtake earlier writes even "
+ "if they do not overlap",
+ },
+ {
+ .key = {"resync-failed-syncs-after-fsync"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_3_7_7},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"write-behind"},
+ .description = "If sync of \"cached-writes issued before fsync\" "
+ "(to backend) fails, this option configures whether "
+ "to retry syncing them after fsync or forget them. "
+                       "If set to on, cached writes are retried on sync "
+                       "failures until a \"flush\" fop (or a successful "
+                       "sync). The fsync itself fails irrespective of the "
+                       "value of this option. ",
+ },
+ {
+ .key = {"aggregate-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "128KB",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .description = "Will aggregate writes until data of specified "
+ "size is fully filled for a single file provided "
+ "there are no dependent fops on cached writes. This "
+ "option just sets the aggregate size. Note that "
+ "aggregation won't happen if "
+ "performance.write-behind-trickling-writes"
+ " is turned on. Hence turn off "
+                       "performance.write-behind-trickling-writes"
+ " so that writes are aggregated till a max of "
+ "\"aggregate-size\" bytes",
+ },
+ {.key = {NULL}},
+};
+
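+/* Single exported registration table consumed by the xlator loader. */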
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "write-behind",
+ .category = GF_MAINTAINED,
};