summaryrefslogtreecommitdiffstats
path: root/xlators/performance/io-cache/src/io-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/performance/io-cache/src/io-cache.c')
-rw-r--r--xlators/performance/io-cache/src/io-cache.c2588
1 files changed, 1571 insertions, 1017 deletions
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index e87fc856a..201777b38 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,412 +18,282 @@
#include "dict.h"
#include "xlator.h"
#include "io-cache.h"
+#include "ioc-mem-types.h"
+#include "statedump.h"
#include <assert.h>
#include <sys/time.h>
-uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path);
+int ioc_log2_page_size;
uint32_t
ioc_get_priority (ioc_table_t *table, const char *path);
-inline ioc_inode_t *
+struct volume_options options[];
+
+
+static inline uint32_t
+ioc_hashfn (void *data, int len)
+{
+ off_t offset;
+
+ offset = *(off_t *) data;
+
+ return (offset >> ioc_log2_page_size);
+}
+
+static inline ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = ioc_inode->table;
+ ioc_table_t *table = NULL;
+
+ table = ioc_inode->table;
- list_add_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
-
- return ioc_inode;
+ list_add_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+
+ return ioc_inode;
}
-inline ioc_inode_t *
+static inline ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
- ioc_inode_t *ioc_inode = NULL;
- data_t *ioc_inode_data = dict_get (dict, name);
- ioc_table_t *table = NULL;
-
- if (ioc_inode_data) {
- ioc_inode = data_to_ptr (ioc_inode_data);
- table = ioc_inode->table;
-
- ioc_table_lock (table);
- {
- if (list_empty (&ioc_inode->inode_lru)) {
- ioc_inode = ioc_inode_reupdate (ioc_inode);
- }
- }
- ioc_table_unlock (table);
- }
-
- return ioc_inode;
+ ioc_inode_t *ioc_inode = NULL;
+ data_t *ioc_inode_data = NULL;
+ ioc_table_t *table = NULL;
+
+ ioc_inode_data = dict_get (dict, name);
+ if (ioc_inode_data) {
+ ioc_inode = data_to_ptr (ioc_inode_data);
+ table = ioc_inode->table;
+
+ ioc_table_lock (table);
+ {
+ if (list_empty (&ioc_inode->inode_lru)) {
+ ioc_inode = ioc_inode_reupdate (ioc_inode);
+ }
+ }
+ ioc_table_unlock (table);
+ }
+
+ return ioc_inode;
}
int32_t
ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {0,};
- int32_t ret = -1;
- ioc_table_t *table = ioc_inode->table;
+ int8_t need_revalidate = 0;
+ struct timeval tv = {0,};
+ ioc_table_t *table = NULL;
+
+ table = ioc_inode->table;
- ret = gettimeofday (&tv, NULL);
+ gettimeofday (&tv, NULL);
- if (time_elapsed (&tv, &ioc_inode->tv) >= table->cache_timeout)
- need_revalidate = 1;
+ if (time_elapsed (&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
+ need_revalidate = 1;
- return need_revalidate;
+ return need_revalidate;
}
/*
* __ioc_inode_flush - flush all the cached pages of the given inode
*
- * @ioc_inode:
+ * @ioc_inode:
*
* assumes lock is held
*/
-int32_t
+int64_t
__ioc_inode_flush (ioc_inode_t *ioc_inode)
{
- ioc_page_t *curr = NULL, *next = NULL;
- int32_t destroy_size = 0;
- int32_t ret = 0;
-
- list_for_each_entry_safe (curr, next, &ioc_inode->pages, pages) {
- ret = ioc_page_destroy (curr);
-
- if (ret != -1)
- destroy_size += ret;
- }
-
- return destroy_size;
+ ioc_page_t *curr = NULL, *next = NULL;
+ int64_t destroy_size = 0;
+ int64_t ret = 0;
+
+ list_for_each_entry_safe (curr, next, &ioc_inode->cache.page_lru,
+ page_lru) {
+ ret = __ioc_page_destroy (curr);
+
+ if (ret != -1)
+ destroy_size += ret;
+ }
+
+ return destroy_size;
}
void
ioc_inode_flush (ioc_inode_t *ioc_inode)
{
- int32_t destroy_size = 0;
-
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
- ioc_inode_unlock (ioc_inode);
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- return;
+ int64_t destroy_size = 0;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ return;
}
int32_t
ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct stat *preop, struct stat *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, preop, postop);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
}
int32_t
ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct stat *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ inode_ctx_get (loc->inode, this, &ioc_inode);
- if (ioc_inode
+ if (ioc_inode
&& ((valid & GF_SET_ATTR_ATIME)
|| (valid & GF_SET_ATTR_MTIME)))
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ return 0;
}
int32_t
ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *stbuf, dict_t *dict, struct stat *postparent)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
{
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = frame->local;
- ioc_table_t *table = this->private;
- ioc_page_t *page = NULL;
- data_t *content_data = NULL;
- char *src = NULL;
- char need_unref = 0;
- uint8_t cache_still_valid = 0;
- uint32_t weight = 0;
- uint64_t tmp_ioc_inode = 0;
- char *buf = NULL;
- char *tmp = NULL;
- int i;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
-
- if (op_ret != 0)
- goto out;
-
- inode_ctx_get (inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (ioc_inode) {
- cache_still_valid = ioc_cache_still_valid (ioc_inode,
- stbuf);
-
- if (!cache_still_valid) {
- ioc_inode_flush (ioc_inode);
- }
- /* update the time-stamp of revalidation */
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- if (local && stbuf->st_size &&
- local->need_xattr >= stbuf->st_size) {
- if (!ioc_inode) {
- weight = ioc_get_priority (table,
- local->file_loc.path);
- ioc_inode = ioc_inode_update (table,
- inode, weight);
- if (ioc_inode == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ uint8_t cache_still_valid = 0;
+ uint64_t tmp_ioc_inode = 0;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ ioc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
- inode_ctx_put (inode, this,
- (uint64_t)(long)ioc_inode);
- }
-
- ioc_inode_lock (ioc_inode);
- {
- content_data = dict_get (dict, "glusterfs.content");
- page = ioc_page_get (ioc_inode, 0);
-
- if (content_data) {
- if (page) {
- iobref_unref (page->iobref);
- free (page->vector);
- page->vector = NULL;
-
- ioc_table_lock (table);
- {
- table->cache_used -=
- iobref_size (page->iobref);
- }
- ioc_table_unlock (table);
- } else {
- page = ioc_page_create (ioc_inode, 0);
- if (page == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name,
- GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- }
+ local = frame->local;
+ if (local == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- src = data_to_ptr (content_data);
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- page->iobref = iobref_new ();
- if (page->iobref == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
-
- ioc_page_destroy (page);
- page = NULL;
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ table = this->private;
- iobref_add (page->iobref, iobuf);
-
- memcpy (iobuf->ptr, src, stbuf->st_size);
+ path = local->file_loc.path;
- page->vector = CALLOC (1,
- sizeof (*page->vector));
-
- if (page->vector == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
+ LOCK (&inode->lock);
+ {
+ __inode_ctx_get (inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- op_ret = -1;
- op_errno = ENOMEM;
- ioc_page_destroy (page);
- page = NULL;
- goto out;
- }
+ if (!ioc_inode) {
+ weight = ioc_get_priority (table, path);
- page->vector->iov_base = iobuf->ptr;
- page->vector->iov_len = stbuf->st_size;
- page->count = 1;
-
- page->waitq = NULL;
- page->size = stbuf->st_size;
- page->ready = 1;
-
- ioc_table_lock (table);
- {
- table->cache_used +=
- iobref_size (page->iobref);
- }
- ioc_table_unlock (table);
-
- } else {
- if (!(page && page->ready)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "page not present");
-
- ioc_inode_unlock (ioc_inode);
- STACK_WIND (frame, ioc_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- &local->file_loc,
- local->xattr_req);
- return 0;
- }
- buf = CALLOC (1, stbuf->st_size);
- if (buf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ ioc_inode = ioc_inode_update (table, inode,
+ weight);
- tmp = buf;
-
- for (i = 0; i < page->count; i++) {
- memcpy (tmp, page->vector[i].iov_base,
- page->vector[i].iov_len);
- tmp += page->vector[i].iov_len;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "serving file %s from cache",
- local->file_loc.path);
-
- if (!dict) {
- dict = get_new_dict ();
- if (dict == NULL) {
- /* logged in get_new_dict() */
- FREE (buf);
- buf = tmp = NULL;
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ __inode_ctx_put (inode, this,
+ (uint64_t)(long)ioc_inode);
+ }
+ }
+ UNLOCK (&inode->lock);
- need_unref = 1;
- dict_ref (dict);
- }
- dict_set (dict, "glusterfs.content",
- data_from_dynptr (buf,
- stbuf->st_size));
- }
-
- ioc_inode->mtime = stbuf->st_mtime;
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- if (content_data &&
- ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
- }
+ ioc_inode_lock (ioc_inode);
+ {
+ if (ioc_inode->cache.mtime == 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
-out:
- STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, dict);
+ ioc_inode->ia_size = stbuf->ia_size;
+ }
+ ioc_inode_unlock (ioc_inode);
- if (need_unref) {
- dict_unref (dict);
- }
+ cache_still_valid = ioc_cache_still_valid (ioc_inode,
+ stbuf);
- if (iobref)
- iobref_unref (iobref);
+ if (!cache_still_valid) {
+ ioc_inode_flush (ioc_inode);
+ }
- if (iobuf)
- iobuf_unref (iobuf);
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
- return 0;
+out:
+ if (frame->local != NULL) {
+ local = frame->local;
+ loc_wipe (&local->file_loc);
+ }
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ return 0;
}
-int32_t
+int32_t
ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
- uint64_t content_limit = 0;
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_page_t *page = NULL;
- ioc_local_t *local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
- if (GF_FILE_CONTENT_REQUESTED(xattr_req, &content_limit)) {
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
- local->need_xattr = content_limit;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
- frame->local = local;
-
- inode_ctx_get (loc->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
- if (ioc_inode) {
- ioc_inode_lock (ioc_inode);
- {
- page = ioc_page_get (ioc_inode, 0);
- if ((content_limit <=
- ioc_inode->table->page_size) &&
- page && page->ready) {
- local->need_xattr = -1;
- }
- }
- ioc_inode_unlock (ioc_inode);
- }
- }
-
- STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
+ ret = loc_copy (&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
- return 0;
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
}
/*
- * ioc_forget -
+ * ioc_forget -
*
* @frame:
* @this:
@@ -442,19 +303,33 @@ ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t
ioc_forget (xlator_t *this, inode_t *inode)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
+
+ return 0;
+}
+
+static int32_t
+ioc_invalidate(xlator_t *this, inode_t *inode)
+{
+ uint64_t ioc_addr = 0;
+ ioc_inode_t *ioc_inode = NULL;
- inode_ctx_get (inode, this, &ioc_inode);
+ inode_ctx_get(inode, this, (uint64_t *) &ioc_addr);
+ ioc_inode = (void *) ioc_addr;
if (ioc_inode)
- ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
-
+ ioc_inode_flush(ioc_inode);
+
return 0;
}
-
-/*
- * ioc_cache_validate_cbk -
+/*
+ * ioc_cache_validate_cbk -
*
* @frame:
* @cookie:
@@ -466,84 +341,89 @@ ioc_forget (xlator_t *this, inode_t *inode)
*/
int32_t
ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_inode_t *ioc_inode = NULL;
- size_t destroy_size = 0;
- struct stat *local_stbuf = NULL;
+ ioc_local_t *local = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ size_t destroy_size = 0;
+ struct iatt *local_stbuf = NULL;
local = frame->local;
- ioc_inode = local->inode;
+ ioc_inode = local->inode;
local_stbuf = stbuf;
- if ((op_ret == -1) ||
- ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
- "cache for inode(%p) is invalid. flushing all pages",
- ioc_inode);
- /* NOTE: only pages with no waiting frames are flushed by
- * ioc_inode_flush. page_fault will be generated for all
- * the pages which have waiting frames by ioc_inode_wakeup()
- */
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- if (op_ret >= 0)
- ioc_inode->mtime = stbuf->st_mtime;
- }
- ioc_inode_unlock (ioc_inode);
- local_stbuf = NULL;
- }
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- if (op_ret < 0)
- local_stbuf = NULL;
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
-
- /* any page-fault initiated by ioc_inode_wakeup() will have its own
- * fd_ref on fd, safe to unref validate frame's private copy
- */
- fd_unref (local->fd);
-
- STACK_DESTROY (frame->root);
+ if ((op_ret == -1) ||
+ ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
+ "cache for inode(%p) is invalid. flushing all pages",
+ ioc_inode);
+ /* NOTE: only pages with no waiting frames are flushed by
+ * ioc_inode_flush. page_fault will be generated for all
+ * the pages which have waiting frames by ioc_inode_wakeup()
+ */
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ if (op_ret >= 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec
+ = stbuf->ia_mtime_nsec;
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+ local_stbuf = NULL;
+ }
- return 0;
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ if (op_ret < 0)
+ local_stbuf = NULL;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
+
+ /* any page-fault initiated by ioc_inode_wakeup() will have its own
+ * fd_ref on fd, safe to unref validate frame's private copy
+ */
+ fd_unref (local->fd);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
}
int32_t
ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
{
- ioc_waitq_t *waiter = NULL, *trav = NULL;
- uint32_t page_found = 0;
- int32_t ret = 0;
-
- trav = ioc_inode->waitq;
-
- while (trav) {
- if (trav->data == page) {
- page_found = 1;
- break;
- }
- trav = trav->next;
- }
-
- if (!page_found) {
- waiter = CALLOC (1, sizeof (ioc_waitq_t));
+ ioc_waitq_t *waiter = NULL, *trav = NULL;
+ uint32_t page_found = 0;
+ int32_t ret = 0;
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ if (trav->data == page) {
+ page_found = 1;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if (!page_found) {
+ waiter = GF_CALLOC (1, sizeof (ioc_waitq_t),
+ gf_ioc_mt_ioc_waitq_t);
if (waiter == NULL) {
gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
"out of memory");
@@ -551,13 +431,13 @@ ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
goto out;
}
- waiter->data = page;
- waiter->next = ioc_inode->waitq;
- ioc_inode->waitq = waiter;
- }
+ waiter->data = page;
+ waiter->next = ioc_inode->waitq;
+ ioc_inode->waitq = waiter;
+ }
-out:
- return ret;
+out:
+ return ret;
}
/*
@@ -570,15 +450,15 @@ out:
*/
int32_t
ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- ioc_page_t *page)
+ ioc_page_t *page)
{
- call_frame_t *validate_frame = NULL;
- ioc_local_t *validate_local = NULL;
- ioc_local_t *local = NULL;
- int32_t ret = 0;
+ call_frame_t *validate_frame = NULL;
+ ioc_local_t *validate_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t ret = 0;
local = frame->local;
- validate_local = CALLOC (1, sizeof (ioc_local_t));
+ validate_local = mem_get0 (THIS->local_pool);
if (validate_local == NULL) {
ret = -1;
local->op_ret = -1;
@@ -588,59 +468,58 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
goto out;
}
- validate_frame = copy_frame (frame);
+ validate_frame = copy_frame (frame);
if (validate_frame == NULL) {
ret = -1;
local->op_ret = -1;
local->op_errno = ENOMEM;
- FREE (validate_local);
+ mem_put (validate_local);
gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
"out of memory");
goto out;
}
- validate_local->fd = fd_ref (fd);
- validate_local->inode = ioc_inode;
- validate_frame->local = validate_local;
-
- STACK_WIND (validate_frame, ioc_cache_validate_cbk,
+ validate_local->fd = fd_ref (fd);
+ validate_local->inode = ioc_inode;
+ validate_frame->local = validate_local;
+
+ STACK_WIND (validate_frame, ioc_cache_validate_cbk,
FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->fstat, fd);
+ FIRST_CHILD (frame->this)->fops->fstat, fd, NULL);
out:
- return ret;
+ return ret;
}
-inline uint32_t
+static inline uint32_t
is_match (const char *path, const char *pattern)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
-
- return (ret == 0);
+ ret = fnmatch (pattern, path, FNM_NOESCAPE);
+
+ return (ret == 0);
}
uint32_t
ioc_get_priority (ioc_table_t *table, const char *path)
{
- uint32_t priority = 0;
- struct ioc_priority *curr = NULL;
-
- if (list_empty(&table->priority_list)) {
- priority = 1;
- }
- else {
- list_for_each_entry (curr, &table->priority_list, list) {
- if (is_match (path, curr->pattern))
- priority = curr->priority;
- }
- }
-
- return priority;
+ uint32_t priority = 1;
+ struct ioc_priority *curr = NULL;
+
+ if (list_empty(&table->priority_list))
+ return priority;
+
+ priority = 0;
+ list_for_each_entry (curr, &table->priority_list, list) {
+ if (is_match (path, curr->pattern))
+ priority = curr->priority;
+ }
+
+ return priority;
}
-/*
+/*
* ioc_open_cbk - open callback for io cache
*
* @frame: call frame
@@ -653,80 +532,74 @@ ioc_get_priority (ioc_table_t *table, const char *path)
*/
int32_t
ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- inode_t *inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
table = this->private;
- inode = local->file_loc.inode;
- path = local->file_loc.path;
- if (op_ret != -1) {
- /* look for ioc_inode corresponding to this fd */
- LOCK (&fd->inode->lock);
- {
- __inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
- if (!ioc_inode) {
- /*
- this is the first time someone is opening
- this file, assign weight
- */
- weight = ioc_get_priority (table, path);
-
- ioc_inode = ioc_inode_update (table, inode,
- weight);
- __inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
- } else {
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ if (op_ret != -1) {
+ inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+ //TODO: see why inode context is NULL and handle it.
+ if (!ioc_inode) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context is "
+ "NULL (%s)", uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
+
+ ioc_inode_lock (ioc_inode);
+ {
+ if ((table->min_file_size > ioc_inode->ia_size)
+ || ((table->max_file_size > 0)
+ && (table->max_file_size < ioc_inode->ia_size))) {
+ fd_ctx_set (fd, this, 1);
}
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if ((local->flags & O_DIRECT)){
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set (fd, this, 1);
+ }
- }
- UNLOCK (&fd->inode->lock);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
- if (((inode->st_mode & S_ISGID)
- && !(inode->st_mode & S_IXGRP))) {
- fd_ctx_set (fd, this, 1);
- }
-
- /* If O_DIRECT open, we disable caching on it */
- if ((local->flags & O_DIRECT)){
- /* O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- FREE (local);
- frame->local = NULL;
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ /* weight = 0, we disable caching on it */
+ if (weight == 0) {
+ /* we allow a pattern-matched cache disable this way
+ */
+ fd_ctx_set (fd, this, 1);
+ }
+ }
- return 0;
+out:
+ mem_put (local);
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
}
/*
@@ -745,63 +618,182 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ int ret = -1;
local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
table = this->private;
path = local->file_loc.path;
- if (op_ret != -1) {
- {
- /* assign weight */
- weight = ioc_get_priority (table, path);
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority (table, path);
+
+ ioc_inode = ioc_inode_update (table, inode, weight);
+
+ ioc_inode_lock (ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+
+ if ((table->min_file_size > ioc_inode->ia_size)
+ || ((table->max_file_size > 0)
+ && (table->max_file_size < ioc_inode->ia_size))) {
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ inode_ctx_put (fd->inode, this,
+ (uint64_t)(long)ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if (local->flags & O_DIRECT) {
+ /*
+ * O_DIRECT is only for one fd, not the inode
+ * as a whole */
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
+
+ /* if weight == 0, we disable caching on it */
+ if (!weight) {
+ /* we allow a pattern-matched cache disable this way */
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
- ioc_inode = ioc_inode_update (table, inode, weight);
+ }
- inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
- }
- /*
- * If mandatory locking has been enabled on this file,
- * we disable caching on it
- */
- if ((inode->st_mode & S_ISGID) &&
- !(inode->st_mode & S_IXGRP)) {
- fd_ctx_set (fd, this, 1);
- }
-
- /* If O_DIRECT open, we disable caching on it */
- if (local->flags & O_DIRECT){
- /*
- * O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- frame->local = NULL;
- FREE (local);
-
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+out:
+ frame->local = NULL;
+ mem_put (local);
- return 0;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
}
+
+int32_t
+ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+ path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority (table, path);
+
+ ioc_inode = ioc_inode_update (table, inode, weight);
+
+ ioc_inode_lock (ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ inode_ctx_put (inode, this,
+ (uint64_t)(long)ioc_inode);
+ }
+
+out:
+ frame->local = NULL;
+
+ loc_wipe (&local->file_loc);
+ mem_put (local);
+
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
+
+ ret = loc_copy (&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+
+unwind:
+ if (local != NULL) {
+ loc_wipe (&local->file_loc);
+ mem_put (local);
+ }
+
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
+
/*
* ioc_open - open fop for io cache
* @frame:
@@ -812,33 +804,34 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
-
- ioc_local_t *local = NULL;
- local = CALLOC (1, sizeof (ioc_local_t));
+ ioc_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
- local->flags = flags;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
-
- frame->local = local;
-
- STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ local->file_loc.inode = loc->inode;
- return 0;
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd,
+ xdata);
+
+ return 0;
}
/*
* ioc_create - create fop for io cache
- *
+ *
* @frame:
* @this:
* @pathname:
@@ -848,25 +841,27 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
int32_t
ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- ioc_local_t *local = NULL;
-
- local = CALLOC (1, sizeof (ioc_local_t));
+ ioc_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL,
+ NULL, NULL, NULL, NULL);
return 0;
}
- local->flags = flags;
- local->file_loc.path = loc->path;
- frame->local = local;
-
- STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd);
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ frame->local = local;
- return 0;
+ STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+
+ return 0;
}
@@ -874,7 +869,7 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
/*
* ioc_release - release fop for io cache
- *
+ *
* @frame:
* @this:
* @fd:
@@ -883,11 +878,11 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int32_t
ioc_release (xlator_t *this, fd_t *fd)
{
- return 0;
+ return 0;
}
-/*
- * ioc_readv_disabled_cbk
+/*
+ * ioc_readv_disabled_cbk
* @frame:
* @cookie:
* @this:
@@ -896,163 +891,174 @@ ioc_release (xlator_t *this, fd_t *fd)
* @vector:
* @count:
*
- */
+ */
int32_t
ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf,
- struct iobref *iobref)
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+ return 0;
}
int32_t
ioc_need_prune (ioc_table_t *table)
{
- int64_t cache_difference = 0;
-
- ioc_table_lock (table);
- {
- cache_difference = table->cache_used - table->cache_size;
- }
- ioc_table_unlock (table);
-
- if (cache_difference > 0)
- return 1;
- else
- return 0;
+ int64_t cache_difference = 0;
+
+ ioc_table_lock (table);
+ {
+ cache_difference = table->cache_used - table->cache_size;
+ }
+ ioc_table_unlock (table);
+
+ if (cache_difference > 0)
+ return 1;
+ else
+ return 0;
}
/*
* ioc_dispatch_requests -
- *
+ *
* @frame:
* @inode:
*
- *
+ *
*/
void
ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
off_t offset, size_t size)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *trav = NULL;
- ioc_waitq_t *waitq = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- int32_t fault = 0;
- size_t trav_size = 0;
- off_t local_offset = 0;
- int32_t ret = -1;
- int8_t need_validate = 0;
- int8_t might_need_validate = 0; /*
- * if a page exists, do we need
- * to validate it?
- */
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *trav = NULL;
+ ioc_waitq_t *waitq = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ int32_t fault = 0;
+ size_t trav_size = 0;
+ off_t local_offset = 0;
+ int32_t ret = -1;
+ int8_t need_validate = 0;
+ int8_t might_need_validate = 0; /*
+ * if a page exists, do we need
+ * to validate it?
+ */
local = frame->local;
table = ioc_inode->table;
- rounded_offset = floor (offset, table->page_size);
- rounded_end = roof (offset + size, table->page_size);
- trav_offset = rounded_offset;
-
- /* once a frame does read, it should be waiting on something */
- local->wait_count++;
-
- /* Requested region can fall in three different pages,
- * 1. Ready - region is already in cache, we just have to serve it.
- * 2. In-transit - page fault has been generated on this page, we need
- * to wait till the page is ready
- * 3. Fault - page is not in cache, we have to generate a page fault
- */
-
- might_need_validate = ioc_inode_need_revalidate (ioc_inode);
-
- while (trav_offset < rounded_end) {
- ioc_inode_lock (ioc_inode);
- //{
-
- /* look for requested region in the cache */
- trav = ioc_page_get (ioc_inode, trav_offset);
-
- local_offset = max (trav_offset, offset);
- trav_size = min (((offset+size) - local_offset),
- table->page_size);
-
- if (!trav) {
- /* page not in cache, we need to generate page fault */
- trav = ioc_page_create (ioc_inode, trav_offset);
- fault = 1;
- if (!trav) {
- gf_log (frame->this->name, GF_LOG_CRITICAL,
- "out of memory");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
- }
-
- ioc_wait_on_page (trav, frame, local_offset, trav_size);
-
- if (trav->ready) {
- /* page found in cache */
- if (!might_need_validate && !ioc_inode->waitq) {
- /* fresh enough */
- gf_log (frame->this->name, GF_LOG_TRACE,
- "cache hit for trav_offset=%"PRId64""
- "/local_offset=%"PRId64"",
- trav_offset, local_offset);
- waitq = ioc_page_wakeup (trav);
- } else {
- /* if waitq already exists, fstat revalidate is
- already on the way */
- if (!ioc_inode->waitq) {
- need_validate = 1;
- }
-
- ret = ioc_wait_on_inode (ioc_inode, trav);
- if (ret < 0) {
+ rounded_offset = floor (offset, table->page_size);
+ rounded_end = roof (offset + size, table->page_size);
+ trav_offset = rounded_offset;
+
+ /* once a frame does read, it should be waiting on something */
+ local->wait_count++;
+
+ /* Requested region can fall in three different pages,
+ * 1. Ready - region is already in cache, we just have to serve it.
+ * 2. In-transit - page fault has been generated on this page, we need
+ * to wait till the page is ready
+ * 3. Fault - page is not in cache, we have to generate a page fault
+ */
+
+ might_need_validate = ioc_inode_need_revalidate (ioc_inode);
+
+ while (trav_offset < rounded_end) {
+ ioc_inode_lock (ioc_inode);
+ {
+ /* look for requested region in the cache */
+ trav = __ioc_page_get (ioc_inode, trav_offset);
+
+ local_offset = max (trav_offset, offset);
+ trav_size = min (((offset+size) - local_offset),
+ table->page_size);
+
+ if (!trav) {
+ /* page not in cache, we need to generate page
+ * fault
+ */
+ trav = __ioc_page_create (ioc_inode,
+ trav_offset);
+ fault = 1;
+ if (!trav) {
+ gf_log (frame->this->name,
+ GF_LOG_CRITICAL,
+ "out of memory");
local->op_ret = -1;
- local->op_errno = -ret;
- need_validate = 0;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ __ioc_wait_on_page (trav, frame, local_offset,
+ trav_size);
+
+ if (trav->ready) {
+ /* page found in cache */
+ if (!might_need_validate && !ioc_inode->waitq) {
+ /* fresh enough */
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "cache hit for trav_offset=%"
+ PRId64"/local_offset=%"PRId64"",
+ trav_offset, local_offset);
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
+ } else {
+ /* if waitq already exists, fstat
+ * revalidate is
+ * already on the way
+ */
+ if (!ioc_inode->waitq) {
+ need_validate = 1;
+ }
- waitq = ioc_page_wakeup (trav);
- ioc_inode_unlock (ioc_inode);
+ ret = ioc_wait_on_inode (ioc_inode,
+ trav);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ need_validate = 0;
- ioc_waitq_return (waitq);
- waitq = NULL;
- goto out;
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+ goto out;
+ }
}
- }
- }
-
- //}
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
-
- if (fault) {
- fault = 0;
- /* new page created, increase the table->cache_used */
- ioc_page_fault (ioc_inode, frame, fd, trav_offset);
- }
-
- if (need_validate) {
- need_validate = 0;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "sending validate request for "
- "inode(%"PRId64") at offset=%"PRId64"",
- fd->inode->ino, trav_offset);
- ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
+ }
+
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+
+ if (fault) {
+ fault = 0;
+ /* new page created, increase the table->cache_used */
+ ioc_page_fault (ioc_inode, frame, fd, trav_offset);
+ }
+
+ if (need_validate) {
+ need_validate = 0;
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "sending validate request for "
+ "inode(%s) at offset=%"PRId64"",
+ uuid_utoa (fd->inode->gfid), trav_offset);
+ ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
if (ret == -1) {
ioc_inode_lock (ioc_inode);
{
- waitq = ioc_page_wakeup (trav);
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
}
ioc_inode_unlock (ioc_inode);
@@ -1060,25 +1066,25 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
waitq = NULL;
goto out;
}
- }
-
- trav_offset += table->page_size;
- }
+ }
+
+ trav_offset += table->page_size;
+ }
out:
- ioc_frame_return (frame);
+ ioc_frame_return (frame);
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
- return;
+ return;
}
/*
* ioc_readv -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1088,70 +1094,107 @@ out:
*/
int32_t
ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = NULL;
- uint32_t weight = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (!ioc_inode) {
- /* caching disabled, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
- if (!fd_ctx_get (fd, this, NULL)) {
- /* disable caching for this fd, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
- local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t));
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_local_t *local = NULL;
+ uint32_t weight = 0;
+ ioc_table_t *table = NULL;
+ int32_t op_errno = -1;
+
+ if (!this) {
+ goto out;
+ }
+
+ inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (!ioc_inode) {
+ /* caching disabled, go ahead with normal readv */
+ STACK_WIND (frame, ioc_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv, fd, size,
+ offset, flags, xdata);
+ return 0;
+ }
+
+
+ table = this->private;
+
+ if (!table) {
+ gf_log (this->name, GF_LOG_ERROR, "table is null");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ if (!ioc_inode->cache.page_table) {
+ ioc_inode->cache.page_table
+ = rbthash_table_init
+ (IOC_PAGE_TABLE_BUCKET_COUNT,
+ ioc_hashfn, NULL, 0,
+ table->mem_pool);
+
+ if (ioc_inode->cache.page_table == NULL) {
+ op_errno = ENOMEM;
+ ioc_inode_unlock (ioc_inode);
+ goto out;
+ }
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (!fd_ctx_get (fd, this, NULL)) {
+ /* disable caching for this fd, go ahead with normal readv */
+ STACK_WIND (frame, ioc_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv, fd, size,
+ offset, flags, xdata);
+ return 0;
+ }
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, 0, NULL, NULL);
- return 0;
+ op_errno = ENOMEM;
+ goto out;
}
- INIT_LIST_HEAD (&local->fill_list);
+ INIT_LIST_HEAD (&local->fill_list);
- frame->local = local;
- local->pending_offset = offset;
- local->pending_size = size;
- local->offset = offset;
- local->size = size;
- local->inode = ioc_inode;
+ frame->local = local;
+ local->pending_offset = offset;
+ local->pending_size = size;
+ local->offset = offset;
+ local->size = size;
+ local->inode = ioc_inode;
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
- frame, offset, size);
+ gf_log (this->name, GF_LOG_TRACE,
+ "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
+ frame, offset, size);
- weight = ioc_inode->weight;
+ weight = ioc_inode->weight;
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &ioc_inode->table->inode_lru[weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &ioc_inode->table->inode_lru[weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
- ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
-
- return 0;
+ ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
+ return 0;
+
+out:
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
}
/*
* ioc_writev_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1161,25 +1204,26 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
*/
int32_t
ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
local = frame->local;
- inode_ctx_get (local->fd->inode, this, &ioc_inode);
-
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (local->fd->inode, this, &ioc_inode);
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
/*
* ioc_writev
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1190,38 +1234,38 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
-
- local = CALLOC (1, sizeof (ioc_local_t));
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
- /* TODO: why is it not fd_ref'ed */
- local->fd = fd;
- frame->local = local;
+ /* TODO: why is it not fd_ref'ed */
+ local->fd = fd;
+ frame->local = local;
- inode_ctx_get (fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- iobref);
+ STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
}
/*
* ioc_truncate_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1230,14 +1274,15 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
* @buf:
*
*/
-int32_t
+int32_t
ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
@@ -1254,41 +1299,44 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
/*
* ioc_truncate -
- *
+ *
* @frame:
* @this:
* @loc:
* @offset:
*
*/
-int32_t
-ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+int32_t
+ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (loc->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
/*
* ioc_ftruncate -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1296,309 +1344,815 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
*
*/
int32_t
-ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (fd->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (fd->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
int32_t
ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
-int32_t
+int32_t
ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
+{
+ ioc_inode_t *ioc_inode = NULL;
+ uint64_t tmp_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &tmp_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_inode;
+ if (!ioc_inode) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "inode context is NULL: returning EBADFD");
+ STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL);
+ return 0;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata);
+
+ return 0;
+}
+
+int
+ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ /* TODO: fill things */
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+int
+ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- ioc_inode_t *ioc_inode = NULL;
- uint64_t tmp_inode = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_inode;
- if (!ioc_inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "inode context is NULL: returning EBADFD");
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lk, fd, cmd, lock);
+ STACK_WIND (frame, ioc_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+}
+static int32_t
+ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
return 0;
}
+static int32_t
+ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret,
+ op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
+
+
int32_t
ioc_get_priority_list (const char *opt_str, struct list_head *first)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct ioc_priority *curr = NULL, *tmp = NULL;
-
- string = strdup (opt_str);
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+
+ string = gf_strdup (opt_str);
if (string == NULL) {
max_pri = -1;
goto out;
}
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- stripe_str = strtok_r (string, ",", &tmp_str);
- while (stripe_str) {
- curr = CALLOC (1, sizeof (struct ioc_priority));
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ stripe_str = strtok_r (string, ",", &tmp_str);
+ while (stripe_str) {
+ curr = GF_CALLOC (1, sizeof (struct ioc_priority),
+ gf_ioc_mt_ioc_priority);
if (curr == NULL) {
max_pri = -1;
goto out;
}
- list_add_tail (&curr->list, first);
+ list_add_tail (&curr->list, first);
- dup_str = strdup (stripe_str);
+ dup_str = gf_strdup (stripe_str);
if (dup_str == NULL) {
max_pri = -1;
goto out;
}
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ if (!pattern) {
max_pri = -1;
goto out;
}
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
+ priority = strtok_r (NULL, ":", &tmp_str1);
+ if (!priority) {
max_pri = -1;
goto out;
}
- gf_log ("io-cache", GF_LOG_TRACE,
- "ioc priority : pattern %s : priority %s",
- pattern,
- priority);
+ gf_log ("io-cache", GF_LOG_TRACE,
+ "ioc priority : pattern %s : priority %s",
+ pattern,
+ priority);
- curr->pattern = strdup (pattern);
+ curr->pattern = gf_strdup (pattern);
if (curr->pattern == NULL) {
max_pri = -1;
goto out;
}
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
+ curr->priority = strtol (priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
max_pri = -1;
goto out;
} else {
- max_pri = max (max_pri, curr->priority);
+ max_pri = max (max_pri, curr->priority);
}
- free (dup_str);
+ GF_FREE (dup_str);
dup_str = NULL;
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- }
-out:
- if (string != NULL) {
- free (string);
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
}
+out:
+ GF_FREE (string);
- if (dup_str != NULL) {
- free (dup_str);
- }
+ GF_FREE (dup_str);
if (max_pri == -1) {
list_for_each_entry_safe (curr, tmp, first, list) {
list_del_init (&curr->list);
- free (curr->pattern);
- free (curr);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
}
}
- return max_pri;
+ return max_pri;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_ioc_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+static gf_boolean_t
+check_cache_size_ok (xlator_t *this, uint64_t cache_size)
+{
+ gf_boolean_t ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ opt = xlator_volume_option_get (this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get cache-size option");
+ goto out;
+ }
+
+ total_mem = get_mem_size ();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64,
+ max_cache_size);
+
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64
+ " is greater than the max size of %"PRIu64,
+ cache_size, max_cache_size);
+ goto out;
+ }
+out:
+ return ret;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ data_t *data = NULL;
+ ioc_table_t *table = NULL;
+ int ret = -1;
+ uint64_t cache_size_new = 0;
+ if (!this || !this->private)
+ goto out;
+
+ table = this->private;
+
+ ioc_table_lock (table);
+ {
+ GF_OPTION_RECONF ("cache-timeout", table->cache_timeout,
+ options, int32, unlock);
+
+ data = dict_get (options, "priority");
+ if (data) {
+ char *option_list = data_to_str (data);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list (option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
+ goto unlock;
+ }
+ table->max_pri ++;
+ }
+
+ GF_OPTION_RECONF ("max-file-size", table->max_file_size,
+ options, size, unlock);
+
+ GF_OPTION_RECONF ("min-file-size", table->min_file_size,
+ options, size, unlock);
+
+ if ((table->max_file_size >= 0) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_log (this->name, GF_LOG_ERROR, "minimum size (%"
+ PRIu64") of a file that can be cached is "
+ "greater than maximum size (%"PRIu64"). "
+ "Hence Defaulting to old value",
+ table->min_file_size, table->max_file_size);
+ goto unlock;
+ }
+
+ GF_OPTION_RECONF ("cache-size", cache_size_new,
+ options, size, unlock);
+ if (!check_cache_size_ok (this, cache_size_new)) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Not reconfiguring cache-size");
+ goto unlock;
+ }
+ table->cache_size = cache_size_new;
+
+ ret = 0;
+ }
+unlock:
+ ioc_table_unlock (table);
+out:
+ return ret;
+}
+
+
/*
- * init -
+ * init -
* @this:
*
*/
-int32_t
+int32_t
init (xlator_t *this)
{
- ioc_table_t *table = NULL;
- dict_t *options = this->options;
- uint32_t index = 0;
- char *cache_size_string = NULL;
- int32_t ret = -1;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: io-cache not configured with exactly "
- "one child");
+ ioc_table_t *table = NULL;
+ dict_t *xl_options = NULL;
+ uint32_t index = 0;
+ int32_t ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ data_t *data = 0;
+ uint32_t num_pages = 0;
+
+ xl_options = this->options;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: io-cache not configured with exactly "
+ "one child");
goto out;
- }
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
- table = (void *) CALLOC (1, sizeof (*table));
+ table = (void *) GF_CALLOC (1, sizeof (*table), gf_ioc_mt_ioc_table_t);
if (table == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
goto out;
}
-
- table->xl = this;
- table->page_size = this->ctx->page_size;
- table->cache_size = IOC_CACHE_SIZE;
-
- if (dict_get (options, "cache-size"))
- cache_size_string = data_to_str (dict_get (options,
- "cache-size"));
- if (cache_size_string) {
- if (gf_string2bytesize (cache_size_string,
- &table->cache_size) != 0) {
- gf_log ("io-cache", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option cache-size\"",
- cache_size_string);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "using cache-size %"PRIu64"", table->cache_size);
- }
-
- table->cache_timeout = 1;
-
- if (dict_get (options, "cache-timeout")) {
- table->cache_timeout =
- data_to_uint32 (dict_get (options,
- "cache-timeout"));
- gf_log (this->name, GF_LOG_TRACE,
- "Using %d seconds to revalidate cache",
- table->cache_timeout);
- }
-
- INIT_LIST_HEAD (&table->priority_list);
- table->max_pri = 1;
- if (dict_get (options, "priority")) {
- char *option_list = data_to_str (dict_get (options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- table->max_pri = ioc_get_priority_list (option_list,
- &table->priority_list);
-
- if (table->max_pri == -1) {
+
+ table->xl = this;
+ table->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT ("cache-size", table->cache_size, size, out);
+
+ GF_OPTION_INIT ("cache-timeout", table->cache_timeout, int32, out);
+
+ GF_OPTION_INIT ("min-file-size", table->min_file_size, size, out);
+
+ GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out);
+
+ if (!check_cache_size_ok (this, table->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&table->priority_list);
+ table->max_pri = 1;
+ data = dict_get (xl_options, "priority");
+ if (data) {
+ char *option_list = data_to_str (data);
+ gf_log (this->name, GF_LOG_TRACE,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list (option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
goto out;
}
- }
- table->max_pri ++;
- INIT_LIST_HEAD (&table->inodes);
-
- table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head));
+ }
+ table->max_pri ++;
+
+ INIT_LIST_HEAD (&table->inodes);
+
+ if ((table->max_file_size >= 0)
+ && (table->min_file_size > table->max_file_size)) {
+ gf_log ("io-cache", GF_LOG_ERROR, "minimum size (%"
+ PRIu64") of a file that can be cached is "
+ "greater than maximum size (%"PRIu64")",
+ table->min_file_size, table->max_file_size);
+ goto out;
+ }
+
+ table->inode_lru = GF_CALLOC (table->max_pri,
+ sizeof (struct list_head),
+ gf_ioc_mt_list_head);
if (table->inode_lru == NULL) {
goto out;
}
- for (index = 0; index < (table->max_pri); index++)
- INIT_LIST_HEAD (&table->inode_lru[index]);
+ for (index = 0; index < (table->max_pri); index++)
+ INIT_LIST_HEAD (&table->inode_lru[index]);
+
+ this->local_pool = mem_pool_new (ioc_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ pthread_mutex_init (&table->table_lock, NULL);
+ this->private = table;
+
+ num_pages = (table->cache_size / table->page_size)
+ + ((table->cache_size % table->page_size)
+ ? 1 : 0);
+
+ table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages);
+ if (!table->mem_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to allocate mem_pool");
+ goto out;
+ }
- pthread_mutex_init (&table->table_lock, NULL);
- this->private = table;
ret = 0;
+ ctx = this->ctx;
+ ioc_log2_page_size = log_base2 (ctx->page_size);
+
out:
if (ret == -1) {
if (table != NULL) {
- free (table->inode_lru);
- free (table);
+ GF_FREE (table->inode_lru);
+ GF_FREE (table);
+ }
+ }
+
+ return ret;
+}
+
+void
+ioc_page_waitq_dump (ioc_page_t *page, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ trav = page->waitq;
+
+ while (trav) {
+ frame = trav->data;
+ sprintf (key, "waitq.frame[%d]", i++);
+ gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ ioc_page_t *page = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ page = trav->data;
+
+ sprintf (key, "cache-validation-waitq.page[%d].offset", i++);
+ gf_proc_dump_write (key, "%"PRId64, page->offset);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_page_dump (ioc_page_t *page, char *prefix)
+{
+
+ int ret = -1;
+
+ if (!page)
+ return;
+ /* ioc_page_lock can be used to hold the mutex. But in statedump
+ * its better to use trylock to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock (&page->page_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+ gf_proc_dump_write ("size", "%"PRId64, page->size);
+ gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+ ioc_page_waitq_dump (page, prefix);
+ }
+ pthread_mutex_unlock (&page->page_lock);
+
+out:
+ if (ret && page)
+ gf_proc_dump_write ("Unable to dump the page information",
+ "(Lock acquisition failed) %p", page);
+
+ return;
+}
+
+void
+__ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix)
+{
+ off_t offset = 0;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char timestr[256] = {0, };
+
+ if ((ioc_inode == NULL) || (prefix == NULL)) {
+ goto out;
+ }
+
+ table = ioc_inode->table;
+
+ if (ioc_inode->cache.tv.tv_sec) {
+ gf_time_fmt (timestr, sizeof timestr,
+ ioc_inode->cache.tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec);
+
+ gf_proc_dump_write ("last-cache-validation-time", "%s",
+ timestr);
+ }
+
+ for (offset = 0; offset < ioc_inode->ia_size;
+ offset += table->page_size) {
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page == NULL) {
+ continue;
+ }
+
+ sprintf (key, "inode.cache.page[%d]", i++);
+ __ioc_page_dump (page, key);
+ }
+out:
+ return;
+}
+
+
+int
+ioc_inode_dump (xlator_t *this, inode_t *inode)
+{
+
+ char *path = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ gf_boolean_t section_added = _gf_false;
+ char uuid_str[64] = {0,};
+
+ if (this == NULL || inode == NULL)
+ goto out;
+
+ gf_proc_dump_build_key (key_prefix, "io-cache", "inode");
+
+ inode_ctx_get (inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (ioc_inode == NULL)
+ goto out;
+
+ /* Similar to ioc_page_dump function its better to use
+ * pthread_mutex_trylock and not to use gf_log in statedump
+ * to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock (&ioc_inode->inode_lock);
+ if (ret)
+ goto out;
+
+ {
+ if (uuid_is_null (ioc_inode->inode->gfid))
+ goto unlock;
+
+ gf_proc_dump_add_section (key_prefix);
+ section_added = _gf_true;
+
+ __inode_path (ioc_inode->inode, NULL, &path);
+
+ gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight);
+
+ if (path) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
+ }
+
+ gf_proc_dump_write ("uuid", "%s", uuid_utoa_r
+ (ioc_inode->inode->gfid, uuid_str));
+ __ioc_cache_dump (ioc_inode, key_prefix);
+ __ioc_inode_waitq_dump (ioc_inode, key_prefix);
+ }
+unlock:
+ pthread_mutex_unlock (&ioc_inode->inode_lock);
+
+out:
+ if (ret && ioc_inode) {
+ if (section_added == _gf_false)
+ gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_write ("Unable to print the status of ioc_inode",
+ "(Lock acquisition failed) %s",
+ uuid_utoa (inode->gfid));
+ }
+ return ret;
+}
+
+int
+ioc_priv_dump (xlator_t *this)
+{
+ ioc_table_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ int ret = -1;
+ gf_boolean_t add_section = _gf_false;
+
+ if (!this || !this->private)
+ goto out;
+
+ priv = this->private;
+
+ gf_proc_dump_build_key (key_prefix, "io-cache", "priv");
+ gf_proc_dump_add_section (key_prefix);
+ add_section = _gf_true;
+
+ ret = pthread_mutex_trylock (&priv->table_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("page_size", "%ld", priv->page_size);
+ gf_proc_dump_write ("cache_size", "%ld", priv->cache_size);
+ gf_proc_dump_write ("cache_used", "%ld", priv->cache_used);
+ gf_proc_dump_write ("inode_count", "%u", priv->inode_count);
+ gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout);
+ gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size);
+ gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size);
+ }
+ pthread_mutex_unlock (&priv->table_lock);
+out:
+ if (ret && priv) {
+ if (!add_section) {
+ gf_proc_dump_build_key (key_prefix, "xlator."
+ "performance.io-cache", "priv");
+ gf_proc_dump_add_section (key_prefix);
}
+ gf_proc_dump_write ("Unable to dump the state of private "
+ "structure of io-cache xlator", "(Lock "
+ "acquisition failed) %s", this->name);
}
- return ret;
+ return 0;
}
/*
* fini -
- *
+ *
* @this:
*
*/
void
fini (xlator_t *this)
{
- ioc_table_t *table = this->private;
+ ioc_table_t *table = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+ int i = 0;
+
+ table = this->private;
+
+ if (table == NULL)
+ return;
- pthread_mutex_destroy (&table->table_lock);
- FREE (table);
+ this->private = NULL;
- this->private = NULL;
- return;
+ if (table->mem_pool != NULL) {
+ mem_pool_destroy (table->mem_pool);
+ table->mem_pool = NULL;
+ }
+
+ list_for_each_entry_safe (curr, tmp, &table->priority_list, list) {
+ list_del_init (&curr->list);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
+ }
+
+ for (i = 0; i < table->max_pri; i++) {
+ GF_ASSERT (list_empty (&table->inode_lru[i]));
+ }
+
+ GF_ASSERT (list_empty (&table->inodes));
+ pthread_mutex_destroy (&table->table_lock);
+ GF_FREE (table);
+
+ this->private = NULL;
+ return;
}
struct xlator_fops fops = {
- .open = ioc_open,
- .create = ioc_create,
- .readv = ioc_readv,
- .writev = ioc_writev,
- .truncate = ioc_truncate,
- .ftruncate = ioc_ftruncate,
- .lookup = ioc_lookup,
- .lk = ioc_lk,
- .setattr = ioc_setattr
+ .open = ioc_open,
+ .create = ioc_create,
+ .readv = ioc_readv,
+ .writev = ioc_writev,
+ .truncate = ioc_truncate,
+ .ftruncate = ioc_ftruncate,
+ .lookup = ioc_lookup,
+ .lk = ioc_lk,
+ .setattr = ioc_setattr,
+ .mknod = ioc_mknod,
+
+ .readdirp = ioc_readdirp,
+ .discard = ioc_discard,
+ .zerofill = ioc_zerofill,
};
-struct xlator_mops mops = {
+
+struct xlator_dumpops dumpops = {
+ .priv = ioc_priv_dump,
+ .inodectx = ioc_inode_dump,
};
struct xlator_cbks cbks = {
- .forget = ioc_forget,
- .release = ioc_release
+ .forget = ioc_forget,
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"cache-timeout", "force-revalidate-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 60
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 4 * GF_UNIT_MB,
- .max = 6 * GF_UNIT_GB
- },
- { .key = {NULL} },
+ { .key = {"priority"},
+ .type = GF_OPTION_TYPE_PRIORITY_LIST,
+ .default_value = "",
+ .description = "Assigns priority to filenames with specific "
+ "patterns so that when a page needs to be ejected "
+ "out of the cache, the page of a file whose "
+ "priority is the lowest will be ejected earlier"
+ },
+ { .key = {"cache-timeout", "force-revalidate-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 60,
+ .default_value = "1",
+ .description = "The cached data for a file will be retained till "
+ "'cache-refresh-timeout' seconds, after which data "
+ "re-validation is performed."
+ },
+ { .key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4 * GF_UNIT_MB,
+ .max = 32 * GF_UNIT_GB,
+ .default_value = "32MB",
+ .description = "Size of the read cache."
+ },
+ { .key = {"min-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Minimum file size which would be cached by the "
+ "io-cache translator."
+ },
+ { .key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Maximum file size which would be cached by the "
+ "io-cache translator."
+ },
+ { .key = {NULL} },
};