diff options
| author | Vikas Gorur <vikas@zresearch.com> | 2009-02-18 17:36:07 +0530 | 
|---|---|---|
| committer | Vikas Gorur <vikas@zresearch.com> | 2009-02-18 17:36:07 +0530 | 
| commit | 77adf4cd648dce41f89469dd185deec6b6b53a0b (patch) | |
| tree | 02e155a5753b398ee572b45793f889b538efab6b /xlators/performance/io-cache/src/io-cache.c | |
| parent | f3b2e6580e5663292ee113c741343c8a43ee133f (diff) | |
Added all files
Diffstat (limited to 'xlators/performance/io-cache/src/io-cache.c')
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 1478 | 
1 files changed, 1478 insertions, 0 deletions
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c new file mode 100644 index 00000000000..f367cdb88de --- /dev/null +++ b/xlators/performance/io-cache/src/io-cache.c @@ -0,0 +1,1478 @@ +/* +  Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "io-cache.h" +#include <assert.h> +#include <sys/time.h> + +static uint32_t +ioc_get_priority (ioc_table_t *table,  +		  const char *path); + +static uint32_t +ioc_get_priority (ioc_table_t *table,  +		  const char *path); + +static inline ioc_inode_t * +ioc_inode_reupdate (ioc_inode_t *ioc_inode) +{ +	ioc_table_t *table = ioc_inode->table; + +	list_add_tail (&ioc_inode->inode_lru,  +		       &table->inode_lru[ioc_inode->weight]); +   +	return ioc_inode; +} + +static inline ioc_inode_t * +ioc_get_inode (dict_t *dict, +	       char *name) +{ +	ioc_inode_t *ioc_inode = NULL; +	data_t *ioc_inode_data = dict_get (dict, name); +	ioc_table_t *table = NULL; + +	if (ioc_inode_data) { +		ioc_inode = data_to_ptr (ioc_inode_data); +		table = ioc_inode->table; + +		ioc_table_lock (table); +		{ +			if (list_empty (&ioc_inode->inode_lru)) { +				ioc_inode = ioc_inode_reupdate (ioc_inode); +			} +		} +		ioc_table_unlock (table); +	} +   +	return ioc_inode; +} + +int32_t +ioc_inode_need_revalidate (ioc_inode_t *ioc_inode) +{ +	int8_t need_revalidate = 0; +	struct timeval tv = {0,}; +	int32_t ret = -1; +	ioc_table_t *table = ioc_inode->table; + +	ret = gettimeofday (&tv, NULL); + +	if (time_elapsed (&tv, &ioc_inode->tv) >= table->cache_timeout) +		need_revalidate = 1; + +	return need_revalidate; +} + +/* + * __ioc_inode_flush - flush all the cached pages of the given inode + * + * @ioc_inode:  + * + * assumes lock is held + */ +int32_t +__ioc_inode_flush (ioc_inode_t *ioc_inode) +{ +	ioc_page_t *curr = NULL, *next = NULL; +	int32_t destroy_size = 0; +	int32_t ret = 0; + +	list_for_each_entry_safe (curr, next, &ioc_inode->pages, pages) { +		ret = ioc_page_destroy (curr); +     +		if (ret != -1)  +			destroy_size += ret; +	} +   +	return destroy_size; +} + +void +ioc_inode_flush (ioc_inode_t *ioc_inode) +{ +	int32_t destroy_size = 0;     + +	ioc_inode_lock (ioc_inode); +	{ +		destroy_size = __ioc_inode_flush (ioc_inode); +	} +	ioc_inode_unlock (ioc_inode); +   +	if (destroy_size) { +		ioc_table_lock (ioc_inode->table); +		{ +			ioc_inode->table->cache_used -= destroy_size; +		} +		ioc_table_unlock (ioc_inode->table); +	} + +	return; +} + +/*  + * ioc_utimens_cbk - + *  + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + */ +int32_t +ioc_utimens_cbk (call_frame_t *frame, +		 void *cookie, +		 xlator_t *this, +		 int32_t op_ret, +		 int32_t op_errno, +		 struct stat *stbuf) +{ +	STACK_UNWIND (frame, op_ret, op_errno, stbuf); +	return 0; +} + +/*  + * ioc_utimens - + *  + * @frame: + * @this: + * @loc: + * @tv: + * + */ +int32_t +ioc_utimens (call_frame_t *frame, +	     xlator_t *this, +	     loc_t *loc, +	     struct timespec *tv) +{ +	uint64_t ioc_inode = 0; +	inode_ctx_get (loc->inode, this, &ioc_inode); + +	if (ioc_inode) +		ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +	STACK_WIND (frame, ioc_utimens_cbk, +		    FIRST_CHILD (this), +		    FIRST_CHILD (this)->fops->utimens, +		    loc, tv); +	return 0; +} + +int32_t +ioc_lookup_cbk (call_frame_t *frame, +		void *cookie, +		xlator_t *this, +		int32_t op_ret, +		int32_t op_errno, +		inode_t *inode, +		struct stat *stbuf, +		dict_t *dict) +{ +	ioc_inode_t *ioc_inode = NULL; +	ioc_local_t *local = frame->local; +	ioc_table_t *table = this->private; +	ioc_page_t  *page = NULL; +	data_t      *page_data = NULL; +	data_t      *content_data = NULL; +	char        *src = NULL; +	char        *dst = NULL; +	char         need_unref = 0; +	uint8_t      cache_still_valid = 0; +	uint32_t     weight = 0; +	uint64_t     tmp_ioc_inode = 0; +	char        *buf = NULL; +	char        *tmp = NULL; +	int          i; +	 +	if (op_ret != 0)  +		goto out; + +	inode_ctx_get (inode, this, &tmp_ioc_inode); +	ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; +	if (ioc_inode) { +		cache_still_valid = ioc_cache_still_valid (ioc_inode,  +							   stbuf); +		 +		if (!cache_still_valid) { +			ioc_inode_flush (ioc_inode); +		}  +		/* update the time-stamp of revalidation */ +		ioc_inode_lock (ioc_inode); +		{ +			gettimeofday (&ioc_inode->tv, NULL); +		} +		ioc_inode_unlock (ioc_inode); +		 +		ioc_table_lock (ioc_inode->table); +		{ +			list_move_tail (&ioc_inode->inode_lru, +					&table->inode_lru[ioc_inode->weight]); +		} +		ioc_table_unlock (ioc_inode->table); +	} +	 +	if (local && stbuf->st_size &&  +	    local->need_xattr >= stbuf->st_size) { +		if (!ioc_inode) { +			weight = ioc_get_priority (table,  +						   local->file_loc.path); +			ioc_inode = ioc_inode_update (table,  +						      inode, weight); +			inode_ctx_put (inode, this,  +				       (uint64_t)(long)ioc_inode); +		} +		 +		ioc_inode_lock (ioc_inode); +		{ +			content_data = dict_get (dict, "glusterfs.content"); +			page = ioc_page_get (ioc_inode, 0); +			 +			if (content_data) { +				if (page) { +					dict_unref (page->ref); +					free (page->vector); +					page->vector = NULL; +					 +					ioc_table_lock (table); +					{ +						table->cache_used -=  +							page->size; +					} +					ioc_table_unlock (table); +				} else { +					page = ioc_page_create (ioc_inode, 0); +				} +				 +				dst = CALLOC (1, stbuf->st_size); +				page->ref = dict_ref (get_new_dict ()); +				page_data = data_from_dynptr (dst,  +							      stbuf->st_size); +				dict_set (page->ref, NULL, page_data); +				 +				src = data_to_ptr (content_data); +				memcpy (dst, src, stbuf->st_size); + +				page->vector = CALLOC (1,  +						       sizeof (*page->vector)); +				page->vector->iov_base = dst; +				page->vector->iov_len = stbuf->st_size; +				page->count = 1; +       +				page->waitq = NULL; +				page->size = stbuf->st_size; +				page->ready = 1; + +				ioc_table_lock (table); +				{ +					table->cache_used += page->size; +				} +				ioc_table_unlock (table); +				 +			} else { +				if (!(page && page->ready)) { +					gf_log (this->name, GF_LOG_DEBUG, +						"page not present"); +					 +					ioc_inode_unlock (ioc_inode); +					STACK_WIND (frame, +						    ioc_lookup_cbk, +						    FIRST_CHILD (this), +						    FIRST_CHILD (this)->fops->lookup, +						    &local->file_loc, +						    local->xattr_req); +					return 0; +				}  +				buf = CALLOC (1, stbuf->st_size); +				tmp = buf; + +				for (i = 0; i < page->count; i++) { +					memcpy (tmp, page->vector[i].iov_base,  +						page->vector[i].iov_len); +					tmp += page->vector[i].iov_len; +				} +				 +				gf_log (this->name, GF_LOG_DEBUG, +					"serving file %s from cache",  +					local->file_loc.path); +				 +				if (!dict) { +					need_unref = 1; +					dict = dict_ref ( +						get_new_dict ()); +				} +				dict_set (dict, "glusterfs.content", +					  data_from_dynptr (buf,  +							    stbuf->st_size)); +			} + +			ioc_inode->mtime = stbuf->st_mtime; +			gettimeofday (&ioc_inode->tv, NULL); +		} +		ioc_inode_unlock (ioc_inode); +		 +		if (content_data &&  +		    ioc_need_prune (ioc_inode->table)) { +			ioc_prune (ioc_inode->table); +		} +	} + + out: +	STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, dict); + +	if (need_unref) { +		dict_unref (dict); +	} + +	return 0; +} + +int32_t  +ioc_lookup (call_frame_t *frame, +	    xlator_t *this, +	    loc_t *loc, +	    dict_t *xattr_req) +{ +	uint64_t content_limit = 0; + +	if (GF_FILE_CONTENT_REQUESTED(xattr_req, &content_limit)) { +		uint64_t     tmp_ioc_inode = 0; +		ioc_inode_t *ioc_inode = NULL; +		ioc_page_t  *page = NULL; +		ioc_local_t *local = CALLOC (1, sizeof (*local)); + +		local->need_xattr = content_limit; +		local->file_loc.path = loc->path; +		local->file_loc.inode = loc->inode; +		frame->local = local; + +		inode_ctx_get (loc->inode, this, &tmp_ioc_inode); +		ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + +		if (ioc_inode) { +			ioc_inode_lock (ioc_inode); +			{ +				page = ioc_page_get (ioc_inode, 0); +				if ((content_limit <=  +				     ioc_inode->table->page_size) &&  +				    page && page->ready) { +					local->need_xattr = -1; +				} +			} +			ioc_inode_unlock (ioc_inode); +		} +	} + +	STACK_WIND (frame, +		    ioc_lookup_cbk, +		    FIRST_CHILD (this), +		    FIRST_CHILD (this)->fops->lookup, +		    loc, +		    xattr_req); +	return 0; +} + +/* + * ioc_forget -  + * + * @frame: + * @this: + * @inode: + * + */ +int32_t +ioc_forget (xlator_t *this, +	    inode_t *inode) +{ +	uint64_t ioc_inode = 0; + +	inode_ctx_get (inode, this, &ioc_inode); + +	if (ioc_inode) +		ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode); +	     +	return 0; +} + + +/*  + * ioc_cache_validate_cbk -  + * + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf + * + */ +int32_t +ioc_cache_validate_cbk (call_frame_t *frame, +			void *cookie, +			xlator_t *this, +			int32_t op_ret, +			int32_t op_errno, +			struct stat *stbuf) +{ +	ioc_local_t *local = frame->local; +	ioc_inode_t *ioc_inode = NULL; +	size_t destroy_size = 0; +	struct stat *local_stbuf = stbuf; + +	ioc_inode = local->inode; + +	if ((op_ret == -1) ||  +	    ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) { +		gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, +			"cache for inode(%p) is invalid. flushing all pages", +			ioc_inode); +		/* NOTE: only pages with no waiting frames are flushed by  +		 * ioc_inode_flush. page_fault will be generated for all  +		 * the pages which have waiting frames by ioc_inode_wakeup() +		 */ +		ioc_inode_lock (ioc_inode); +		{ +			destroy_size = __ioc_inode_flush (ioc_inode); +			if (op_ret >= 0) +				ioc_inode->mtime = stbuf->st_mtime; +		} +		ioc_inode_unlock (ioc_inode); +		local_stbuf = NULL; +	} + +	if (destroy_size) { +		ioc_table_lock (ioc_inode->table); +		{ +			ioc_inode->table->cache_used -= destroy_size; +		} +		ioc_table_unlock (ioc_inode->table); +	} + +	if (op_ret < 0) +		local_stbuf = NULL; +   +	ioc_inode_lock (ioc_inode); +	{ +		gettimeofday (&ioc_inode->tv, NULL); +	} +	ioc_inode_unlock (ioc_inode); + +	ioc_inode_wakeup (frame, ioc_inode, local_stbuf); +   +	/* any page-fault initiated by ioc_inode_wakeup() will have its own  +	 * fd_ref on fd, safe to unref validate frame's private copy  +	 */ +	fd_unref (local->fd); + +	STACK_DESTROY (frame->root); + +	return 0; +} + +static int32_t +ioc_wait_on_inode (ioc_inode_t *ioc_inode,  +		   ioc_page_t *page) +{ +	ioc_waitq_t *waiter = NULL, *trav = NULL; +	uint32_t page_found = 0; + +	trav = ioc_inode->waitq; + +	while (trav) { +		if (trav->data == page) { +			page_found = 1; +			break; +		} +		trav = trav->next; +	} +   +	if (!page_found) { +		waiter = CALLOC (1, sizeof (ioc_waitq_t)); +		ERR_ABORT (waiter); +		waiter->data = page; +		waiter->next = ioc_inode->waitq; +		ioc_inode->waitq = waiter; +	} +   +	return 0; +} + +/* + * ioc_cache_validate - + * + * @frame: + * @ioc_inode: + * @fd: + * + */ +static int32_t +ioc_cache_validate (call_frame_t *frame, +		    ioc_inode_t *ioc_inode, +		    fd_t *fd, +		    ioc_page_t *page) +{ +	call_frame_t *validate_frame = NULL; +	ioc_local_t *validate_local = NULL; + +	validate_local = CALLOC (1, sizeof (ioc_local_t)); +	ERR_ABORT (validate_local); +	validate_frame = copy_frame (frame); +	validate_local->fd = fd_ref (fd); +	validate_local->inode = ioc_inode; +	validate_frame->local = validate_local; +     +	STACK_WIND (validate_frame, +		    ioc_cache_validate_cbk, +		    FIRST_CHILD (frame->this), +		    FIRST_CHILD (frame->this)->fops->fstat, +		    fd); + +	return 0; +} + +static inline uint32_t +is_match (const char *path, +	  const char *pattern) +{ +	char *pathname = strdup (path); +	int32_t ret = 0; + +	ret = fnmatch (pattern, path, FNM_NOESCAPE); +   +	free (pathname); +   +	return (ret == 0); +} + +static uint32_t +ioc_get_priority (ioc_table_t *table,  +		  const char *path) +{ +	uint32_t priority = 0; +	struct ioc_priority *curr = NULL; +   +	list_for_each_entry (curr, &table->priority_list, list) { +		if (is_match (path, curr->pattern))  +			priority = curr->priority; +	} + +	return priority; +} + +/*  + * ioc_open_cbk - open callback for io cache + * + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @fd: + * + */ +int32_t +ioc_open_cbk (call_frame_t *frame, +	      void *cookie, +	      xlator_t *this, +	      int32_t op_ret, +	      int32_t op_errno, +	      fd_t *fd) +{ +	uint64_t     tmp_ioc_inode = 0; +	ioc_local_t *local = frame->local; +	ioc_table_t *table = this->private; +	ioc_inode_t *ioc_inode = NULL; +	inode_t *inode = local->file_loc.inode; +	uint32_t weight = 0; +	const char *path = local->file_loc.path; + +	if (op_ret != -1) { +		/* look for ioc_inode corresponding to this fd */ +		LOCK (&fd->inode->lock); +		//{ + +		inode_ctx_get (fd->inode, this, &tmp_ioc_inode); +		ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; +       +		if (!ioc_inode) { +			/* this is the first time someone is opening this  +			   file, assign weight  +			*/ +			weight = ioc_get_priority (table, path); +  +			ioc_inode = ioc_inode_update (table, inode, weight); +			inode_ctx_put (fd->inode, this,  +				       (uint64_t)(long)ioc_inode); +		} else { +			ioc_table_lock (ioc_inode->table); +			//{ +			list_move_tail (&ioc_inode->inode_lru, +					&table->inode_lru[ioc_inode->weight]); +			//} +			ioc_table_unlock (ioc_inode->table); +		} + +		//} +		UNLOCK (&fd->inode->lock); + +		/* If mandatory locking has been enabled on this file, +		   we disable caching on it */ +		if (((inode->st_mode & S_ISGID) &&  +		     !(inode->st_mode & S_IXGRP))) { +			fd_ctx_set (fd, this, 1); +		} +   +		/* If O_DIRECT open, we disable caching on it */ +		if ((local->flags & O_DIRECT)){ +			/* O_DIRECT is only for one fd, not the inode  +			 * as a whole  +			 */ +			fd_ctx_set (fd, this, 1); +		} +	} + +	FREE (local); +	frame->local = NULL; + +	STACK_UNWIND (frame, op_ret, op_errno, fd); + +	return 0; +} + +/* + * ioc_create_cbk - create callback for io cache + * + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @fd: + * @inode: + * @buf: + * + */ +int32_t +ioc_create_cbk (call_frame_t *frame, +		void *cookie, +		xlator_t *this, +		int32_t op_ret, +		int32_t op_errno, +		fd_t *fd, +		inode_t *inode, +		struct stat *buf) +{ +	ioc_local_t *local = frame->local; +	ioc_table_t *table = this->private; +	ioc_inode_t *ioc_inode = NULL; +	uint32_t weight = 0; +	const char *path = local->file_loc.path; + +	if (op_ret != -1) { +		{ +			/* assign weight */ +			weight = ioc_get_priority (table, path); + +			ioc_inode = ioc_inode_update (table, inode, weight); +			LOCK (&fd->inode->lock); +			{ +				inode_ctx_put (fd->inode, this,  +					       (uint64_t)(long)ioc_inode); +			} +			UNLOCK (&fd->inode->lock); +		} +		/* If mandatory locking has been enabled on this file, +		   we disable caching on it */ +		if ((inode->st_mode & S_ISGID) &&  +		    !(inode->st_mode & S_IXGRP)) { +			fd_ctx_set (fd, this, 1); +		} + +		/* If O_DIRECT open, we disable caching on it */ +		if (local->flags & O_DIRECT){ +			/* O_DIRECT is only for one fd, not the inode  +			 * as a whole  +			 */ +			fd_ctx_set (fd, this, 1); +		} +     +	} +   +	frame->local = NULL; +	FREE (local); + +	STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + +	return 0; +} + +/* + * ioc_open - open fop for io cache + * @frame: + * @this: + * @loc: + * @flags: + * + */ +int32_t +ioc_open (call_frame_t *frame, +	  xlator_t *this, +	  loc_t *loc, +	  int32_t flags, +	  fd_t *fd) +{ +   +	ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t)); +	ERR_ABORT (local); + +	local->flags = flags; +	local->file_loc.path = loc->path; +	local->file_loc.inode = loc->inode; +   +	frame->local = local; +   +	STACK_WIND (frame, +		    ioc_open_cbk, +		    FIRST_CHILD(this), +		    FIRST_CHILD(this)->fops->open, +		    loc, +		    flags, +		    fd); + +	return 0; +} + +/* + * ioc_create - create fop for io cache + *  + * @frame: + * @this: + * @pathname: + * @flags: + * @mode: + * + */ +int32_t +ioc_create (call_frame_t *frame, +	    xlator_t *this, +	    loc_t *loc, +	    int32_t flags, +	    mode_t mode, +	    fd_t *fd) +{ +	ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t)); +	ERR_ABORT (local); + +	local->flags = flags; +	local->file_loc.path = loc->path; +	frame->local = local; + +	STACK_WIND (frame, ioc_create_cbk, +		    FIRST_CHILD(this), +		    FIRST_CHILD(this)->fops->create, +		    loc, flags, mode, fd); +	return 0; +} + + + + +/* + * ioc_release - release fop for io cache + *  + * @frame: + * @this: + * @fd: + * + */ +int32_t +ioc_release (xlator_t *this, +	     fd_t *fd) +{ +	return 0; +} + +/*  + * ioc_readv_disabled_cbk  + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @vector: + * @count: + * + */  +int32_t +ioc_readv_disabled_cbk (call_frame_t *frame,  +			void *cookie, +			xlator_t *this, +			int32_t op_ret, +			int32_t op_errno, +			struct iovec *vector, +			int32_t count, +			struct stat *stbuf) +{ +	STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); +	return 0; +} + + +int32_t +ioc_need_prune (ioc_table_t *table) +{ +	int64_t cache_difference = 0; +   +	ioc_table_lock (table); +	{ +		cache_difference = table->cache_used - table->cache_size; +	} +	ioc_table_unlock (table); + +	if (cache_difference > 0) +		return 1; +	else  +		return 0; +} + +/* + * dispatch_requests - + *  + * @frame: + * @inode: + * + *  + */ +static void +dispatch_requests (call_frame_t *frame, +		   ioc_inode_t *ioc_inode, +		   fd_t *fd, +		   off_t offset, +		   size_t size) +{ +	ioc_local_t *local = frame->local; +	ioc_table_t *table = ioc_inode->table; +	ioc_page_t  *trav = NULL; +	ioc_waitq_t *waitq = NULL; +	off_t   rounded_offset = 0; +	off_t   rounded_end = 0; +	off_t   trav_offset = 0; +	int32_t fault = 0; +	int8_t  need_validate = 0; +	int8_t  might_need_validate = 0;  /* if a page exists, do we need  +					    to validate it? */ + +	rounded_offset = floor (offset, table->page_size); +	rounded_end = roof (offset + size, table->page_size); +	trav_offset = rounded_offset; + +	/* once a frame does read, it should be waiting on something */ +	local->wait_count++; + +	/* Requested region can fall in three different pages, +	 * 1. Ready - region is already in cache, we just have to serve it. +	 * 2. In-transit - page fault has been generated on this page, we need +	 *    to wait till the page is ready +	 * 3. Fault - page is not in cache, we have to generate a page fault +	 */ + +	might_need_validate = ioc_inode_need_revalidate (ioc_inode); + +	while (trav_offset < rounded_end) { +		size_t trav_size = 0; +		off_t local_offset = 0; + +		ioc_inode_lock (ioc_inode); +		//{ + +		/* look for requested region in the cache */ +		trav = ioc_page_get (ioc_inode, trav_offset); + +		local_offset = max (trav_offset, offset); +		trav_size = min (((offset+size) - local_offset),  +				 table->page_size); + +		if (!trav) { +			/* page not in cache, we need to generate page fault */ +			trav = ioc_page_create (ioc_inode, trav_offset); +			fault = 1; +			if (!trav) { +				gf_log (frame->this->name, GF_LOG_CRITICAL, +					"ioc_page_create returned NULL"); +			} +		}  + +		ioc_wait_on_page (trav, frame, local_offset, trav_size); + +		if (trav->ready) { +			/* page found in cache */ +			if (!might_need_validate) { +				/* fresh enough */ +				gf_log (frame->this->name, GF_LOG_DEBUG, +					"cache hit for trav_offset=%"PRId64"" +					"/local_offset=%"PRId64"", +					trav_offset, local_offset); +				waitq = ioc_page_wakeup (trav); +			} else { +				/* if waitq already exists, fstat revalidate is +				   already on the way */ +				if (!ioc_inode->waitq) { +					need_validate = 1; +				} +				ioc_wait_on_inode (ioc_inode, trav); +			} +		} + +		//} +		ioc_inode_unlock (ioc_inode); +     +		ioc_waitq_return (waitq); +		waitq = NULL; + +		if (fault) { +			fault = 0; +			/* new page created, increase the table->cache_used */ +			ioc_page_fault (ioc_inode, frame, fd, trav_offset); +		} + +		if (need_validate) { +			need_validate = 0; +			gf_log (frame->this->name, GF_LOG_DEBUG, +				"sending validate request for " +				"inode(%"PRId64") at offset=%"PRId64"", +				fd->inode->ino, trav_offset); +			ioc_cache_validate (frame, ioc_inode, fd, trav); +		} +     +		trav_offset += table->page_size; +	} + +	ioc_frame_return (frame); + +	if (ioc_need_prune (ioc_inode->table)) { +		ioc_prune (ioc_inode->table); +	} + +	return; +} + + +/* + * ioc_readv - + *  + * @frame: + * @this: + * @fd: + * @size: + * @offset: + * + */ +int32_t +ioc_readv (call_frame_t *frame, +	   xlator_t *this, +	   fd_t *fd, +	   size_t size, +	   off_t offset) +{ +	uint64_t     tmp_ioc_inode = 0; +	ioc_inode_t *ioc_inode = NULL; +	ioc_local_t *local = NULL; +	uint32_t     weight = 0; + +	inode_ctx_get (fd->inode, this, &tmp_ioc_inode); +	ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; +	if (!ioc_inode) { +		/* caching disabled, go ahead with normal readv */ +		STACK_WIND (frame,  +			    ioc_readv_disabled_cbk, +			    FIRST_CHILD (frame->this),  +			    FIRST_CHILD (frame->this)->fops->readv, +			    fd,  +			    size,  +			    offset); +		return 0; +	} + +	if (!fd_ctx_get (fd, this, NULL)) { +		/* disable caching for this fd, go ahead with normal readv */ +		STACK_WIND (frame,  +			    ioc_readv_disabled_cbk, +			    FIRST_CHILD (frame->this),  +			    FIRST_CHILD (frame->this)->fops->readv, +			    fd,  +			    size,  +			    offset); +		return 0; +	} + +	local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t)); +	ERR_ABORT (local); +	INIT_LIST_HEAD (&local->fill_list); + +	frame->local = local;   +	local->pending_offset = offset; +	local->pending_size = size; +	local->offset = offset; +	local->size = size; +	local->inode = ioc_inode; + +	gf_log (this->name, GF_LOG_DEBUG, +		"NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",  +		frame, offset, size); + +	weight = ioc_inode->weight; + +	ioc_table_lock (ioc_inode->table); +	{ +		list_move_tail (&ioc_inode->inode_lru,  +				&ioc_inode->table->inode_lru[weight]); +	} +	ioc_table_unlock (ioc_inode->table); + +	dispatch_requests (frame, ioc_inode, fd, offset, size); +   +	return 0; +} + +/* + * ioc_writev_cbk - + *  + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + */ +int32_t +ioc_writev_cbk (call_frame_t *frame, +		void *cookie, +		xlator_t *this, +		int32_t op_ret, +		int32_t op_errno, +		struct stat *stbuf) +{ +	ioc_local_t *local     = frame->local; +	uint64_t     ioc_inode = 0; + +	inode_ctx_get (local->fd->inode, this, &ioc_inode); +   +	if (ioc_inode) +		ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +	STACK_UNWIND (frame, op_ret, op_errno, stbuf); +	return 0; +} + +/* + * ioc_writev + *  + * @frame: + * @this: + * @fd: + * @vector: + * @count: + * @offset: + * + */ +int32_t +ioc_writev (call_frame_t *frame, +	    xlator_t *this, +	    fd_t *fd, +	    struct iovec *vector, +	    int32_t count, +	    off_t offset) +{ +	ioc_local_t *local     = NULL; +	uint64_t     ioc_inode = 0; +	 +	local = CALLOC (1, sizeof (ioc_local_t)); +	ERR_ABORT (local); + +	/* TODO: why is it not fd_ref'ed */ +	local->fd = fd; +	frame->local = local; + +	inode_ctx_get (fd->inode, this, &ioc_inode); +	if (ioc_inode) +		ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +	STACK_WIND (frame, +		    ioc_writev_cbk, +		    FIRST_CHILD(this), +		    FIRST_CHILD(this)->fops->writev, +		    fd, +		    vector, +		    count, +		    offset); + +	return 0; +} + +/* + * ioc_truncate_cbk - + *  + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf: + * + */ +int32_t  +ioc_truncate_cbk (call_frame_t *frame, +		  void *cookie, +		  xlator_t *this, +		  int32_t op_ret, +		  int32_t op_errno, +		  struct stat *buf) +{ + +	STACK_UNWIND (frame, op_ret, op_errno, buf); +	return 0; +} + +/* + * ioc_truncate - + *  + * @frame: + * @this: + * @loc: + * @offset: + * + */ +int32_t  +ioc_truncate (call_frame_t *frame, +	      xlator_t *this, +	      loc_t *loc, +	      off_t offset) +{ +	uint64_t ioc_inode = 0; +	inode_ctx_get (loc->inode, this, &ioc_inode); + +	if (ioc_inode) +		ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +	STACK_WIND (frame, +		    ioc_truncate_cbk, +		    FIRST_CHILD(this), +		    FIRST_CHILD(this)->fops->truncate, +		    loc, +		    offset); +	return 0; +} + +/* + * ioc_ftruncate - + *  + * @frame: + * @this: + * @fd: + * @offset: + * + */ +int32_t +ioc_ftruncate (call_frame_t *frame, +	       xlator_t *this, +	       fd_t *fd, +	       off_t offset) +{ +	uint64_t ioc_inode = 0; +	inode_ctx_get (fd->inode, this, &ioc_inode); + +	if (ioc_inode) +		ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +	STACK_WIND (frame, +		    ioc_truncate_cbk, +		    FIRST_CHILD(this), +		    FIRST_CHILD(this)->fops->ftruncate, +		    fd, +		    offset); +	return 0; +} + +int32_t +ioc_lk_cbk (call_frame_t *frame, +	    void *cookie, +	    xlator_t *this, +	    int32_t op_ret, +	    int32_t op_errno, +	    struct flock *lock) +{ +	STACK_UNWIND (frame, op_ret, op_errno, lock); +	return 0; +} + +int32_t  +ioc_lk (call_frame_t *frame, +	xlator_t *this, +	fd_t *fd, +	int32_t cmd, +	struct flock *lock) +{ +	ioc_inode_t *ioc_inode = NULL; +	uint64_t     tmp_inode = 0; + +	inode_ctx_get (fd->inode, this, &tmp_inode); +	ioc_inode = (ioc_inode_t *)(long)tmp_inode; +	if (!ioc_inode) { +		gf_log (this->name, GF_LOG_ERROR, +			"inode context is NULL: returning EBADFD"); +		STACK_UNWIND (frame, -1, EBADFD, NULL); +		return 0; +	} + +	ioc_inode_lock (ioc_inode); +	{ +		gettimeofday (&ioc_inode->tv, NULL); +	} +	ioc_inode_unlock (ioc_inode); + +	STACK_WIND (frame, ioc_lk_cbk,  +		    FIRST_CHILD (this), +		    FIRST_CHILD (this)->fops->lk, fd, cmd, lock); +	return 0; +} + +int32_t +ioc_get_priority_list (const char *opt_str, struct list_head *first) +{ +	int32_t max_pri = 0; +	char *tmp_str = NULL; +	char *tmp_str1 = NULL; +	char *tmp_str2 = NULL; +	char *dup_str = NULL; +	char *stripe_str = NULL; +	char *pattern = NULL; +	char *priority = NULL; +	char *string = strdup (opt_str); +	struct ioc_priority *curr = NULL; + +	/* Get the pattern for cache priority.  +	 * "option priority *.jpg:1,abc*:2" etc  +	 */ +	/* TODO: inode_lru in table is statically hard-coded to 5,  +	 * should be changed to run-time configuration  +	 */ +	stripe_str = strtok_r (string, ",", &tmp_str); +	while (stripe_str) { +		curr = CALLOC (1, sizeof (struct ioc_priority)); +		ERR_ABORT (curr); +		list_add_tail (&curr->list, first); + +		dup_str = strdup (stripe_str); +		pattern = strtok_r (dup_str, ":", &tmp_str1); +		if (!pattern) +			return -1; +		priority = strtok_r (NULL, ":", &tmp_str1); +		if (!priority) +			return -1; +		gf_log ("io-cache",  +			GF_LOG_DEBUG,  +			"ioc priority : pattern %s : priority %s",  +			pattern, +			priority); +		curr->pattern = strdup (pattern); +		curr->priority = strtol (priority, &tmp_str2, 0); +		if (tmp_str2 && (*tmp_str2)) +			return -1; +		else +			max_pri = max (max_pri, curr->priority); +		stripe_str = strtok_r (NULL, ",", &tmp_str); +	} + +	return max_pri; +} + +/* + * init -  + * @this: + * + */ +int32_t  +init (xlator_t *this) +{ +	ioc_table_t *table; +	dict_t *options = this->options; +	uint32_t index = 0; +	char *page_size_string = NULL; +	char *cache_size_string = NULL; + +	if (!this->children || this->children->next) { +		gf_log (this->name, GF_LOG_ERROR, +			"FATAL: io-cache not configured with exactly " +			"one child"); +		return -1; +	} + +	if (!this->parents) { +		gf_log (this->name, GF_LOG_WARNING, +			"dangling volume. check volfile "); +	} + +	table = (void *) CALLOC (1, sizeof (*table)); +	ERR_ABORT (table); +   +	table->xl = this; +	table->page_size = IOC_PAGE_SIZE; +	table->cache_size = IOC_CACHE_SIZE; + +	if (dict_get (options, "page-size")) +		page_size_string = data_to_str (dict_get (options,  +							  "page-size")); + +	if (page_size_string) { +		if (gf_string2bytesize (page_size_string,  +					&table->page_size) != 0) { +			gf_log ("io-cache", GF_LOG_ERROR,  +				"invalid number format \"%s\" of " +				"\"option page-size\"",  +				page_size_string); +			return -1; +		} +		gf_log (this->name, GF_LOG_DEBUG,  +			"using page-size %"PRIu64"",  table->page_size); +	} +   +	if (dict_get (options, "cache-size")) +		cache_size_string = data_to_str (dict_get (options,  +							   "cache-size")); +	if (cache_size_string) { +		if (gf_string2bytesize (cache_size_string,  +					&table->cache_size) != 0) { +			gf_log ("io-cache", GF_LOG_ERROR,  +				"invalid number format \"%s\" of " +				"\"option cache-size\"",  +				cache_size_string); +			return -1; +		} +       +		gf_log (this->name, GF_LOG_DEBUG,  +			"using cache-size %"PRIu64"", table->cache_size); +	} +   +	table->cache_timeout = 1; + +	if (dict_get (options, "cache-timeout")) { +		table->cache_timeout =  +			data_to_uint32 (dict_get (options, +						  "cache-timeout")); +		gf_log (this->name, GF_LOG_DEBUG, +			"Using %d seconds to revalidate cache", +			table->cache_timeout); +	} + +	INIT_LIST_HEAD (&table->priority_list); +	if (dict_get (options, "priority")) { +		char *option_list = data_to_str (dict_get (options,  +							   "priority")); +		gf_log (this->name, GF_LOG_DEBUG, +			"option path %s", option_list); +		/* parse the list of pattern:priority */ +		table->max_pri = ioc_get_priority_list (option_list,  +							&table->priority_list); +     +		if (table->max_pri == -1) +			return -1; +	} +	table->max_pri ++; +	INIT_LIST_HEAD (&table->inodes); +   +	table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head)); +	ERR_ABORT (table->inode_lru); +	for (index = 0; index < (table->max_pri); index++) +		INIT_LIST_HEAD (&table->inode_lru[index]); + +	pthread_mutex_init (&table->table_lock, NULL); +	this->private = table; +	return 0; +} + +/* + * fini - + *  + * @this: + * + */ +void +fini (xlator_t *this) +{ +	ioc_table_t *table = this->private; + +	pthread_mutex_destroy (&table->table_lock); +	FREE (table); + +	this->private = NULL; +	return; +} + +struct xlator_fops fops = { +	.open        = ioc_open, +	.create      = ioc_create, +	.readv       = ioc_readv, +	.writev      = ioc_writev, +	.truncate    = ioc_truncate, +	.ftruncate   = ioc_ftruncate, +	.utimens     = ioc_utimens, +	.lookup      = ioc_lookup, +	.lk          = ioc_lk +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { +	.forget      = ioc_forget, +  	.release     = ioc_release +}; + +struct volume_options options[] = { +	{ .key  = {"priority"},  +	  .type = GF_OPTION_TYPE_ANY  +	}, +	{ .key  = {"cache-timeout", "force-revalidate-timeout"}, +	  .type = GF_OPTION_TYPE_INT, +	  .min  = 0,  +	  .max  = 60  +	},  +	{ .key  = {"page-size"},  +	  .type = GF_OPTION_TYPE_SIZET,  +	  .min  = 16 * GF_UNIT_KB,  +	  .max  =  4 * GF_UNIT_MB  +	}, +	{ .key  = {"cache-size"},  +	  .type = GF_OPTION_TYPE_SIZET, +	  .min  = 4 * GF_UNIT_MB,  +	  .max  = 6 * GF_UNIT_GB  +	}, +	{ .key = {NULL} }, +};  | 
