diff options
| author | Vikas Gorur <vikas@zresearch.com> | 2009-02-18 17:36:07 +0530 | 
|---|---|---|
| committer | Vikas Gorur <vikas@zresearch.com> | 2009-02-18 17:36:07 +0530 | 
| commit | 77adf4cd648dce41f89469dd185deec6b6b53a0b (patch) | |
| tree | 02e155a5753b398ee572b45793f889b538efab6b /xlators/cluster/afr/src/afr-inode-read.c | |
| parent | f3b2e6580e5663292ee113c741343c8a43ee133f (diff) | |
Added all files
Diffstat (limited to 'xlators/cluster/afr/src/afr-inode-read.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 721 | 
1 files changed, 721 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c new file mode 100644 index 00000000000..a6c99ec0576 --- /dev/null +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -0,0 +1,721 @@ +/* +   Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" + +#include "afr.h" + + +/** + * Common algorithm for inode read calls: + *  + * - Try the fop on the first child that is up + * - if we have failed due to ENOTCONN: + *     try the next child + * + * Applicable to: access, stat, fstat, readlink, getxattr + */ + +/* {{{ access */ + +int32_t +afr_access_cbk (call_frame_t *frame, void *cookie, +		xlator_t *this, int32_t op_ret, int32_t op_errno) +{ +	afr_private_t * priv     = NULL; +	afr_local_t *   local    = NULL; +	xlator_t **     children = NULL; + +	int unwind     = 1; +	int last_tried = -1; +	int this_try = -1; + +	priv     = this->private; +	children = priv->children; + +	local = frame->local; + +	if (op_ret == -1) { +		last_tried = local->cont.access.last_tried; + +		if (all_tried (last_tried, priv->child_count)) { +			goto out; +		} +		this_try    = ++local->cont.access.last_tried; + +		unwind = 0; + +		STACK_WIND_COOKIE (frame, afr_access_cbk, +				   (void *) (long) this_try, +				   children[this_try],  +				   children[this_try]->fops->access, +				   &local->loc, local->cont.access.mask); +	} + +out: +	if (unwind) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno); +	} + +	return 0; +} + + +int32_t +afr_access (call_frame_t *frame, xlator_t *this, +	    loc_t *loc, int32_t mask) +{ +	afr_private_t * priv       = NULL; +	xlator_t **     children   = NULL; +	int             call_child = 0; +	afr_local_t     *local     = NULL; + +	int32_t op_ret   = -1; +	int32_t op_errno = 0; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (this->private, out); + +	priv     = this->private; +	VALIDATE_OR_GOTO (priv->children, out); + +	children = priv->children; + +	ALLOC_OR_GOTO (local, afr_local_t, out); + +	call_child = afr_first_up_child (priv); +	if (call_child == -1) { +		op_errno = ENOTCONN; +		gf_log (this->name, GF_LOG_ERROR, +			"no child is up :("); +		goto out; +	} + +	local->cont.access.last_tried = call_child; +	loc_copy (&local->loc, loc); +	local->cont.access.mask       = mask; + +	STACK_WIND_COOKIE (frame, afr_access_cbk, +			   (void *) (long) call_child, +			   children[call_child], children[call_child]->fops->access, +			   loc, mask); + +	op_ret = 0; +out: +	if (op_ret == -1) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno); +	} +	return 0; +} + + +/* }}} */ + +/* {{{ stat */ + +int32_t +afr_stat_cbk (call_frame_t *frame, void *cookie, +	      xlator_t *this, int32_t op_ret, int32_t op_errno, +	      struct stat *buf) +{ +	afr_private_t * priv     = NULL; +	afr_local_t *   local    = NULL; +	xlator_t **     children = NULL; + +	int deitransform_child = -1; + +	int unwind     = 1; +	int last_tried = -1; +	int this_try = -1; + +	priv     = this->private; +	children = priv->children; + +	deitransform_child = (long) cookie; + +	local = frame->local; + +	if (op_ret == -1) { +	retry: +		last_tried = local->cont.stat.last_tried; + +		if (all_tried (last_tried, priv->child_count)) { +			goto out; +		} +		this_try = ++local->cont.stat.last_tried; + +		if (this_try == deitransform_child) { +			goto retry; +		} + +		unwind = 0; + +		STACK_WIND_COOKIE (frame, afr_stat_cbk, +				   (void *) (long) deitransform_child, +				   children[this_try],  +				   children[this_try]->fops->stat, +				   &local->loc); +	} + +out: +	if (unwind) { +		if (op_ret != -1) +			buf->st_ino = local->cont.stat.ino; + +		AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); +	} + +	return 0; +} + + +int32_t +afr_stat (call_frame_t *frame, xlator_t *this, +	  loc_t *loc) +{ +	afr_private_t * priv       = NULL; +	afr_local_t   * local      = NULL; +	xlator_t **     children   = NULL; + +	int             call_child = 0; + +	int32_t         op_ret     = -1; +	int32_t         op_errno   = 0; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (this->private, out); + +	priv     = this->private; +	VALIDATE_OR_GOTO (priv->children, out); + +	children = priv->children; + +	ALLOC_OR_GOTO (local, afr_local_t, out); + +	frame->local = local; + +	call_child = afr_deitransform (loc->inode->ino, priv->child_count); +	loc_copy (&local->loc, loc); + +	/*  +	   if stat fails from the deitranform'd child, we try +	   all children starting with the first one +	*/ +	local->cont.stat.last_tried = -1; +	local->cont.stat.ino = loc->inode->ino; + +	STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child, +			   children[call_child], +			   children[call_child]->fops->stat, +			   loc); + +	op_ret = 0; +out: +	if (op_ret == -1) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); +	} + +	return 0; +} + + +/* }}} */ + +/* {{{ fstat */ + +int32_t +afr_fstat_cbk (call_frame_t *frame, void *cookie, +	       xlator_t *this, int32_t op_ret, int32_t op_errno, +	       struct stat *buf) +{ +	afr_private_t * priv     = NULL; +	afr_local_t *   local    = NULL; +	xlator_t **     children = NULL; + +	int deitransform_child = -1; + +	int unwind     = 1; +	int last_tried = -1; +	int this_try = -1; + +	priv     = this->private; +	children = priv->children; + +	deitransform_child = (long) cookie; + +	local = frame->local; + +	if (op_ret == -1) { +	retry: +		last_tried = local->cont.fstat.last_tried; + +		if (all_tried (last_tried, priv->child_count)) { +			goto out; +		} +		this_try   = ++local->cont.fstat.last_tried; + +		if (this_try == deitransform_child) { +			/*  +			   skip the deitransform'd child since if we are here +			   we must have already tried that child +			*/ +			goto retry; +		} +	        + +		unwind = 0; + +		STACK_WIND_COOKIE (frame, afr_fstat_cbk, +				   (void *) (long) deitransform_child, +				   children[this_try],  +				   children[this_try]->fops->fstat, +				   local->fd); +	} + +out: +	if (unwind) { +		if (op_ret != -1) +			buf->st_ino = local->cont.fstat.ino; + +		AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); +	} + +	return 0; +} + + +int32_t +afr_fstat (call_frame_t *frame, xlator_t *this, +	   fd_t *fd) +{ +	afr_private_t * priv       = NULL; +	afr_local_t   * local      = NULL; +	xlator_t **     children   = NULL; + +	int             call_child = 0; + +	int32_t         op_ret     = -1; +	int32_t         op_errno   = 0; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (fd, out); +	VALIDATE_OR_GOTO (this->private, out); + +	priv     = this->private; +	VALIDATE_OR_GOTO (priv->children, out); + +	children = priv->children; + +	ALLOC_OR_GOTO (local, afr_local_t, out); + +	frame->local = local; + +	VALIDATE_OR_GOTO (fd->inode, out); + +	call_child = afr_deitransform (fd->inode->ino, priv->child_count); + +	/*  +	   if fstat fails from the deitranform'd child, we try +	   all children starting with the first one +	*/ +	local->cont.fstat.last_tried = -1; +	local->cont.fstat.ino = fd->inode->ino; +	local->fd = fd_ref (fd); + +	STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child, +			   children[call_child], +			   children[call_child]->fops->fstat, +			   fd); + +	op_ret = 0; +out: +	if (op_ret == -1) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); +	} + +	return 0; +} + +/* }}} */ + +/* {{{ readlink */ + +int32_t +afr_readlink_cbk (call_frame_t *frame, void *cookie, +		  xlator_t *this, int32_t op_ret, int32_t op_errno, +		  const char *buf) +{ +	afr_private_t * priv     = NULL; +	afr_local_t *   local    = NULL; +	xlator_t **     children = NULL; + +	int unwind     = 1; +	int last_tried = -1; +	int this_try = -1; + +	priv     = this->private; +	children = priv->children; + +	local = frame->local; + +	if (op_ret == -1) { +		last_tried = local->cont.readlink.last_tried; + +		if (all_tried (last_tried, priv->child_count)) { +			goto out; +		} +		this_try = ++local->cont.readlink.last_tried; + +		unwind = 0; +		STACK_WIND_COOKIE (frame, afr_readlink_cbk, +				   (void *) (long) this_try, +				   children[this_try],  +				   children[this_try]->fops->readlink, +				   &local->loc, +				   local->cont.readlink.size); +	} + +out: +	if (unwind) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); +	} + +	return 0; +} + + +int32_t +afr_readlink (call_frame_t *frame, xlator_t *this, +	      loc_t *loc, size_t size) +{ +	afr_private_t * priv       = NULL; +	xlator_t **     children   = NULL; +	int             call_child = 0; +	afr_local_t     *local     = NULL; + +	int32_t op_ret   = -1; +	int32_t op_errno = 0; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (this->private, out); + +	priv     = this->private; +	VALIDATE_OR_GOTO (priv->children, out); + +	children = priv->children; + +	ALLOC_OR_GOTO (local, afr_local_t, out); + +	frame->local = local; + +	call_child = afr_first_up_child (priv); +	if (call_child == -1) { +		op_errno = ENOTCONN; +		gf_log (this->name, GF_LOG_ERROR, +			"no child is up :("); +		goto out; +	} + +	local->cont.readlink.last_tried = call_child; +	loc_copy (&local->loc, loc); +	local->cont.readlink.size       = size; + +	STACK_WIND_COOKIE (frame, afr_readlink_cbk, +			   (void *) (long) call_child, +			   children[call_child], children[call_child]->fops->readlink, +			   loc, size); + +	op_ret = 0; +out: +	if (op_ret == -1) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); +	} +	return 0; +} + + +/* }}} */ + +/* {{{ getxattr */ + +int32_t +afr_getxattr_cbk (call_frame_t *frame, void *cookie, +		  xlator_t *this, int32_t op_ret, int32_t op_errno, +		  dict_t *dict) +{ +	afr_private_t * priv     = NULL; +	afr_local_t *   local    = NULL; +	xlator_t **     children = NULL; + +	int unwind     = 1; +	int last_tried = -1; +	int this_try = -1; + +	priv     = this->private; +	children = priv->children; + +	local = frame->local; + +	if (op_ret == -1) { +		last_tried = local->cont.getxattr.last_tried; + +		if (all_tried (last_tried, priv->child_count)) { +			goto out; +		} +		this_try = ++local->cont.getxattr.last_tried; + +		unwind = 0; +		STACK_WIND_COOKIE (frame, afr_getxattr_cbk, +				   (void *) (long) this_try, +				   children[this_try],  +				   children[this_try]->fops->getxattr, +				   &local->loc, +				   local->cont.getxattr.name); +	} + +out: +	if (unwind) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, dict); +	} + +	return 0; +} + + +int32_t +afr_getxattr (call_frame_t *frame, xlator_t *this, +	      loc_t *loc, const char *name) +{ +	afr_private_t *   priv       = NULL; +	xlator_t **       children   = NULL; +	int               call_child = 0; +	afr_local_t     * local      = NULL; + +	int32_t op_ret   = -1; +	int32_t op_errno = 0; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (this->private, out); + +	priv     = this->private; +	VALIDATE_OR_GOTO (priv->children, out); + +	children = priv->children; + +	ALLOC_OR_GOTO (local, afr_local_t, out); +	frame->local = local; + +	call_child = afr_first_up_child (priv); +	if (call_child == -1) { +		op_errno = ENOTCONN; +		gf_log (this->name, GF_LOG_ERROR, +			"no child is up :("); +		goto out; +	} + +	local->cont.getxattr.last_tried = call_child; +	loc_copy (&local->loc, loc); +	if (name) +	  local->cont.getxattr.name       = strdup (name); + +	STACK_WIND_COOKIE (frame, afr_getxattr_cbk, +			   (void *) (long) call_child, +			   children[call_child], children[call_child]->fops->getxattr, +			   loc, name); + +	op_ret = 0; +out: +	if (op_ret == -1) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); +	} +	return 0; +} + + +/* }}} */ + +/* {{{ readv */ + +/** + * read algorithm: + *  + * if the user has specified a read subvolume, use it + * otherwise - + *   use the inode number to hash it to one of the subvolumes, and + *   read from there (to balance read load) + * + * if any of the above read's fail, try the children in sequence + * beginning at the beginning + */ +  +int32_t +afr_readv_cbk (call_frame_t *frame, void *cookie, +	       xlator_t *this, int32_t op_ret, int32_t op_errno, +	       struct iovec *vector, int32_t count, struct stat *buf) +{ +	afr_private_t * priv     = NULL; +	afr_local_t *   local    = NULL; +	xlator_t **     children = NULL; + +	int unwind     = 1; +	int last_tried = -1; +	int this_try = -1; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (this->private, out); + +	priv     = this->private; +	VALIDATE_OR_GOTO (priv->children, out); + +	children = priv->children; + +	local = frame->local; + +	if (op_ret == -1) { +	retry: +		last_tried = local->cont.readv.last_tried; + +		if (all_tried (last_tried, priv->child_count)) { +			goto out; +		} +		this_try = ++local->cont.readv.last_tried; + +		if (this_try == priv->read_child) { +			/*  +			   skip the read child since if we are here +			   we must have already tried that child +			*/ +			goto retry; +		} + +		unwind = 0; + +		STACK_WIND_COOKIE (frame, afr_readv_cbk, +				   (void *) (long) this_try, +				   children[this_try],  +				   children[this_try]->fops->readv, +				   local->fd, local->cont.readv.size, +				   local->cont.readv.offset); +	} + +out: +	if (unwind) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf); +	} + +	return 0; +} + + +int32_t +afr_readv (call_frame_t *frame, xlator_t *this, +	   fd_t *fd, size_t size, off_t offset) +{ +	afr_private_t * priv       = NULL; +	afr_local_t   * local      = NULL; +	xlator_t **     children   = NULL; + +	int             call_child = 0; + +	int32_t         op_ret     = -1; +	int32_t         op_errno   = 0; + +	VALIDATE_OR_GOTO (frame, out); +	VALIDATE_OR_GOTO (this, out); +	VALIDATE_OR_GOTO (this->private, out); +	VALIDATE_OR_GOTO (fd, out); + +	priv     = this->private; +	children = priv->children; + +	ALLOC_OR_GOTO (local, afr_local_t, out); + +	frame->local = local; + +	if (priv->read_child != -1) { +		call_child = priv->read_child; + +		/*  +		   if read fails from the read child, we try +		   all children starting with the first one +		*/ +		local->cont.readv.last_tried = -1; +	} else { +		call_child = afr_first_up_child (priv); +		if (call_child == -1) { +			op_errno = ENOTCONN; +			gf_log (this->name, GF_LOG_ERROR, +				"no child is up :("); +			goto out; +		} + +		local->cont.readv.last_tried = call_child; +	} + +	local->fd                    = fd_ref (fd); + +	local->cont.readv.size       = size; +	local->cont.readv.offset     = offset; + +	STACK_WIND_COOKIE (frame, afr_readv_cbk, +			   (void *) (long) call_child, +			   children[call_child], +			   children[call_child]->fops->readv, +			   fd, size, offset); + +	op_ret = 0; +out: +	if (op_ret == -1) { +		AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, 0, NULL); +	} +	return 0; +} + +/* }}} */  | 
