summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-inode-read.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/afr/src/afr-inode-read.c')
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c721
1 files changed, 721 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
new file mode 100644
index 00000000000..a6c99ec0576
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -0,0 +1,721 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+#include "afr.h"
+
+
+/**
+ * Common algorithm for inode read calls:
+ *
+ * - Try the fop on the first child that is up
+ * - if we have failed due to ENOTCONN:
+ * try the next child
+ *
+ * Applicable to: access, stat, fstat, readlink, getxattr
+ */
+
+/* {{{ access */
+
+int32_t
+afr_access_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ last_tried = local->cont.access.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.access.last_tried;
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_access_cbk,
+ (void *) (long) this_try,
+ children[this_try],
+ children[this_try]->fops->access,
+ &local->loc, local->cont.access.mask);
+ }
+
+out:
+ if (unwind) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_access (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t mask)
+{
+ afr_private_t * priv = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ call_child = afr_first_up_child (priv);
+ if (call_child == -1) {
+ op_errno = ENOTCONN;
+ gf_log (this->name, GF_LOG_ERROR,
+ "no child is up :(");
+ goto out;
+ }
+
+ local->cont.access.last_tried = call_child;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
+
+ STACK_WIND_COOKIE (frame, afr_access_cbk,
+ (void *) (long) call_child,
+ children[call_child], children[call_child]->fops->access,
+ loc, mask);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno);
+ }
+ return 0;
+}
+
+
+/* }}} */
+
+/* {{{ stat */
+
+int32_t
+afr_stat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int deitransform_child = -1;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ deitransform_child = (long) cookie;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.stat.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.stat.last_tried;
+
+ if (this_try == deitransform_child) {
+ goto retry;
+ }
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_stat_cbk,
+ (void *) (long) deitransform_child,
+ children[this_try],
+ children[this_try]->fops->stat,
+ &local->loc);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret != -1)
+ buf->st_ino = local->cont.stat.ino;
+
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_stat (call_frame_t *frame, xlator_t *this,
+ loc_t *loc)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int call_child = 0;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ frame->local = local;
+
+ call_child = afr_deitransform (loc->inode->ino, priv->child_count);
+ loc_copy (&local->loc, loc);
+
+ /*
+ if stat fails from the deitranform'd child, we try
+ all children starting with the first one
+ */
+ local->cont.stat.last_tried = -1;
+ local->cont.stat.ino = loc->inode->ino;
+
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->stat,
+ loc);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+
+/* }}} */
+
+/* {{{ fstat */
+
+int32_t
+afr_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int deitransform_child = -1;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ deitransform_child = (long) cookie;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.fstat.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.fstat.last_tried;
+
+ if (this_try == deitransform_child) {
+ /*
+ skip the deitransform'd child since if we are here
+ we must have already tried that child
+ */
+ goto retry;
+ }
+
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk,
+ (void *) (long) deitransform_child,
+ children[this_try],
+ children[this_try]->fops->fstat,
+ local->fd);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret != -1)
+ buf->st_ino = local->cont.fstat.ino;
+
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_fstat (call_frame_t *frame, xlator_t *this,
+ fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int call_child = 0;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ frame->local = local;
+
+ VALIDATE_OR_GOTO (fd->inode, out);
+
+ call_child = afr_deitransform (fd->inode->ino, priv->child_count);
+
+ /*
+ if fstat fails from the deitranform'd child, we try
+ all children starting with the first one
+ */
+ local->cont.fstat.last_tried = -1;
+ local->cont.fstat.ino = fd->inode->ino;
+ local->fd = fd_ref (fd);
+
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->fstat,
+ fd);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ readlink */
+
+int32_t
+afr_readlink_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ const char *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ last_tried = local->cont.readlink.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.readlink.last_tried;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+ (void *) (long) this_try,
+ children[this_try],
+ children[this_try]->fops->readlink,
+ &local->loc,
+ local->cont.readlink.size);
+ }
+
+out:
+ if (unwind) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size)
+{
+ afr_private_t * priv = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ frame->local = local;
+
+ call_child = afr_first_up_child (priv);
+ if (call_child == -1) {
+ op_errno = ENOTCONN;
+ gf_log (this->name, GF_LOG_ERROR,
+ "no child is up :(");
+ goto out;
+ }
+
+ local->cont.readlink.last_tried = call_child;
+ loc_copy (&local->loc, loc);
+ local->cont.readlink.size = size;
+
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+ (void *) (long) call_child,
+ children[call_child], children[call_child]->fops->readlink,
+ loc, size);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+
+/* }}} */
+
+/* {{{ getxattr */
+
+int32_t
+afr_getxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ last_tried = local->cont.getxattr.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.getxattr.last_tried;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) this_try,
+ children[this_try],
+ children[this_try]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name);
+ }
+
+out:
+ if (unwind) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, dict);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name)
+{
+ afr_private_t * priv = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ afr_local_t * local = NULL;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+ frame->local = local;
+
+ call_child = afr_first_up_child (priv);
+ if (call_child == -1) {
+ op_errno = ENOTCONN;
+ gf_log (this->name, GF_LOG_ERROR,
+ "no child is up :(");
+ goto out;
+ }
+
+ local->cont.getxattr.last_tried = call_child;
+ loc_copy (&local->loc, loc);
+ if (name)
+ local->cont.getxattr.name = strdup (name);
+
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child], children[call_child]->fops->getxattr,
+ loc, name);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+
+/* }}} */
+
+/* {{{ readv */
+
+/**
+ * read algorithm:
+ *
+ * if the user has specified a read subvolume, use it
+ * otherwise -
+ * use the inode number to hash it to one of the subvolumes, and
+ * read from there (to balance read load)
+ *
+ * if any of the above read's fail, try the children in sequence
+ * beginning at the beginning
+ */
+
+int32_t
+afr_readv_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.readv.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.readv.last_tried;
+
+ if (this_try == priv->read_child) {
+ /*
+ skip the read child since if we are here
+ we must have already tried that child
+ */
+ goto retry;
+ }
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) this_try,
+ children[this_try],
+ children[this_try]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset);
+ }
+
+out:
+ if (unwind) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int call_child = 0;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ priv = this->private;
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ frame->local = local;
+
+ if (priv->read_child != -1) {
+ call_child = priv->read_child;
+
+ /*
+ if read fails from the read child, we try
+ all children starting with the first one
+ */
+ local->cont.readv.last_tried = -1;
+ } else {
+ call_child = afr_first_up_child (priv);
+ if (call_child == -1) {
+ op_errno = ENOTCONN;
+ gf_log (this->name, GF_LOG_ERROR,
+ "no child is up :(");
+ goto out;
+ }
+
+ local->cont.readv.last_tried = call_child;
+ }
+
+ local->fd = fd_ref (fd);
+
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readv,
+ fd, size, offset);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, 0, NULL);
+ }
+ return 0;
+}
+
+/* }}} */