From 74612a456ad1602f8038fae79fee654eb427602a Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Tue, 24 Nov 2009 08:45:09 +0000 Subject: cluster/afr: Do self-heal on reopened fds. This patch brings in partial support for self-heal of open fds. The precondition is that the fd should have been opened successfully during the initial open() (or create()), and we assume that protocol/client has successfully reopened the fd when the subvolume comes back up. It works by doing an "up/down flush" (a dummy flush transaction to do post-op wherever necessary) and then triggering data self-heal on the file in the post-post-op hook of the dummy flush transaction. This ensures that any writes that come in during self-heal will wait until self-heal completes. The up/down flush is also done when a subvolume goes down, so that post-op is done on all subvolumes where pre-op was done. Signed-off-by: Vikas Gorur Signed-off-by: Anand V. Avati BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170 --- xlators/cluster/afr/src/afr-open.c | 356 +++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 xlators/cluster/afr/src/afr-open.c (limited to 'xlators/cluster/afr/src/afr-open.c') diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c new file mode 100644 index 00000000000..945f5cddf17 --- /dev/null +++ b/xlators/cluster/afr/src/afr-open.c @@ -0,0 +1,356 @@ +/* + Copyright (c) 2007-2009 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include +#include +#include +#include +#include + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "statedump.h" + +#include "fd.h" + +#include "afr-inode-read.h" +#include "afr-inode-write.h" +#include "afr-dir-read.h" +#include "afr-dir-write.h" +#include "afr-transaction.h" + +#include "afr-self-heal.h" + + +int +afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *prebuf, + struct stat *postbuf) +{ + afr_local_t * local = frame->local; + int ret = 0; + + ret = afr_fd_ctx_set (this, local->fd); + + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + + AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, + local->fd); + return 0; +} + + +int +afr_open_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int ret = 0; + + int call_count = -1; + + priv = this->private; + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->success_count++; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if ((local->cont.open.flags & O_TRUNC) + && (local->op_ret >= 0)) { + STACK_WIND (frame, afr_open_ftruncate_cbk, + this, this->fops->ftruncate, + fd, 0); + } else { + ret = afr_fd_ctx_set (this, fd); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "could not set fd ctx for fd=%p", + fd); + + local->op_ret = -1; + local->op_errno = -ret; + } + + AFR_STACK_UNWIND (open, frame, local->op_ret, + local->op_errno, local->fd); + } + } + + return 0; +} + + +int +afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, int32_t wbflags) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int i = 0; + int ret = -1; + + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t wind_flags = flags & (~O_TRUNC); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + + if (afr_is_split_brain (this, loc->inode)) { + /* self-heal failed */ + op_errno = EIO; + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + call_count = local->call_count; + + local->cont.open.flags = flags; + local->fd = fd_ref (fd); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->open, + loc, wind_flags, fd, wbflags); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd); + } + + return 0; +} + + +int +afr_up_down_flush_sh_completion_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.post_post_op (frame, this); + + return 0; +} + + +int +afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + sh->calling_fop = GF_FOP_FLUSH; + +// sh->healing_fd = local->fd; + +// sh->healing_fd_opened = _gf_true; + + local->cont.lookup.inode = local->fd->inode; + + inode_path (local->fd->inode, NULL, (char **)&local->loc.path); + local->loc.name = strrchr (local->loc.path, '/'); + local->loc.inode = inode_ref (local->fd->inode); + local->loc.parent = inode_parent (local->fd->inode, 0, NULL); + + sh->data_lock_held = _gf_true; + + local->need_data_self_heal = _gf_true; + local->cont.lookup.buf.st_mode = local->fd->inode->st_mode; + local->child_count = afr_up_children_count (priv->child_count, + local->child_up); + + sh->flush_self_heal_cbk = afr_up_down_flush_sh_completion_cbk; + + afr_self_heal (frame, this, afr_up_down_flush_sh_completion_cbk, + _gf_false); + + return 0; +} + + +int +afr_up_down_flush_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + local->transaction.resume (frame, this); + return 0; +} + + +int +afr_up_down_flush_done (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + int _ret = -1; + int i = 0; + + priv = this->private; + local = frame->local; + + LOCK (&local->fd->lock); + { + _ret = __fd_ctx_get (local->fd, this, &ctx); + + if (_ret < 0) { + goto out; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + fd_ctx->down_count = priv->down_count; + fd_ctx->up_count = priv->up_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) + fd_ctx->pre_op_done[i] = 0; + } + } +out: + UNLOCK (&local->fd->lock); + + local->up_down_flush_cbk (frame, this); + + return 0; +} + + +int +afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, + afr_flush_type type) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int op_ret = -1; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + local = frame->local; + + local->op = GF_FOP_FLUSH; + + local->fd = fd_ref (local->fd); + + local->transaction.fop = afr_up_down_flush_wind; + local->transaction.done = afr_up_down_flush_done; + + switch (type) { + case AFR_CHILD_UP_FLUSH: + local->transaction.post_post_op = afr_up_down_flush_post_post_op; + break; + + case AFR_CHILD_DOWN_FLUSH: + local->transaction.post_post_op = NULL; + break; + } + + local->transaction.start = 0; + local->transaction.len = 0; + + gf_log (this->name, GF_LOG_TRACE, + "doing up/down flush on fd=%p", + fd); + + afr_transaction (frame, this, AFR_FLUSH_TRANSACTION); + + op_ret = 0; +out: + return 0; +} -- cgit