diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-inode-write.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 2111 |
1 files changed, 1622 insertions, 489 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 4135ba947..c1ec69a55 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ @@ -48,45 +39,151 @@ #include "afr-transaction.h" #include "afr-self-heal-common.h" +void +__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child, + xlator_t *this, int32_t *op_ret, int32_t *op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (afr_fop_failed (*op_ret, *op_errno)) { + local->child_errno[child_index] = *op_errno; + + switch (local->op) { + case GF_FOP_TRUNCATE: + case GF_FOP_FTRUNCATE: + if (*op_errno != EFBIG) + afr_transaction_fop_failed (frame, this, + child_index); + break; + default: + afr_transaction_fop_failed (frame, this, child_index); + break; + } + local->op_errno = *op_errno; + goto out; + } + + if ((local->success_count == 0) || (read_child == child_index)) { + local->op_ret = *op_ret; + if (prebuf) + local->cont.inode_wfop.prebuf = *prebuf; + if (postbuf) + local->cont.inode_wfop.postbuf = *postbuf; + } + + local->success_count++; +out: + return; +} + /* {{{ writev */ -int +void +afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame) +{ + afr_local_t *src_local = NULL; + afr_local_t *dst_local = NULL; + + src_local = src_frame->local; + dst_local = dst_frame->local; + + dst_local->op_ret = src_local->op_ret; + dst_local->op_errno = src_local->op_errno; + dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf; + dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf; +} + +void afr_writev_unwind (call_frame_t *frame, xlator_t *this) { afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + local = frame->local; + + AFR_STACK_UNWIND (writev, frame, + local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); +} + +call_frame_t* +afr_transaction_detach_fop_frame (call_frame_t *frame) +{ + afr_local_t * local = NULL; + call_frame_t *fop_frame = NULL; local = frame->local; LOCK (&frame->lock); { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; + fop_frame = local->transaction.main_frame; local->transaction.main_frame = NULL; } UNLOCK (&frame->lock); - if (main_frame) { - AFR_STACK_UNWIND (writev, main_frame, - local->op_ret, local->op_errno, - &local->cont.writev.prebuf, - &local->cont.writev.postbuf); + return fop_frame; +} + +int +afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *fop_frame = NULL; + + fop_frame = afr_transaction_detach_fop_frame (frame); + + if (fop_frame) { + afr_writev_copy_outvars (frame, fop_frame); + afr_writev_unwind (fop_frame, this); } return 0; } +static void +afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + + local = frame->local; + priv = this->private; + /* + * We already have the best case result of the writev calls staged + * as the return value. Any writev that returns some value less + * than the best case is now out of sync, so mark the fop as + * failed. Note that fops that have returned with errors have + * already been marked as failed. + */ + for (i = 0; i < priv->child_count; i++) { + if ((!local->replies[i].valid) || + (local->replies[i].op_ret == -1)) + continue; + + if (local->replies[i].op_ret < local->op_ret) + afr_transaction_fop_failed(frame, this, i); + } +} int afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { afr_local_t * local = NULL; + afr_private_t *priv = NULL; + call_frame_t *fop_frame = NULL; int child_index = (long) cookie; int call_count = -1; int read_child = 0; + int ret = 0; + uint32_t open_fd_count = 0; + uint32_t write_is_append = 0; local = frame->local; + priv = this->private; read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); @@ -96,32 +193,81 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->read_child_returned = _gf_true; } - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.writev.prebuf = *prebuf; - local->cont.writev.postbuf = *postbuf; - } - - if (child_index == read_child) { - local->cont.writev.prebuf = *prebuf; - local->cont.writev.postbuf = *postbuf; + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, prebuf, postbuf, + xdata); + + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + + + /* stage the best case return value for unwind */ + if ((local->success_count == 0) || (op_ret > local->op_ret)) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + + if (op_ret != -1) { + if (xdata) { + ret = dict_get_uint32 (xdata, + GLUSTERFS_OPEN_FD_COUNT, + &open_fd_count); + if ((ret == 0) && + (open_fd_count > local->open_fd_count)) { + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; + } + + write_is_append = 0; + ret = dict_get_uint32 (xdata, + GLUSTERFS_WRITE_IS_APPEND, + &write_is_append); + if (ret || !write_is_append) + local->append_write = _gf_false; } - } - local->op_errno = op_errno; + } } UNLOCK (&frame->lock); call_count = afr_frame_return (frame); if (call_count == 0) { - local->transaction.unwind (frame, this); - local->transaction.resume (frame, this); + if (local->update_open_fd_count) + afr_handle_open_fd_count (frame, this); + + if (!local->stable_write && !local->append_write) + /* An appended write removes the necessity to + fsync() the file. This is because self-heal + has the logic to check for larger file when + the xattrs are not reliably pointing at + a stale file. + */ + afr_fd_report_unstable_write (this, local->fd); + + afr_writev_handle_short_writes (frame, this); + if (afr_any_fops_failed (local, priv)) { + //Don't unwind until post-op is complete + local->transaction.resume (frame, this); + } else { + /* + * Generally inode-write fops do transaction.unwind then + * transaction.resume, but writev needs to make sure that + * delayed post-op frame is placed in fdctx before unwind + * happens. This prevents the race of flush doing the + * changelog wakeup first in fuse thread and then this + * writev placing its delayed post-op frame in fdctx. + * This helps flush make sure all the delayed post-ops are + * completed. + */ + + fop_frame = afr_transaction_detach_fop_frame (frame); + afr_writev_copy_outvars (frame, fop_frame); + local->transaction.resume (frame, this); + afr_writev_unwind (fop_frame, this); + } } return 0; } @@ -133,6 +279,8 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int i = 0; int call_count = -1; + dict_t *xdata = NULL; + GF_UNUSED int ret = 0; local = frame->local; priv = this->private; @@ -146,6 +294,28 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this) } local->call_count = call_count; + local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies), + gf_afr_mt_reply_t); + if (!local->replies) { + local->op_ret = -1; + local->op_errno = ENOMEM; + local->transaction.unwind(frame, this); + local->transaction.resume(frame, this); + return 0; + } + + xdata = dict_new (); + if (xdata) { + ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT, + sizeof (uint32_t)); + ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND, + 0); + /* Set append_write to be true speculatively. If on any + server it turns not be true, we unset it in the + callback. + */ + local->append_write = _gf_true; + } for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) { @@ -157,13 +327,18 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this) local->cont.writev.vector, local->cont.writev.count, local->cont.writev.offset, - local->cont.writev.iobref); + local->cont.writev.flags, + local->cont.writev.iobref, + xdata); if (!--call_count) break; } } + if (xdata) + dict_unref (xdata); + return 0; } @@ -203,7 +378,7 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) } transaction_frame->local = local; - frame->local = NULL; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); local->op = GF_FOP_WRITE; @@ -211,10 +386,17 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) local->transaction.fop = afr_writev_wind; local->transaction.done = afr_writev_done; - local->transaction.unwind = afr_writev_unwind; + local->transaction.unwind = afr_transaction_writev_unwind; local->transaction.main_frame = frame; if (local->fd->flags & O_APPEND) { + /* + * Backend vfs ignores the 'offset' for append mode fd so + * locking just the region provided for the writev does not + * give consistency gurantee. The actual write may happen at a + * completely different range than the one provided by the + * offset, len in the fop. So lock the entire file. + */ local->transaction.start = 0; local->transaction.len = 0; } else { @@ -223,156 +405,91 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) local->cont.writev.count); } - afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } op_ret = 0; out: - if (op_ret == -1) { + if (op_ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL); + AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL); } return 0; } -static int -afr_prepare_loc (call_frame_t *frame, fd_t *fd) +static void +afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this) { - afr_local_t *local = NULL; - char *name = NULL; - char *path = NULL; - int ret = 0; - - if ((!fd) || (!fd->inode)) - return -1; - - local = frame->local; - ret = inode_path (fd->inode, NULL, (char **)&path); - if (ret <= 0) { - gf_log (frame->this->name, GF_LOG_DEBUG, - "Unable to get path for gfid: %s", - uuid_utoa (fd->inode->gfid)); - return -1; - } - - if (local->loc.path) { - if (strcmp (path, local->loc.path)) - gf_log (frame->this->name, GF_LOG_DEBUG, - "overwriting old loc->path %s with %s", - local->loc.path, path); - GF_FREE ((char *)local->loc.path); - } - local->loc.path = path; - - name = strrchr (local->loc.path, '/'); - if (name) - name++; - local->loc.name = name; - - if (local->loc.inode) { - inode_unref (local->loc.inode); - } - local->loc.inode = inode_ref (fd->inode); - - if (local->loc.parent) { - inode_unref (local->loc.parent); + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + char *reason = NULL; + int32_t op_errno = 0; + int ret = 0; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: " + "fd: %p, inode: %p", fd, + fd ? fd->inode : NULL); + goto out; } - local->loc.parent = inode_parent (local->loc.inode, 0, NULL); - - return 0; -} - -afr_fd_paused_call_t* -afr_paused_call_create (call_frame_t *frame) -{ - afr_local_t *local = NULL; - afr_fd_paused_call_t *paused_call = NULL; + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); local = frame->local; - GF_ASSERT (local->fop_call_continue); - - paused_call = GF_CALLOC (1, sizeof (*paused_call), - gf_afr_fd_paused_call_t); - if (paused_call) { - INIT_LIST_HEAD (&paused_call->call_list); - paused_call->frame = frame; - } - - return paused_call; -} - -static int -afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx) -{ - afr_fd_paused_call_t *paused_call = NULL; - int ret = 0; - - paused_call = afr_paused_call_create (frame); - if (paused_call) - list_add (&paused_call->call_list, &fd_ctx->paused_calls); - else - ret = -ENOMEM; - - return ret; -} + ret = afr_local_init (local, this->private, &op_errno); + if (ret < 0) + goto out; -static void -afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - inode_t *inode = NULL; - char *reason = NULL; + local->loc.inode = inode_ref (fd->inode); + ret = loc_path (&local->loc, NULL); + if (ret < 0) + goto out; - local = frame->local; sh = &local->self_heal; - inode = local->fd->inode; - - sh->do_missing_entry_self_heal = _gf_true; - sh->do_gfid_self_heal = _gf_true; - sh->do_data_self_heal = _gf_true; + sh->do_metadata_self_heal = _gf_true; + if (fd->inode->ia_type == IA_IFREG) + sh->do_data_self_heal = _gf_true; + else if (fd->inode->ia_type == IA_IFDIR) + sh->do_entry_self_heal = _gf_true; reason = "subvolume came online"; - afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type, - reason, NULL, NULL); + afr_launch_self_heal (frame, this, fd->inode, _gf_true, + fd->inode->ia_type, reason, NULL, NULL); + return; +out: + AFR_STACK_DESTROY (frame); } -int -afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop) -{ - int ret = 0; - int i = 0; - afr_fd_ctx_t *fd_ctx = NULL; - gf_boolean_t need_self_heal = _gf_false; - int *need_open = NULL; - int need_open_count = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - gf_boolean_t fop_continue = _gf_true; +void +afr_open_fd_fix (fd_t *fd, xlator_t *this) +{ + int ret = 0; + int i = 0; + afr_fd_ctx_t *fd_ctx = NULL; + gf_boolean_t need_self_heal = _gf_false; + int *need_open = NULL; + size_t need_open_count = 0; + afr_private_t *priv = NULL; - local = frame->local; priv = this->private; - GF_ASSERT (local->fd); - if (pause_fop) - GF_ASSERT (local->fop_call_continue); - - ret = afr_prepare_loc (frame, local->fd); - if (ret < 0) { - //File does not exist we cant open it. - ret = 0; + if (!afr_is_fd_fixable (fd)) goto out; - } - fd_ctx = afr_fd_ctx_get (local->fd, this); - if (!fd_ctx) { - ret = -EINVAL; - goto unlock; - } + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) + goto out; - LOCK (&local->fd->lock); + LOCK (&fd->lock); { if (fd_ctx->up_count < priv->up_count) { need_self_heal = _gf_true; @@ -380,67 +497,44 @@ afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop) fd_ctx->down_count = priv->down_count; } + need_open = alloca (priv->child_count * sizeof (*need_open)); for (i = 0; i < priv->child_count; i++) { - if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) && - local->child_up[i]) { - fd_ctx->opened_on[i] = AFR_FD_OPENING; - if (!need_open) - need_open = GF_CALLOC (priv->child_count, - sizeof (*need_open), - gf_afr_mt_int32_t); - need_open[i] = 1; - need_open_count++; - } else if (pause_fop && local->child_up[i] && - (fd_ctx->opened_on[i] == AFR_FD_OPENING)) { - local->fop_paused = _gf_true; - } - } + need_open[i] = 0; + if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED) + continue; - if (local->fop_paused) { - GF_ASSERT (pause_fop); - gf_log (this->name, GF_LOG_INFO, "Pause fd %p", - local->fd); - ret = afr_pause_fd_fop (frame, this, fd_ctx); - if (ret) - goto unlock; - fop_continue = _gf_false; + if (!priv->child_up[i]) + continue; + + fd_ctx->opened_on[i] = AFR_FD_OPENING; + + need_open[i] = 1; + need_open_count++; } } -unlock: - UNLOCK (&local->fd->lock); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s", - local->loc.path); - fop_continue = _gf_false; + UNLOCK (&fd->lock); + if (ret) goto out; - } if (need_self_heal) - afr_trigger_open_fd_self_heal (frame, this); + afr_trigger_open_fd_self_heal (fd, this); if (!need_open_count) goto out; - gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd); - afr_fix_open (frame, this, fd_ctx, need_open_count, need_open); - fop_continue = _gf_false; + afr_fix_open (this, fd, need_open_count, need_open); out: - if (need_open) - GF_FREE (need_open); - if (fop_continue && local->fop_call_continue) - local->fop_call_continue (frame, this); - return ret; + return; } int afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; int ret = -1; - int op_ret = -1; int op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -449,37 +543,41 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - QUORUM_CHECK(writev,out); - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; goto out; } - frame->local = local; + QUORUM_CHECK(writev,out); + + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; local->cont.writev.vector = iov_dup (vector, count); local->cont.writev.count = count; local->cont.writev.offset = offset; + local->cont.writev.flags = flags; local->cont.writev.iobref = iobref_ref (iobref); local->fd = fd_ref (fd); - local->fop_call_continue = afr_do_writev; - ret = afr_open_fd_fix (frame, this, _gf_true); - if (ret) { - op_errno = -ret; - goto out; - } + /* detect here, but set it in writev_wind_cbk *after* the unstable + write is performed + */ + local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC)); - op_ret = 0; + afr_open_fd_fix (fd, this); + + afr_do_writev (frame, this); + + ret = 0; out: - if (op_ret == -1) { - AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL); - } + if (ret < 0) + AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -508,8 +606,9 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this) if (main_frame) { AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno, - &local->cont.truncate.prebuf, - &local->cont.truncate.postbuf); + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); } return 0; @@ -519,17 +618,14 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this) int afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { afr_local_t * local = NULL; - afr_private_t * priv = NULL; int child_index = (long) cookie; int read_child = 0; int call_count = -1; - int need_unwind = 0; local = frame->local; - priv = this->private; read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL); @@ -539,38 +635,22 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->read_child_returned = _gf_true; } - if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG) - afr_transaction_fop_failed (frame, this, child_index); - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.truncate.prebuf = *prebuf; - local->cont.truncate.postbuf = *postbuf; - } - - if (child_index == read_child) { - local->cont.truncate.prebuf = *prebuf; - local->cont.truncate.postbuf = *postbuf; - } - - local->success_count++; - - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } + if (prebuf->ia_size != postbuf->ia_size) + local->stable_write = _gf_false; } - local->op_errno = op_errno; + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, prebuf, postbuf, + xdata); } UNLOCK (&frame->lock); - if (need_unwind) - local->transaction.unwind (frame, this); - call_count = afr_frame_return (frame); if (call_count == 0) { + if (local->stable_write && afr_txn_nothing_failed (frame, this)) + local->transaction.unwind (frame, this); + local->transaction.resume (frame, this); } @@ -598,6 +678,7 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this) } local->call_count = call_count; + local->stable_write = _gf_true; for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) { @@ -606,7 +687,8 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this) priv->children[i], priv->children[i]->fops->truncate, &local->loc, - local->cont.truncate.offset); + local->cont.truncate.offset, + NULL); if (!--call_count) break; @@ -634,13 +716,12 @@ afr_truncate_done (call_frame_t *frame, xlator_t *this) int afr_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset) + loc_t *loc, off_t offset, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; call_frame_t *transaction_frame = NULL; int ret = -1; - int op_ret = -1; int op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -649,24 +730,20 @@ afr_truncate (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(truncate,out); + QUORUM_CHECK(truncate,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { + op_errno = ENOMEM; goto out; } - ALLOC_OR_GOTO (local, afr_local_t, out); + AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); + local = transaction_frame->local; - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) goto out; - } - - transaction_frame->local = local; - - local->op_ret = -1; local->cont.truncate.offset = offset; @@ -680,14 +757,18 @@ afr_truncate (call_frame_t *frame, xlator_t *this, local->transaction.start = offset; local->transaction.len = 0; - afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - op_ret = 0; + ret = 0; out: - if (op_ret == -1) { + if (ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, NULL); + AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); } return 0; @@ -718,8 +799,9 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this) if (main_frame) { AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno, - &local->cont.ftruncate.prebuf, - &local->cont.ftruncate.postbuf); + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); } return 0; } @@ -728,17 +810,14 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this) int afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { afr_local_t * local = NULL; - afr_private_t * priv = NULL; int child_index = (long) cookie; int call_count = -1; - int need_unwind = 0; int read_child = 0; local = frame->local; - priv = this->private; read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); @@ -748,38 +827,22 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->read_child_returned = _gf_true; } - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.ftruncate.prebuf = *prebuf; - local->cont.ftruncate.postbuf = *postbuf; - } - - if (child_index == read_child) { - local->cont.ftruncate.prebuf = *prebuf; - local->cont.ftruncate.postbuf = *postbuf; - } - - local->success_count++; - - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } + if (prebuf->ia_size != postbuf->ia_size) + local->stable_write = _gf_false; } - local->op_errno = op_errno; + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, prebuf, postbuf, + xdata); } UNLOCK (&frame->lock); - if (need_unwind) - local->transaction.unwind (frame, this); - call_count = afr_frame_return (frame); if (call_count == 0) { + if (local->stable_write && afr_txn_nothing_failed (frame, this)) + local->transaction.unwind (frame, this); + local->transaction.resume (frame, this); } @@ -807,6 +870,7 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this) } local->call_count = call_count; + local->stable_write = _gf_true; for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) { @@ -814,7 +878,9 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this) (void *) (long) i, priv->children[i], priv->children[i]->fops->ftruncate, - local->fd, local->cont.ftruncate.offset); + local->fd, + local->cont.ftruncate.offset, + NULL); if (!--call_count) break; @@ -869,14 +935,19 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this) local->transaction.start = local->cont.ftruncate.offset; local->transaction.len = 0; - afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } op_ret = 0; out: - if (op_ret == -1) { + if (op_ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL); + AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, + NULL, NULL); } return 0; @@ -885,13 +956,12 @@ out: int afr_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, off_t offset, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; call_frame_t *transaction_frame = NULL; int ret = -1; - int op_ret = -1; int op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -900,35 +970,33 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(ftruncate,out); - - ALLOC_OR_GOTO (local, afr_local_t, out); - ret = AFR_LOCAL_INIT (local, priv); - - if (ret < 0) { - op_errno = -ret; + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; goto out; } + QUORUM_CHECK(ftruncate,out); - frame->local = local; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; local->cont.ftruncate.offset = offset; local->fd = fd_ref (fd); - local->fop_call_continue = afr_do_ftruncate; - ret = afr_open_fd_fix (frame, this, _gf_true); - if (ret) { - op_errno = -ret; - goto out; - } + afr_open_fd_fix (fd, this); - op_ret = 0; + afr_do_ftruncate (frame, this); + + ret = 0; out: - if (op_ret == -1) { + if (ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL); + AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); } return 0; @@ -957,8 +1025,9 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this) if (main_frame) { AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno, - &local->cont.setattr.preop_buf, - &local->cont.setattr.postop_buf); + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); } return 0; @@ -968,7 +1037,7 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this) int afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { afr_local_t * local = NULL; afr_private_t * priv = NULL; @@ -988,29 +1057,14 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->read_child_returned = _gf_true; } - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.setattr.preop_buf = *preop; - local->cont.setattr.postop_buf = *postop; - } - - if (child_index == read_child) { - local->cont.setattr.preop_buf = *preop; - local->cont.setattr.postop_buf = *postop; - } - - local->success_count++; + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, preop, postop, + xdata); - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } + if ((local->success_count >= priv->wait_count) + && local->read_child_returned) { + need_unwind = 1; } - local->op_errno = op_errno; } UNLOCK (&frame->lock); @@ -1056,7 +1110,8 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this) priv->children[i]->fops->setattr, &local->loc, &local->cont.setattr.in_buf, - local->cont.setattr.valid); + local->cont.setattr.valid, + NULL); if (!--call_count) break; @@ -1084,13 +1139,12 @@ afr_setattr_done (call_frame_t *frame, xlator_t *this) int afr_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *buf, int32_t valid) + loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; call_frame_t *transaction_frame = NULL; int ret = -1; - int op_ret = -1; int op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -1099,24 +1153,20 @@ afr_setattr (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(setattr,out); + QUORUM_CHECK(setattr,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { + op_errno = ENOMEM; goto out; } - ALLOC_OR_GOTO (local, afr_local_t, out); + AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); + local = transaction_frame->local; - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) goto out; - } - - transaction_frame->local = local; - - local->op_ret = -1; local->cont.setattr.in_buf = *buf; local->cont.setattr.valid = valid; @@ -1131,14 +1181,18 @@ afr_setattr (call_frame_t *frame, xlator_t *this, local->transaction.start = LLONG_MAX - 1; local->transaction.len = 0; - afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - op_ret = 0; + ret = 0; out: - if (op_ret == -1) { + if (ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, NULL); + AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); } return 0; @@ -1165,8 +1219,9 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this) if (main_frame) { AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno, - &local->cont.fsetattr.preop_buf, - &local->cont.fsetattr.postop_buf); + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); } return 0; @@ -1176,7 +1231,7 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this) int afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { afr_local_t * local = NULL; afr_private_t * priv = NULL; @@ -1196,29 +1251,14 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->read_child_returned = _gf_true; } - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.fsetattr.preop_buf = *preop; - local->cont.fsetattr.postop_buf = *postop; - } - - if (child_index == read_child) { - local->cont.fsetattr.preop_buf = *preop; - local->cont.fsetattr.postop_buf = *postop; - } + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, preop, postop, + xdata); - local->success_count++; - - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } + if ((local->success_count >= priv->wait_count) + && local->read_child_returned) { + need_unwind = 1; } - local->op_errno = op_errno; } UNLOCK (&frame->lock); @@ -1264,7 +1304,8 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this) priv->children[i]->fops->fsetattr, local->fd, &local->cont.fsetattr.in_buf, - local->cont.fsetattr.valid); + local->cont.fsetattr.valid, + NULL); if (!--call_count) break; @@ -1291,13 +1332,12 @@ afr_fsetattr_done (call_frame_t *frame, xlator_t *this) int afr_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *buf, int32_t valid) + fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; call_frame_t *transaction_frame = NULL; int ret = -1; - int op_ret = -1; int op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -1306,7 +1346,12 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(fsetattr,out); + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } + + QUORUM_CHECK(fsetattr,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1314,16 +1359,12 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this, goto out; } - ALLOC_OR_GOTO (local, afr_local_t, out); - transaction_frame->local = local; + AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); + local = transaction_frame->local; - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) goto out; - } - - local->op_ret = -1; local->cont.fsetattr.in_buf = *buf; local->cont.fsetattr.valid = valid; @@ -1334,25 +1375,24 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this, local->fd = fd_ref (fd); - op_ret = afr_open_fd_fix (transaction_frame, this, _gf_false); - if (ret) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } + afr_open_fd_fix (fd, this); local->transaction.main_frame = frame; local->transaction.start = LLONG_MAX - 1; local->transaction.len = 0; - afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - op_ret = 0; + ret = 0; out: - if (op_ret == -1) { + if (ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL); + AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); } return 0; @@ -1380,38 +1420,34 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this) if (main_frame) { AFR_STACK_UNWIND (setxattr, main_frame, - local->op_ret, local->op_errno) - } + local->op_ret, local->op_errno, + NULL); + } return 0; } int afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int call_count = -1; - int need_unwind = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int need_unwind = 0; + int child_index = (long) cookie; local = frame->local; priv = this->private; LOCK (&frame->lock); { - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; - - if (local->success_count == priv->child_count) { - need_unwind = 1; - } + __inode_write_fop_cbk (frame, child_index, -1, this, + &op_ret, &op_errno, NULL, NULL, + xdata); + if (local->success_count == priv->child_count) { + need_unwind = 1; } - - local->op_errno = op_errno; } UNLOCK (&frame->lock); @@ -1431,10 +1467,10 @@ afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int afr_setxattr_wind (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; local = frame->local; priv = this->private; @@ -1457,7 +1493,8 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this) priv->children[i]->fops->setxattr, &local->loc, local->cont.setxattr.dict, - local->cont.setxattr.flags); + local->cont.setxattr.flags, + NULL); if (!--call_count) break; @@ -1471,7 +1508,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this) int afr_setxattr_done (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = frame->local; + afr_local_t *local = frame->local; local->transaction.unwind (frame, this); @@ -1482,39 +1519,40 @@ afr_setxattr_done (call_frame_t *frame, xlator_t *this) int afr_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int32_t flags) + loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_ret = -1; - int op_errno = 0; + int ret = -1; + int op_errno = EINVAL; - VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - priv = this->private; + GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict, + op_errno, out); - QUORUM_CHECK(setxattr,out); + GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict, + op_errno, out); - ALLOC_OR_GOTO (local, afr_local_t, out); + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this->private, out); - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } + priv = this->private; + QUORUM_CHECK(setxattr,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { + op_errno = ENOMEM; goto out; } - transaction_frame->local = local; + AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); + local = transaction_frame->local; - local->op_ret = -1; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; local->cont.setxattr.dict = dict_ref (dict); local->cont.setxattr.flags = flags; @@ -1529,14 +1567,211 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, local->transaction.start = LLONG_MAX - 1; local->transaction.len = 0; - afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - op_ret = 0; + ret = 0; out: - if (op_ret == -1) { + if (ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno); + AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + } + + return 0; +} + +/* {{{ fsetxattr */ + + +int +afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (fsetxattr, main_frame, + local->op_ret, local->op_errno, + NULL); + } + return 0; +} + + +int +afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int need_unwind = 0; + int child_index = (long) cookie; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + + __inode_write_fop_cbk (frame, child_index, -1, this, + &op_ret, &op_errno, NULL, NULL, + xdata); + if (local->success_count == priv->child_count) { + need_unwind = 1; + } + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_pre_op_done_children_count (local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fsetxattr, + local->fd, + local->cont.fsetxattr.dict, + local->cont.fsetxattr.flags, + NULL); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_fsetxattr_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + +int +afr_fsetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict, + op_errno, out); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict, + op_errno, out); + + priv = this->private; + + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } + + QUORUM_CHECK(fsetxattr,out); + + AFR_LOCAL_ALLOC_OR_GOTO (local, out); + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + goto out; + } + + transaction_frame->local = local; + + local->op_ret = -1; + + local->cont.fsetxattr.dict = dict_ref (dict); + local->cont.fsetxattr.flags = flags; + + local->transaction.fop = afr_fsetxattr_wind; + local->transaction.done = afr_fsetxattr_done; + local->transaction.unwind = afr_fsetxattr_unwind; + + local->fd = fd_ref (fd); + + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; + + ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + ret = 0; +out: + if (ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); } return 0; @@ -1544,6 +1779,7 @@ out: /* }}} */ + /* {{{ removexattr */ @@ -1565,38 +1801,34 @@ afr_removexattr_unwind (call_frame_t *frame, xlator_t *this) if (main_frame) { AFR_STACK_UNWIND (removexattr, main_frame, - local->op_ret, local->op_errno) - } + local->op_ret, local->op_errno, + NULL); + } return 0; } int afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int call_count = -1; - int need_unwind = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int need_unwind = 0; + int child_index = (long) cookie; local = frame->local; priv = this->private; LOCK (&frame->lock); { - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; - - if (local->success_count == priv->wait_count) { - need_unwind = 1; - } + __inode_write_fop_cbk (frame, child_index, -1, this, + &op_ret, &op_errno, NULL, NULL, + xdata); + if (local->success_count == priv->wait_count) { + need_unwind = 1; } - - local->op_errno = op_errno; } UNLOCK (&frame->lock); @@ -1641,7 +1873,8 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this) priv->children[i], priv->children[i]->fops->removexattr, &local->loc, - local->cont.removexattr.name); + local->cont.removexattr.name, + NULL); if (!--call_count) break; @@ -1667,7 +1900,192 @@ afr_removexattr_done (call_frame_t *frame, xlator_t *this) int afr_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (this, out); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*", + name, op_errno, out); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*", + name, op_errno, out); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + + QUORUM_CHECK(removexattr,out); + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + op_errno = ENOMEM; + goto out; + } + + AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); + local = transaction_frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; + + local->cont.removexattr.name = gf_strdup (name); + + local->transaction.fop = afr_removexattr_wind; + local->transaction.done = afr_removexattr_done; + local->transaction.unwind = afr_removexattr_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; + + ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + ret = 0; +out: + if (ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); + } + + return 0; +} + +/* ffremovexattr */ +int +afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (fremovexattr, main_frame, + local->op_ret, local->op_errno, + NULL); + } + return 0; +} + + +int +afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + int call_count = -1; + int need_unwind = 0; + int child_index = (long) cookie; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + __inode_write_fop_cbk (frame, child_index, -1, this, + &op_ret, &op_errno, NULL, NULL, + xdata); + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int32_t +afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_pre_op_done_children_count (local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fremovexattr, + local->fd, + local->cont.removexattr.name, + NULL); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_fremovexattr_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; @@ -1676,23 +2094,33 @@ afr_removexattr (call_frame_t *frame, xlator_t *this, int op_ret = -1; int op_errno = 0; - VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*", + name, op_errno, out); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*", + name, op_errno, out); + + VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); priv = this->private; + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } - QUORUM_CHECK(removexattr,out); + QUORUM_CHECK(fremovexattr, out); transaction_frame = copy_frame (frame); if (!transaction_frame) { goto out; } - ALLOC_OR_GOTO (local, afr_local_t, out); + AFR_LOCAL_ALLOC_OR_GOTO (local, out); - ret = AFR_LOCAL_INIT (local, priv); + ret = afr_local_init (local, priv, &op_errno); if (ret < 0) { op_errno = -ret; goto out; @@ -1704,25 +2132,730 @@ afr_removexattr (call_frame_t *frame, xlator_t *this, local->cont.removexattr.name = gf_strdup (name); - local->transaction.fop = afr_removexattr_wind; - local->transaction.done = afr_removexattr_done; - local->transaction.unwind = afr_removexattr_unwind; + local->transaction.fop = afr_fremovexattr_wind; + local->transaction.done = afr_fremovexattr_done; + local->transaction.unwind = afr_fremovexattr_unwind; - loc_copy (&local->loc, loc); + local->fd = fd_ref (fd); local->transaction.main_frame = frame; local->transaction.start = LLONG_MAX - 1; local->transaction.len = 0; - afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + op_ret = 0; +out: + if (op_ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); + } + + return 0; +} + +static int +afr_fallocate_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, + local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); + } + return 0; +} + +static int +afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + int child_index = (long) cookie; + int call_count = -1; + int need_unwind = 0; + int read_child = 0; + + local = frame->local; + priv = this->private; + + read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); + + LOCK (&frame->lock); + { + if (child_index == read_child) { + local->read_child_returned = _gf_true; + } + + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, prebuf, postbuf, + xdata); + + if ((local->success_count >= priv->wait_count) + && local->read_child_returned) { + need_unwind = 1; + } + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + +static int +afr_fallocate_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_pre_op_done_children_count (local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fallocate, + local->fd, + local->cont.fallocate.mode, + local->cont.fallocate.offset, + local->cont.fallocate.len, + NULL); + + if (!--call_count) + break; + } + } + + return 0; +} + +static int +afr_fallocate_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + +static int +afr_do_fallocate (call_frame_t *frame, xlator_t *this) +{ + call_frame_t * transaction_frame = NULL; + afr_local_t * local = NULL; + int op_ret = -1; + int op_errno = 0; + + local = frame->local; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + goto out; + } + + transaction_frame->local = local; + frame->local = NULL; + + local->op = GF_FOP_FALLOCATE; + + local->transaction.fop = afr_fallocate_wind; + local->transaction.done = afr_fallocate_done; + local->transaction.unwind = afr_fallocate_unwind; + + local->transaction.main_frame = frame; + + local->transaction.start = local->cont.fallocate.offset; + local->transaction.len = 0; + + /* fallocate can modify the file size */ + op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + op_ret = 0; +out: + if (op_ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL, + NULL, NULL); + } + + return 0; +} + +int +afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } + QUORUM_CHECK(fallocate,out); + + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; + + local->cont.fallocate.mode = mode; + local->cont.fallocate.offset = offset; + local->cont.fallocate.len = len; + + local->fd = fd_ref (fd); + + afr_open_fd_fix (fd, this); + + afr_do_fallocate (frame, this); + + ret = 0; +out: + if (ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ discard */ + +static int +afr_discard_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (discard, main_frame, local->op_ret, + local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, + NULL); + } + return 0; +} + +static int +afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + int child_index = (long) cookie; + int call_count = -1; + int need_unwind = 0; + int read_child = 0; + + local = frame->local; + priv = this->private; + + read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); + + LOCK (&frame->lock); + { + if (child_index == read_child) { + local->read_child_returned = _gf_true; + } + + __inode_write_fop_cbk (frame, child_index, read_child, this, + &op_ret, &op_errno, prebuf, postbuf, + xdata); + + if ((local->success_count >= priv->wait_count) + && local->read_child_returned) { + need_unwind = 1; + } + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + +static int +afr_discard_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_pre_op_done_children_count (local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->discard, + local->fd, + local->cont.discard.offset, + local->cont.discard.len, + NULL); + + if (!--call_count) + break; + } + } + + return 0; +} + +static int +afr_discard_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + +static int +afr_do_discard (call_frame_t *frame, xlator_t *this) +{ + call_frame_t * transaction_frame = NULL; + afr_local_t * local = NULL; + int op_ret = -1; + int op_errno = 0; + + local = frame->local; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + goto out; + } + + transaction_frame->local = local; + frame->local = NULL; + + local->op = GF_FOP_DISCARD; + + local->transaction.fop = afr_discard_wind; + local->transaction.done = afr_discard_done; + local->transaction.unwind = afr_discard_unwind; + + local->transaction.main_frame = frame; + + local->transaction.start = local->cont.discard.offset; + local->transaction.len = 0; + + op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } op_ret = 0; out: - if (op_ret == -1) { + if (op_ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL, + NULL, NULL); + } + + return 0; +} + +int +afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } + QUORUM_CHECK(discard, out); + + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; + + local->cont.discard.offset = offset; + local->cont.discard.len = len; + + local->fd = fd_ref (fd); + + afr_open_fd_fix (fd, this); + + afr_do_discard(frame, this); + + ret = 0; +out: + if (ret < 0) { if (transaction_frame) AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (removexattr, frame, op_ret, op_errno); + AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); + } + + return 0; +} + + +/* {{{ zerofill */ + +static int +afr_zerofill_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; } + UNLOCK (&frame->lock); + if (main_frame) { + AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret, + local->op_errno, + &local->cont.zerofill.prebuf, + &local->cont.zerofill.postbuf, + NULL); + } return 0; } + +static int +afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int child_index = (long) cookie; + int call_count = -1; + int need_unwind = 0; + int read_child = 0; + + local = frame->local; + priv = this->private; + + read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); + + LOCK (&frame->lock); + { + if (child_index == read_child) { + local->read_child_returned = _gf_true; + } + + if (afr_fop_failed (op_ret, op_errno)) { + afr_transaction_fop_failed (frame, this, child_index); + } + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.zerofill.prebuf = *prebuf; + local->cont.zerofill.postbuf = *postbuf; + } + + if (child_index == read_child) { + local->cont.zerofill.prebuf = *prebuf; + local->cont.zerofill.postbuf = *postbuf; + } + + local->success_count++; + + if ((local->success_count >= priv->wait_count) + && local->read_child_returned) { + need_unwind = 1; + } + } + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) { + local->transaction.unwind (frame, this); + } + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + +static int +afr_zerofill_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_pre_op_done_children_count (local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->zerofill, + local->fd, + local->cont.zerofill.offset, + local->cont.zerofill.len, + NULL); + + if (!--call_count) + break; + } + } + + return 0; +} + +static int +afr_zerofill_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + +static int +afr_do_zerofill(call_frame_t *frame, xlator_t *this) +{ + call_frame_t *transaction_frame = NULL; + afr_local_t *local = NULL; + int op_ret = -1; + int op_errno = 0; + + local = frame->local; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + goto out; + } + + transaction_frame->local = local; + frame->local = NULL; + + local->op = GF_FOP_ZEROFILL; + + local->transaction.fop = afr_zerofill_wind; + local->transaction.done = afr_zerofill_done; + local->transaction.unwind = afr_zerofill_unwind; + + local->transaction.main_frame = frame; + + local->transaction.start = local->cont.zerofill.offset; + local->transaction.len = 0; + + op_ret = afr_transaction (transaction_frame, this, + AFR_DATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + op_ret = 0; +out: + if (op_ret < 0) { + if (transaction_frame) { + AFR_STACK_DESTROY (transaction_frame); + } + AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL, + NULL, NULL); + } + + return 0; +} + +int +afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } + QUORUM_CHECK(zerofill, out); + + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) { + goto out; + } + local->cont.zerofill.offset = offset; + local->cont.zerofill.len = len; + + local->fd = fd_ref (fd); + + afr_open_fd_fix (fd, this); + + afr_do_zerofill(frame, this); + + ret = 0; +out: + if (ret < 0) { + if (transaction_frame) { + AFR_STACK_DESTROY (transaction_frame); + } + AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, + NULL, NULL); + } + + return 0; +} + +/* }}} */ + + |
