diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-inode-write.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 3672 |
1 files changed, 2046 insertions, 1626 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index a83223569d0..1d6e4f3570a 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -8,2138 +8,2558 @@ cases as published by the Free Software Foundation. */ - -#include <libgen.h> #include <unistd.h> -#include <fnmatch.h> #include <sys/time.h> #include <stdlib.h> #include <signal.h> -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "afr.h" -#include "dict.h" -#include "xlator.h" -#include "hashfn.h" -#include "logging.h" -#include "stack.h" -#include "list.h" -#include "call-stub.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" - +#include <glusterfs/glusterfs.h> #include "afr.h" +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include <glusterfs/defaults.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include "protocol-common.h" +#include <glusterfs/byte-order.h> #include "afr-transaction.h" -#include "afr-self-heal-common.h" +#include "afr-self-heal.h" +#include "afr-messages.h" -/* {{{ writev */ +static void +__afr_inode_write_finalize(call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int ret = 0; + int read_subvol = 0; + struct iatt *stbuf = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_read_subvol_args_t args = { + 0, + }; + + local = frame->local; + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, local->inode, out); + + /*This code needs to stay till DHT sends fops on linked + * inodes*/ + if (!inode_is_linked(local->inode)) { + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret == -1) + continue; + if (!gf_uuid_is_null(local->replies[i].poststat.ia_gfid)) { + gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid); + args.ia_type = local->replies[i].poststat.ia_type; + break; + } else { + ret = dict_get_bin(local->replies[i].xdata, + DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); + if (ret) + continue; + gf_uuid_copy(args.gfid, stbuf->ia_gfid); + args.ia_type = stbuf->ia_type; + break; + } + } + } + + if (local->transaction.type == AFR_METADATA_TRANSACTION) { + read_subvol = afr_metadata_subvol_get(local->inode, this, NULL, + local->readable, NULL, &args); + } else { + read_subvol = afr_data_subvol_get(local->inode, this, NULL, + local->readable, NULL, &args); + } + + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); + afr_pick_error_xdata(local, priv, local->inode, local->readable, NULL, + NULL); + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret < 0) + continue; + + /* Order of checks in the compound conditional + below is important. + + - Highest precedence: largest op_ret + - Next precedence: if all op_rets are equal, read subvol + - Least precedence: any succeeded subvol + */ + if ((local->op_ret < local->replies[i].op_ret) || + ((local->op_ret == local->replies[i].op_ret) && + (i == read_subvol))) { + local->op_ret = local->replies[i].op_ret; + local->op_errno = local->replies[i].op_errno; + + local->cont.inode_wfop.prebuf = local->replies[i].prestat; + local->cont.inode_wfop.postbuf = local->replies[i].poststat; + + if (local->replies[i].xdata) { + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); + local->xdata_rsp = dict_ref(local->replies[i].xdata); + } + if (local->replies[i].xattr) { + if (local->xattr_rsp) + dict_unref(local->xattr_rsp); + local->xattr_rsp = dict_ref(local->replies[i].xattr); + } + } + } + + afr_set_in_flight_sb_status(this, frame, local->inode); +out: + return; +} -void -afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame) +static void +__afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xattr, dict_t *xdata) { - afr_local_t *src_local = NULL; - afr_local_t *dst_local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + local->replies[child_index].valid = 1; + + if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1) + op_ret = iov_length(local->cont.writev.vector, + local->cont.writev.count); + + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + if (xdata) + local->replies[child_index].xdata = dict_ref(xdata); + + if (op_ret >= 0) { + if (prebuf) + local->replies[child_index].prestat = *prebuf; + if (postbuf) + local->replies[child_index].poststat = *postbuf; + if (xattr) + local->replies[child_index].xattr = dict_ref(xattr); + } else { + afr_transaction_fop_failed(frame, this, child_index); + } + + return; +} + +static int +__afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xattr, dict_t *xdata) +{ + afr_local_t *local = NULL; + int child_index = (long)cookie; + int call_count = -1; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + + LOCK(&frame->lock); + { + __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, + prebuf, postbuf, xattr, xdata); + call_count = --local->call_count; + } + UNLOCK(&frame->lock); + + if (call_count == 0) { + __afr_inode_write_finalize(frame, this); - src_local = src_frame->local; - dst_local = dst_frame->local; + if (afr_txn_nothing_failed(frame, this)) { + /*if it did pre-op, it will do post-op changing ctime*/ + if (priv->consistent_metadata && afr_needs_changelog_update(local)) + afr_zero_fill_stat(local); + local->transaction.unwind(frame, this); + } + + afr_transaction_resume(frame, this); + } - dst_local->op_ret = src_local->op_ret; - dst_local->op_errno = src_local->op_errno; - dst_local->cont.writev.prebuf = src_local->cont.writev.prebuf; - dst_local->cont.writev.postbuf = src_local->cont.writev.postbuf; + return 0; } +/* {{{ writev */ + void -afr_writev_unwind (call_frame_t *frame, xlator_t *this) +afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame) { - afr_local_t * local = NULL; - local = frame->local; - - AFR_STACK_UNWIND (writev, frame, - local->op_ret, local->op_errno, - &local->cont.writev.prebuf, - &local->cont.writev.postbuf, - NULL); + afr_local_t *src_local = NULL; + afr_local_t *dst_local = NULL; + + src_local = src_frame->local; + dst_local = dst_frame->local; + + dst_local->op_ret = src_local->op_ret; + dst_local->op_errno = src_local->op_errno; + dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf; + dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf; + if (src_local->xdata_rsp) + dst_local->xdata_rsp = dict_ref(src_local->xdata_rsp); } -call_frame_t* -afr_transaction_detach_fop_frame (call_frame_t *frame) +void +afr_writev_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *fop_frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - fop_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); + if (priv->consistent_metadata) + afr_zero_fill_stat(local); - return fop_frame; + AFR_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); } int -afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this) +afr_transaction_writev_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *fop_frame = NULL; + call_frame_t *fop_frame = NULL; - fop_frame = afr_transaction_detach_fop_frame (frame); + fop_frame = afr_transaction_detach_fop_frame(frame); - if (fop_frame) { - afr_writev_copy_outvars (frame, fop_frame); - afr_writev_unwind (fop_frame, this); - } - return 0; + if (fop_frame) { + afr_writev_copy_outvars(frame, fop_frame); + afr_writev_unwind(fop_frame, this); + } + return 0; } static void -afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this) +afr_writev_handle_short_writes(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - - local = frame->local; - priv = this->private; - /* - * We already have the best case result of the writev calls staged - * as the return value. Any writev that returns some value less - * than the best case is now out of sync, so mark the fop as - * failed. Note that fops that have returned with errors have - * already been marked as failed. - */ - for (i = 0; i < priv->child_count; i++) { - if ((!local->replies[i].valid) || - (local->replies[i].op_ret == -1)) - continue; - - if (local->replies[i].op_ret < local->op_ret) - afr_transaction_fop_failed(frame, this, i); - } + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + + local = frame->local; + priv = this->private; + /* + * We already have the best case result of the writev calls staged + * as the return value. Any writev that returns some value less + * than the best case is now out of sync, so mark the fop as + * failed. Note that fops that have returned with errors have + * already been marked as failed. + */ + for (i = 0; i < priv->child_count; i++) { + if ((!local->replies[i].valid) || (local->replies[i].op_ret == -1)) + continue; + + if (local->replies[i].op_ret < local->op_ret) + afr_transaction_fop_failed(frame, this, i); + } } -int -afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +void +afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - afr_local_t * local = NULL; - call_frame_t *fop_frame = NULL; - int child_index = (long) cookie; - int call_count = -1; - int read_child = 0; - - local = frame->local; - - read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); - - LOCK (&frame->lock); - { - if (child_index == read_child) { - local->read_child_returned = _gf_true; - } - - local->replies[child_index].valid = 1; - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - - /* stage the best case return value for unwind */ - if ((local->success_count == 0) || (op_ret > local->op_ret)) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - - if (op_ret != -1) { - if ((local->success_count == 0) || - (child_index == read_child)) { - local->cont.writev.prebuf = *prebuf; - local->cont.writev.postbuf = *postbuf; - } - local->success_count++; - } - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - - afr_writev_handle_short_writes (frame, this); - /* - * Generally inode-write fops do transaction.unwind then - * transaction.resume, but writev needs to make sure that - * delayed post-op frame is placed in fdctx before unwind - * happens. This prevents the race of flush doing the - * changelog wakeup first in fuse thread and then this - * writev placing its delayed post-op frame in fdctx. - * This helps flush make sure all the delayed post-ops are - * completed. - */ - - fop_frame = afr_transaction_detach_fop_frame (frame); - afr_writev_copy_outvars (frame, fop_frame); - local->transaction.resume (frame, this); - afr_writev_unwind (fop_frame, this); + int ret = 0; + afr_local_t *local = frame->local; + uint32_t open_fd_count = 0; + uint32_t write_is_append = 0; + int32_t num_inodelks = 0; + + LOCK(&frame->lock); + { + __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, + prebuf, postbuf, NULL, xdata); + if (op_ret == -1 || !xdata) + goto unlock; + + write_is_append = 0; + ret = dict_get_uint32(xdata, GLUSTERFS_WRITE_IS_APPEND, + &write_is_append); + if (ret || !write_is_append) + local->append_write = _gf_false; + + ret = dict_get_uint32(xdata, GLUSTERFS_ACTIVE_FD_COUNT, &open_fd_count); + if (ret < 0) + goto unlock; + if (open_fd_count > local->open_fd_count) { + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; } - return 0; + + ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT, + &num_inodelks); + if (ret < 0) + goto unlock; + if (num_inodelks > local->num_inodelks) { + local->num_inodelks = num_inodelks; + local->update_num_inodelks = _gf_true; + } + } +unlock: + UNLOCK(&frame->lock); } -int -afr_writev_wind (call_frame_t *frame, xlator_t *this) +void +afr_process_post_writev(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = -1; - - local = frame->local; - priv = this->private; + afr_local_t *local = NULL; + afr_lock_t *lock = NULL; + + local = frame->local; + + if (!local->stable_write && !local->append_write) + /* An appended write removes the necessity to + fsync() the file. This is because self-heal + has the logic to check for larger file when + the xattrs are not reliably pointing at + a stale file. + */ + afr_fd_report_unstable_write(this, local); + + __afr_inode_write_finalize(frame, this); + + afr_writev_handle_short_writes(frame, this); + + if (local->update_open_fd_count) + local->inode_ctx->open_fd_count = local->open_fd_count; + if (local->update_num_inodelks && + local->transaction.type == AFR_DATA_TRANSACTION) { + lock = &local->inode_ctx->lock[local->transaction.type]; + lock->num_inodelks = local->num_inodelks; + } +} - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); +int +afr_writev_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_frame_t *fop_frame = NULL; + int child_index = (long)cookie; + int call_count = -1; - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } + afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, prebuf, + postbuf, xdata); - local->call_count = call_count; - local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies), - gf_afr_mt_reply_t); - if (!local->replies) { - local->op_ret = -1; - local->op_errno = ENOMEM; - local->transaction.unwind(frame, this); - local->transaction.resume(frame, this); - return 0; - } + call_count = afr_frame_return(frame); - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->writev, - local->fd, - local->cont.writev.vector, - local->cont.writev.count, - local->cont.writev.offset, - local->cont.writev.flags, - local->cont.writev.iobref, - NULL); - - if (!--call_count) - break; - } - } + if (call_count == 0) { + afr_process_post_writev(frame, this); - return 0; + if (!afr_txn_nothing_failed(frame, this)) { + // Don't unwind until post-op is complete + afr_transaction_resume(frame, this); + } else { + /* + * Generally inode-write fops do transaction.unwind then + * transaction.resume, but writev needs to make sure that + * delayed post-op frame is placed in fdctx before unwind + * happens. This prevents the race of flush doing the + * changelog wakeup first in fuse thread and then this + * writev placing its delayed post-op frame in fdctx. + * This helps flush make sure all the delayed post-ops are + * completed. + */ + + fop_frame = afr_transaction_detach_fop_frame(frame); + afr_writev_copy_outvars(frame, fop_frame); + afr_transaction_resume(frame, this); + afr_writev_unwind(fop_frame, this); + } + } + return 0; } +static int +afr_arbiter_writev_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + static char byte = 0xFF; + static struct iovec vector = {&byte, 1}; + int32_t count = 1; + + STACK_WIND_COOKIE( + frame, afr_writev_wind_cbk, (void *)(long)subvol, + priv->children[subvol], priv->children[subvol]->fops->writev, local->fd, + &vector, count, local->cont.writev.offset, local->cont.writev.flags, + local->cont.writev.iobref, local->xdata_req); + + return 0; +} int -afr_writev_done (call_frame_t *frame, xlator_t *this) +afr_writev_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - - local = frame->local; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - iobref_unref (local->cont.writev.iobref); - local->cont.writev.iobref = NULL; - - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); + local = frame->local; + priv = this->private; + if (AFR_IS_ARBITER_BRICK(priv, subvol)) { + afr_arbiter_writev_wind(frame, this, subvol); return 0; + } + + STACK_WIND_COOKIE(frame, afr_writev_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->writev, local->fd, + local->cont.writev.vector, local->cont.writev.count, + local->cont.writev.offset, local->cont.writev.flags, + local->cont.writev.iobref, local->xdata_req); + return 0; } - int -afr_do_writev (call_frame_t *frame, xlator_t *this) +afr_do_writev(call_frame_t *frame, xlator_t *this) { - call_frame_t *transaction_frame = NULL; - afr_local_t *local = NULL; - int op_ret = -1; - int op_errno = 0; + call_frame_t *transaction_frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int op_errno = ENOMEM; - local = frame->local; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } + local = frame->local; + transaction_frame->local = local; + frame->local = NULL; - transaction_frame->local = local; - AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + if (!AFR_FRAME_INIT(frame, op_errno)) + goto out; - local->op = GF_FOP_WRITE; + local->op = GF_FOP_WRITE; - local->success_count = 0; + local->transaction.wind = afr_writev_wind; + local->transaction.unwind = afr_transaction_writev_unwind; - local->transaction.fop = afr_writev_wind; - local->transaction.done = afr_writev_done; - local->transaction.unwind = afr_transaction_writev_unwind; + local->transaction.main_frame = frame; - local->transaction.main_frame = frame; - if (local->fd->flags & O_APPEND) { - /* - * Backend vfs ignores the 'offset' for append mode fd so - * locking just the region provided for the writev does not - * give consistency gurantee. The actual write may happen at a - * completely different range than the one provided by the - * offset, len in the fop. So lock the entire file. - */ - local->transaction.start = 0; - local->transaction.len = 0; - } else { - local->transaction.start = local->cont.writev.offset; - local->transaction.len = iov_length (local->cont.writev.vector, - local->cont.writev.count); - } - - op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (op_ret < 0) { - op_errno = -op_ret; - goto out; - } - - op_ret = 0; + if (local->fd->flags & O_APPEND) { + /* + * Backend vfs ignores the 'offset' for append mode fd so + * locking just the region provided for the writev does not + * give consistency guarantee. The actual write may happen at a + * completely different range than the one provided by the + * offset, len in the fop. So lock the entire file. + */ + local->transaction.start = 0; + local->transaction.len = 0; + } else { + local->transaction.start = local->cont.writev.offset; + local->transaction.len = iov_length(local->cont.writev.vector, + local->cont.writev.count); + } + + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (op_ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } -static void -afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this) -{ - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - char *reason = NULL; - int32_t op_errno = 0; - int ret = 0; - - if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { - gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: " - "fd: %p, inode: %p", fd, - fd ? fd->inode : NULL); - goto out; - } - - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; +int +afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + afr_local_t *local = NULL; + int op_errno = ENOMEM; + int ret = -1; + + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->cont.writev.vector = iov_dup(vector, count); + if (!local->cont.writev.vector) + goto out; + local->cont.writev.count = count; + local->cont.writev.offset = offset; + local->cont.writev.flags = flags; + local->cont.writev.iobref = iobref_ref(iobref); + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; + + if (dict_set_uint32(local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) { + op_errno = ENOMEM; + goto out; + } + + if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT, + this->name)) { + op_errno = ENOMEM; + goto out; + } + + if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) { + op_errno = ENOMEM; + goto out; + } + + /* Set append_write to be true speculatively. If on any + server it turns not be true, we unset it in the + callback. + */ + local->append_write = _gf_true; + + /* detect here, but set it in writev_wind_cbk *after* the unstable + write is performed + */ + local->stable_write = !!((fd->flags | flags) & (O_SYNC | O_DSYNC)); + + afr_fix_open(fd, this); + + afr_do_writev(frame, this); + + return 0; +out: + AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); - AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); - local = frame->local; - ret = afr_local_init (local, this->private, &op_errno); - if (ret < 0) - goto out; + return 0; +} - local->loc.inode = inode_ref (fd->inode); - ret = loc_path (&local->loc, NULL); - if (ret < 0) - goto out; +/* }}} */ - sh = &local->self_heal; - sh->do_metadata_self_heal = _gf_true; - if (fd->inode->ia_type == IA_IFREG) - sh->do_data_self_heal = _gf_true; - else if (fd->inode->ia_type == IA_IFDIR) - sh->do_entry_self_heal = _gf_true; - - reason = "subvolume came online"; - afr_launch_self_heal (frame, this, fd->inode, _gf_true, - fd->inode->ia_type, reason, NULL, NULL); - return; -out: - AFR_STACK_DESTROY (frame); -} +/* {{{ truncate */ -void -afr_open_fd_fix (fd_t *fd, xlator_t *this) +int +afr_truncate_unwind(call_frame_t *frame, xlator_t *this) { - int ret = 0; - int i = 0; - afr_fd_ctx_t *fd_ctx = NULL; - gf_boolean_t need_self_heal = _gf_false; - int *need_open = NULL; - size_t need_open_count = 0; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - priv = this->private; + local = frame->local; - if (!afr_is_fd_fixable (fd)) - goto out; - - fd_ctx = afr_fd_ctx_get (fd, this); - if (!fd_ctx) - goto out; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) + return 0; - LOCK (&fd->lock); - { - if (fd_ctx->up_count < priv->up_count) { - need_self_heal = _gf_true; - fd_ctx->up_count = priv->up_count; - fd_ctx->down_count = priv->down_count; - } + AFR_STACK_UNWIND(truncate, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} - need_open = alloca (priv->child_count * sizeof (*need_open)); - for (i = 0; i < priv->child_count; i++) { - need_open[i] = 0; - if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED) - continue; +int +afr_truncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t *local = NULL; - if (!priv->child_up[i]) - continue; + local = frame->local; - fd_ctx->opened_on[i] = AFR_FD_OPENING; + if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size) + local->stable_write = _gf_false; - need_open[i] = 1; - need_open_count++; - } - } - UNLOCK (&fd->lock); - if (ret) - goto out; + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); +} - if (need_self_heal) - afr_trigger_open_fd_self_heal (fd, this); +int +afr_truncate_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - if (!need_open_count) - goto out; + local = frame->local; + priv = this->private; - afr_fix_open (this, fd, need_open_count, need_open); -out: - return; + STACK_WIND_COOKIE(frame, afr_truncate_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->truncate, &local->loc, + local->cont.truncate.offset, local->xdata_req); + return 0; } int -afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) +afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - int ret = -1; - int op_errno = 0; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - priv = this->private; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - if (afr_is_split_brain (this, fd->inode)) { - op_errno = EIO; - goto out; - } + local->cont.truncate.offset = offset; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - QUORUM_CHECK(writev,out); + if (!local->xdata_req) + goto out; - AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); - local = frame->local; + local->transaction.wind = afr_truncate_wind; + local->transaction.unwind = afr_truncate_unwind; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local->cont.writev.vector = iov_dup (vector, count); - local->cont.writev.count = count; - local->cont.writev.offset = offset; - local->cont.writev.flags = flags; - local->cont.writev.iobref = iobref_ref (iobref); + local->op = GF_FOP_TRUNCATE; - local->fd = fd_ref (fd); + local->transaction.main_frame = frame; + local->transaction.start = offset; + local->transaction.len = 0; - afr_open_fd_fix (fd, this); + /* Set it true speculatively, will get reset in afr_truncate_wind_cbk + if truncate was not a NOP */ + local->stable_write = _gf_true; - afr_do_writev (frame, this); + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - ret = 0; + return 0; out: - if (ret < 0) - AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - /* }}} */ -/* {{{ truncate */ +/* {{{ ftruncate */ int -afr_truncate_unwind (call_frame_t *frame, xlator_t *this) +afr_ftruncate_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, - local->op_errno, - &local->cont.truncate.prebuf, - &local->cont.truncate.postbuf, - NULL); - } + local = frame->local; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(ftruncate, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +afr_ftruncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int child_index = (long) cookie; - int read_child = 0; - int call_count = -1; - int need_unwind = 0; - - local = frame->local; - priv = this->private; - - read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL); - - LOCK (&frame->lock); - { - if (child_index == read_child) { - local->read_child_returned = _gf_true; - } - - if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG) - afr_transaction_fop_failed (frame, this, child_index); - - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.truncate.prebuf = *prebuf; - local->cont.truncate.postbuf = *postbuf; - } - - if (child_index == read_child) { - local->cont.truncate.prebuf = *prebuf; - local->cont.truncate.postbuf = *postbuf; - } - - local->success_count++; - - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } - } - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); + afr_local_t *local = NULL; - if (need_unwind) - local->transaction.unwind (frame, this); + local = frame->local; - call_count = afr_frame_return (frame); - - if (call_count == 0) { - local->transaction.resume (frame, this); - } + if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size) + local->stable_write = _gf_false; - return 0; + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - -int32_t -afr_truncate_wind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; - - local = frame->local; - priv = this->private; - - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); - - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->truncate, - &local->loc, - local->cont.truncate.offset, - NULL); - - if (!--call_count) - break; - } - } - - return 0; -} - - int -afr_truncate_done (call_frame_t *frame, xlator_t *this) +afr_ftruncate_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; + local = frame->local; + priv = this->private; - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); - - return 0; + STACK_WIND_COOKIE(frame, afr_ftruncate_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->ftruncate, local->fd, + local->cont.ftruncate.offset, local->xdata_req); + return 0; } - int -afr_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset, dict_t *xdata) +afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = 0; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - priv = this->private; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - QUORUM_CHECK(truncate,out); + local->cont.ftruncate.offset = offset; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } + if (!local->xdata_req) + goto out; - AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); - local = transaction_frame->local; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + local->op = GF_FOP_FTRUNCATE; - local->cont.truncate.offset = offset; + local->transaction.wind = afr_ftruncate_wind; + local->transaction.unwind = afr_ftruncate_unwind; - local->transaction.fop = afr_truncate_wind; - local->transaction.done = afr_truncate_done; - local->transaction.unwind = afr_truncate_unwind; + local->transaction.main_frame = frame; - loc_copy (&local->loc, loc); + local->transaction.start = local->cont.ftruncate.offset; + local->transaction.len = 0; - local->transaction.main_frame = frame; - local->transaction.start = offset; - local->transaction.len = 0; + afr_fix_open(fd, this); - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk + if truncate was not a NOP */ + local->stable_write = _gf_true; - ret = 0; + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); - } + AFR_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - /* }}} */ -/* {{{ ftruncate */ - +/* {{{ setattr */ int -afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this) +afr_setattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, - local->op_errno, - &local->cont.ftruncate.prebuf, - &local->cont.ftruncate.postbuf, - NULL); - } + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(setattr, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_setattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int child_index = (long) cookie; - int call_count = -1; - int need_unwind = 0; - int read_child = 0; - - local = frame->local; - priv = this->private; - - read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); - - LOCK (&frame->lock); - { - if (child_index == read_child) { - local->read_child_returned = _gf_true; - } - - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.ftruncate.prebuf = *prebuf; - local->cont.ftruncate.postbuf = *postbuf; - } - - if (child_index == read_child) { - local->cont.ftruncate.prebuf = *prebuf; - local->cont.ftruncate.postbuf = *postbuf; - } - - local->success_count++; - - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } - } - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - if (need_unwind) - local->transaction.unwind (frame, this); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - local->transaction.resume (frame, this); - } - - return 0; + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop, + postop, NULL, xdata); } - int -afr_ftruncate_wind (call_frame_t *frame, xlator_t *this) +afr_setattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; - - local = frame->local; - priv = this->private; - - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); - - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->ftruncate, - local->fd, - local->cont.ftruncate.offset, - NULL); - - if (!--call_count) - break; - } - } - - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_setattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->setattr, &local->loc, + &local->cont.setattr.in_buf, local->cont.setattr.valid, + local->xdata_req); + return 0; } - int -afr_ftruncate_done (call_frame_t *frame, xlator_t *this) +afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, + int32_t valid, dict_t *xdata) { - afr_local_t *local = NULL; - - local = frame->local; - - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + local->cont.setattr.in_buf = *buf; + local->cont.setattr.valid = valid; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->transaction.wind = afr_setattr_wind; + local->transaction.unwind = afr_setattr_unwind; + + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; + + local->op = GF_FOP_SETATTR; + + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; + + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; +out: + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } +/* {{{ fsetattr */ int -afr_do_ftruncate (call_frame_t *frame, xlator_t *this) +afr_fsetattr_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t * transaction_frame = NULL; - afr_local_t * local = NULL; - int op_ret = -1; - int op_errno = 0; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - goto out; - } - - transaction_frame->local = local; - frame->local = NULL; - - local->op = GF_FOP_FTRUNCATE; - - local->transaction.fop = afr_ftruncate_wind; - local->transaction.done = afr_ftruncate_done; - local->transaction.unwind = afr_ftruncate_unwind; - - local->transaction.main_frame = frame; - - local->transaction.start = local->cont.ftruncate.offset; - local->transaction.len = 0; - - op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (op_ret < 0) { - op_errno = -op_ret; - goto out; - } + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) + return 0; - op_ret = 0; -out: - if (op_ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, - NULL, NULL); - } + AFR_STACK_UNWIND(fsetattr, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} - return 0; +int +afr_fsetattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop, + postop, NULL, xdata); } +int +afr_fsetattr_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_fsetattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fsetattr, local->fd, + &local->cont.fsetattr.in_buf, local->cont.fsetattr.valid, + local->xdata_req); + return 0; +} int -afr_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, dict_t *xdata) +afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf, + int32_t valid, dict_t *xdata) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = 0; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - priv = this->private; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - if (afr_is_split_brain (this, fd->inode)) { - op_errno = EIO; - goto out; - } - QUORUM_CHECK(ftruncate,out); + local->cont.fsetattr.in_buf = *buf; + local->cont.fsetattr.valid = valid; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); - local = frame->local; + if (!local->xdata_req) + goto out; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + local->transaction.wind = afr_fsetattr_wind; + local->transaction.unwind = afr_fsetattr_unwind; - local->cont.ftruncate.offset = offset; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->fd = fd_ref (fd); + local->op = GF_FOP_FSETATTR; - afr_open_fd_fix (fd, this); + afr_fix_open(fd, this); - afr_do_ftruncate (frame, this); + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = 0; + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } -/* }}} */ - -/* {{{ setattr */ +/* {{{ setxattr */ int -afr_setattr_unwind (call_frame_t *frame, xlator_t *this) +afr_setxattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; - - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, - local->op_errno, - &local->cont.setattr.preop_buf, - &local->cont.setattr.postop_buf, - NULL); - } + local = frame->local; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; + + AFR_STACK_UNWIND(setxattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; } +int +afr_setxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); +} int -afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) +afr_setxattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int child_index = (long) cookie; - int read_child = 0; - int call_count = -1; - int need_unwind = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_setxattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->setxattr, &local->loc, + local->cont.setxattr.dict, local->cont.setxattr.flags, + local->xdata_req); + return 0; +} - local = frame->local; - priv = this->private; +int +afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) - read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL); +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i, ret = 0; + char *op_type = NULL; - LOCK (&frame->lock); - { - if (child_index == read_child) { - local->read_child_returned = _gf_true; - } + local = frame->local; + priv = this->private; + i = (long)cookie; - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.setattr.preop_buf = *preop; - local->cont.setattr.postop_buf = *postop; - } + ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type); + if (ret) + goto out; - if (child_index == read_child) { - local->cont.setattr.preop_buf = *preop; - local->cont.setattr.postop_buf = *postop; - } + gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO, + op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s", + priv->children[i]->name, "op_ret=%s", + op_ret ? "failed" : "succeeded", NULL); - local->success_count++; +out: + syncbarrier_wake(&local->barrier); + return 0; +} - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } - } - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); +int +afr_emptyb_set_pending_changelog(call_frame_t *frame, xlator_t *this, + unsigned char *locked_nodes) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = 0, i = 0; - if (need_unwind) - local->transaction.unwind (frame, this); + local = frame->local; + priv = this->private; - call_count = afr_frame_return (frame); + AFR_ONLIST(locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk, + xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, local->xattr_req, + NULL); - if (call_count == 0) { - local->transaction.resume (frame, this); - } + /* It is sufficient if xattrop was successful on one child */ + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; - return 0; + if (local->replies[i].op_ret == 0) { + ret = 0; + goto out; + } else { + ret = afr_higher_errno(ret, local->replies[i].op_errno); + } + } +out: + return -ret; } - -int32_t -afr_setattr_wind (call_frame_t *frame, xlator_t *this) +static int +_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc, + int empty_index, afr_transaction_type type, + char *op_type, const int op_type_len) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; - - local = frame->local; - priv = this->private; - - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); - - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } + int count = 0; + int ret = -ENOMEM; + int idx = -1; + int d_idx = -1; + unsigned char *locked_nodes = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + + locked_nodes = alloca0(priv->child_count); + + idx = afr_index_for_transaction_type(type); + d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION); + + local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS); + if (!local->pending) + goto out; + + local->pending[empty_index][idx] = hton32(1); + + if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION)) + local->pending[empty_index][d_idx] = hton32(1); + + local->xdata_req = dict_new(); + if (!local->xdata_req) + goto out; + + ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op", + SLEN("replicate-brick-op"), op_type, op_type_len); + if (ret) + goto out; + + local->xattr_req = dict_new(); + if (!local->xattr_req) + goto out; + + ret = afr_set_pending_dict(priv, local->xattr_req, local->pending); + if (ret < 0) + goto out; + + if (AFR_ENTRY_TRANSACTION == type) { + count = afr_selfheal_entrylk(frame, this, loc->inode, this->name, NULL, + locked_nodes); + } else { + count = afr_selfheal_inodelk(frame, this, loc->inode, this->name, + LLONG_MAX - 1, 0, locked_nodes); + } + + if (!count) { + gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS, + NULL); + ret = -EAGAIN; + goto unlock; + } + + ret = afr_emptyb_set_pending_changelog(frame, this, locked_nodes); + if (ret) + goto unlock; + ret = 0; +unlock: + if (AFR_ENTRY_TRANSACTION == type) { + afr_selfheal_unentrylk(frame, this, loc->inode, this->name, NULL, + locked_nodes, NULL); + } else { + afr_selfheal_uninodelk(frame, this, loc->inode, this->name, + LLONG_MAX - 1, 0, locked_nodes); + } +out: + return ret; +} - local->call_count = call_count; +void +afr_brick_args_cleanup(void *opaque) +{ + afr_empty_brick_args_t *data = NULL; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->setattr, - &local->loc, - &local->cont.setattr.in_buf, - local->cont.setattr.valid, - NULL); - - if (!--call_count) - break; - } - } + data = opaque; + loc_wipe(&data->loc); + GF_FREE(data); +} - return 0; +int +_afr_handle_empty_brick_cbk(int ret, call_frame_t *frame, void *opaque) +{ + afr_brick_args_cleanup(opaque); + return 0; } +int +_afr_handle_empty_brick(void *opaque) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int empty_index = -1; + int ret = -1; + int op_errno = ENOMEM; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + char *op_type = NULL; + int op_type_len = 0; + afr_empty_brick_args_t *data = NULL; + call_frame_t *op_frame = NULL; + + data = opaque; + frame = data->frame; + empty_index = data->empty_index; + if (!data->op_type) + goto out; + + op_frame = copy_frame(frame); + if (!op_frame) { + ret = -1; + op_errno = ENOMEM; + goto out; + } + + op_type = data->op_type; + op_type_len = strlen(op_type); + this = op_frame->this; + priv = this->private; + + afr_set_lk_owner(op_frame, this, op_frame->root); + local = AFR_FRAME_INIT(op_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, &data->loc); + + gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s", + priv->children[empty_index]->name, NULL); + + ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index, + AFR_METADATA_TRANSACTION, op_type, + op_type_len); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + + dict_unref(local->xdata_req); + dict_unref(local->xattr_req); + afr_matrix_cleanup(local->pending, priv->child_count); + local->pending = NULL; + local->xattr_req = NULL; + local->xdata_req = NULL; + + ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index, + AFR_ENTRY_TRANSACTION, op_type, + op_type_len); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = 0; +out: + if (op_frame) { + AFR_STACK_DESTROY(op_frame); + } + AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL); + return 0; +} int -afr_setattr_done (call_frame_t *frame, xlator_t *this) +afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc, + char *data) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; + int ret = -1; + int op_errno = EINVAL; + + local = frame->local; + local->xdata_req = dict_new(); + + if (!local->xdata_req) { + op_errno = ENOMEM; + goto out; + } + + ret = dict_set_int32_sizen(local->xdata_req, "heal-op", + GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = dict_set_str_sizen(local->xdata_req, "child-name", data); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + /* set spb choice to -1 whether heal succeeds or not: + * If heal succeeds : spb-choice should be set to -1 as + * it is no longer valid; file is not + * in split-brain anymore. + * If heal doesn't succeed: + * spb-choice should be set to -1 + * otherwise reads will be served + * from spb-choice which is misleading. + */ + ret = afr_inode_split_brain_choice_set(loc->inode, this, -1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED, + NULL); + afr_heal_splitbrain_file(frame, this, loc); + ret = 0; +out: + if (ret < 0) + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + return 0; +} - local = frame->local; +int +afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len) +{ + int spb_child_index = -1; + char *spb_child_str = NULL; - local->transaction.unwind (frame, this); + spb_child_str = alloca0(len + 1); + memcpy(spb_child_str, value, len); - AFR_STACK_DESTROY (frame); + if (!strcmp(spb_child_str, "none")) + return -2; - return 0; + spb_child_index = afr_get_child_index_from_name(this, spb_child_str); + if (spb_child_index < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL, + "subvol=%s", spb_child_str, NULL); + } + return spb_child_index; } - int -afr_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata) +afr_can_set_split_brain_choice(void *opaque) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - QUORUM_CHECK(setattr,out); + afr_spbc_timeout_t *data = opaque; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + int ret = -1; + + frame = data->frame; + loc = data->loc; + this = frame->this; + + ret = afr_is_split_brain(frame, this, loc->inode, loc->gfid, &data->d_spb, + &data->m_spb); + + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s", + uuid_utoa(loc->gfid), NULL); + return ret; +} - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; +int +afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc, + dict_t *dict) +{ + void *choice_value = NULL; + void *resolve_value = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_spbc_timeout_t *data = NULL; + int len = 0; + int spb_child_index = -1; + int ret = -1; + int op_errno = EINVAL; + + priv = this->private; + + ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len); + ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value, + &len); + if (!choice_value && !resolve_value) { + ret = -1; + goto out; + } + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) { + ret = 1; + goto out; + } + + local->op = GF_FOP_SETXATTR; + + if (choice_value) { + spb_child_index = afr_get_split_brain_child_index(this, choice_value, + len); + if (spb_child_index < 0) { + /* Case where value was "none" */ + if (spb_child_index == -2) + spb_child_index = -1; + else { + ret = 1; + op_errno = EINVAL; goto out; + } } - AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); - local = transaction_frame->local; - - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; - - local->cont.setattr.in_buf = *buf; - local->cont.setattr.valid = valid; - - local->transaction.fop = afr_setattr_wind; - local->transaction.done = afr_setattr_done; - local->transaction.unwind = afr_setattr_unwind; - - loc_copy (&local->loc, loc); - - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; - - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; + data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spbc_timeout_t); + if (!data) { + ret = 1; + goto out; + } + data->spb_child_index = spb_child_index; + data->frame = frame; + loc_copy(&local->loc, loc); + data->loc = &local->loc; + ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice, + afr_set_split_brain_choice, NULL, data); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS, + "name=%s", loc->name, NULL); + ret = 1; + op_errno = ENOMEM; goto out; } - ret = 0; -out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); + goto out; + } + + if (resolve_value) { + spb_child_index = afr_get_split_brain_child_index(this, resolve_value, + len); + if (spb_child_index < 0) { + ret = 1; + goto out; } - return 0; + afr_split_brain_resolve_do(frame, this, loc, + priv->children[spb_child_index]->name); + ret = 0; + } +out: + /* key was correct but value was invalid when ret == 1 */ + if (ret == 1) { + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + if (data) + GF_FREE(data); + ret = 0; + } + return ret; } -/* {{{ fsetattr */ - int -afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this) +afr_handle_spb_choice_timeout(xlator_t *this, call_frame_t *frame, dict_t *dict) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + int ret = -1; + int op_errno = 0; + uint64_t timeout = 0; + afr_private_t *priv = NULL; - local = frame->local; + priv = this->private; - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, - local->op_errno, - &local->cont.fsetattr.preop_buf, - &local->cont.fsetattr.postop_buf, - NULL); - } + ret = dict_get_uint64(dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout); + if (!ret) { + priv->spb_choice_timeout = timeout * 60; + AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL); + } - return 0; + return ret; } - int -afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) +afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc, + dict_t *dict) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int child_index = (long) cookie; - int read_child = 0; - int call_count = -1; - int need_unwind = 0; + int ret = -1; + int ab_ret = -1; + int empty_index = -1; + int op_errno = EPERM; + char *empty_brick = NULL; + char *op_type = NULL; + afr_empty_brick_args_t *data = NULL; + + ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick); + if (!ret) + op_type = GF_AFR_REPLACE_BRICK; + + ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick); + if (!ab_ret) + op_type = GF_AFR_ADD_BRICK; + + if (ret && ab_ret) + goto out; + + if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) { + gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR, + "op_type=%s", op_type, NULL); + ret = 1; + goto out; + } + empty_index = afr_get_child_index_from_name(this, empty_brick); + + if (empty_index < 0) { + /* Didn't belong to this replica pair + * Just do a no-op + */ + AFR_STACK_UNWIND(setxattr, frame, 0, 0, NULL); + return 0; + } else { + data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_empty_brick_t); + if (!data) { + ret = 1; + op_errno = ENOMEM; + goto out; + } + data->frame = frame; + loc_copy(&data->loc, loc); + data->empty_index = empty_index; + data->op_type = op_type; + ret = synctask_new(this->ctx->env, _afr_handle_empty_brick, + _afr_handle_empty_brick_cbk, NULL, data); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS, + NULL); + ret = 1; + op_errno = ENOMEM; + afr_brick_args_cleanup(data); + goto out; + } + } + ret = 0; +out: + if (ret == 1) { + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + ret = 0; + } + return ret; +} - local = frame->local; - priv = this->private; +static int +afr_handle_special_xattr(xlator_t *this, call_frame_t *frame, loc_t *loc, + dict_t *dict) +{ + int ret = -1; - read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); + ret = afr_handle_split_brain_commands(this, frame, loc, dict); + if (ret == 0) + goto out; - LOCK (&frame->lock); - { - if (child_index == read_child) { - local->read_child_returned = _gf_true; - } + ret = afr_handle_spb_choice_timeout(this, frame, dict); + if (ret == 0) + goto out; - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); + /* Applicable for replace-brick and add-brick commands */ + ret = afr_handle_empty_brick(this, frame, loc, dict); +out: + return ret; +} - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - local->cont.fsetattr.preop_buf = *preop; - local->cont.fsetattr.postop_buf = *postop; - } +int +afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = EINVAL; - if (child_index == read_child) { - local->cont.fsetattr.preop_buf = *preop; - local->cont.fsetattr.postop_buf = *postop; - } + GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out); - local->success_count++; + GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out); - if ((local->success_count >= priv->wait_count) - && local->read_child_returned) { - need_unwind = 1; - } - } - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); + ret = afr_handle_special_xattr(this, frame, loc, dict); + if (ret == 0) + return 0; - if (need_unwind) - local->transaction.unwind (frame, this); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - call_count = afr_frame_return (frame); + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - if (call_count == 0) { - local->transaction.resume (frame, this); - } + local->cont.setxattr.dict = dict_ref(dict); + local->cont.setxattr.flags = flags; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - return 0; -} + if (!local->xdata_req) + goto out; + local->transaction.wind = afr_setxattr_wind; + local->transaction.unwind = afr_setxattr_unwind; -int32_t -afr_fsetattr_wind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local = frame->local; - priv = this->private; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); + local->op = GF_FOP_SETXATTR; - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - local->call_count = call_count; + return 0; +out: + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fsetattr, - local->fd, - &local->cont.fsetattr.in_buf, - local->cont.fsetattr.valid, - NULL); - - if (!--call_count) - break; - } - } + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } +/* {{{ fsetxattr */ int -afr_fsetattr_done (call_frame_t *frame, xlator_t *this) +afr_fsetxattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - local->transaction.unwind (frame, this); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) + return 0; - AFR_STACK_DESTROY (frame); + AFR_STACK_UNWIND(fsetxattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; +} - return 0; +int +afr_fsetxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); } int -afr_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata) +afr_fsetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_fsetxattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fsetxattr, local->fd, + local->cont.fsetxattr.dict, local->cont.fsetxattr.flags, + local->xdata_req); + return 0; +} - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); +int +afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - priv = this->private; + GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out); - if (afr_is_split_brain (this, fd->inode)) { - op_errno = EIO; - goto out; - } + GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out); - QUORUM_CHECK(fsetattr,out); + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); - local = transaction_frame->local; + local->cont.fsetxattr.dict = dict_ref(dict); + local->cont.fsetxattr.flags = flags; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - local->cont.fsetattr.in_buf = *buf; - local->cont.fsetattr.valid = valid; + if (!local->xdata_req) + goto out; - local->transaction.fop = afr_fsetattr_wind; - local->transaction.done = afr_fsetattr_done; - local->transaction.unwind = afr_fsetattr_unwind; + local->transaction.wind = afr_fsetxattr_wind; + local->transaction.unwind = afr_fsetxattr_unwind; - local->fd = fd_ref (fd); + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - afr_open_fd_fix (fd, this); + local->op = GF_FOP_FSETXATTR; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - ret = 0; + return 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL); + return 0; } +/* }}} */ -/* {{{ setxattr */ - +/* {{{ removexattr */ int -afr_setxattr_unwind (call_frame_t *frame, xlator_t *this) +afr_removexattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (setxattr, main_frame, - local->op_ret, local->op_errno, - NULL); - } + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} - - -int -afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int need_unwind = 0; - - local = frame->local; - priv = this->private; - LOCK (&frame->lock); - { - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; - - if (local->success_count == priv->child_count) { - need_unwind = 1; - } - } - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - if (need_unwind) - local->transaction.unwind (frame, this); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - local->transaction.resume (frame, this); - } - - return 0; + AFR_STACK_UNWIND(removexattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; } - int -afr_setxattr_wind (call_frame_t *frame, xlator_t *this) +afr_removexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; - - local = frame->local; - priv = this->private; - - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); - - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->setxattr, - &local->loc, - local->cont.setxattr.dict, - local->cont.setxattr.flags, - NULL); - - if (!--call_count) - break; - } - } - - return 0; + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); } - int -afr_setxattr_done (call_frame_t *frame, xlator_t *this) +afr_removexattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = frame->local; - - local->transaction.unwind (frame, this); + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - AFR_STACK_DESTROY (frame); + local = frame->local; + priv = this->private; - return 0; + STACK_WIND_COOKIE(frame, afr_removexattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->removexattr, &local->loc, + local->cont.removexattr.name, local->xdata_req); + return 0; } int -afr_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) +afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = EINVAL; - - VALIDATE_OR_GOTO (this, out); + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict, - op_errno, out); + GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out); - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict, - op_errno, out); + GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out); - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this->private, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - priv = this->private; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - QUORUM_CHECK(setxattr,out); - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } + local->cont.removexattr.name = gf_strdup(name); - AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); - local = transaction_frame->local; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + if (!local->xdata_req) + goto out; - local->cont.setxattr.dict = dict_ref (dict); - local->cont.setxattr.flags = flags; + local->transaction.wind = afr_removexattr_wind; + local->transaction.unwind = afr_removexattr_unwind; - local->transaction.fop = afr_setxattr_wind; - local->transaction.done = afr_setxattr_done; - local->transaction.unwind = afr_setxattr_unwind; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - loc_copy (&local->loc, loc); + local->op = GF_FOP_REMOVEXATTR; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - ret = 0; + return 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + return 0; } -/* {{{ fsetxattr */ - - +/* ffremovexattr */ int -afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this) +afr_fremovexattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (fsetxattr, main_frame, - local->op_ret, local->op_errno, - NULL); - } + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; + + AFR_STACK_UNWIND(fremovexattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; } +int +afr_fremovexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); +} int -afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_fremovexattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int need_unwind = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - LOCK (&frame->lock); - { - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; + STACK_WIND_COOKIE(frame, afr_fremovexattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fremovexattr, local->fd, + local->cont.removexattr.name, local->xdata_req); + return 0; +} - if (local->success_count == priv->child_count) { - need_unwind = 1; - } - } +int +afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); + GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out); - if (need_unwind) - local->transaction.unwind (frame, this); + GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out); - call_count = afr_frame_return (frame); + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - if (call_count == 0) { - local->transaction.resume (frame, this); - } + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - return 0; -} + local->cont.removexattr.name = gf_strdup(name); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + if (!local->xdata_req) + goto out; -int -afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; + local->transaction.wind = afr_fremovexattr_wind; + local->transaction.unwind = afr_fremovexattr_unwind; - local = frame->local; - priv = this->private; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); + local->op = GF_FOP_FREMOVEXATTR; - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - local->call_count = call_count; + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fsetxattr, - local->fd, - local->cont.fsetxattr.dict, - local->cont.fsetxattr.flags, - NULL); - - if (!--call_count) - break; - } - } + return 0; +out: + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; -} + AFR_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL); + return 0; +} int -afr_fsetxattr_done (call_frame_t *frame, xlator_t *this) +afr_fallocate_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); + local = frame->local; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; + + AFR_STACK_UNWIND(fallocate, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; } int -afr_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) +afr_fallocate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); +} - GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict, - op_errno, out); +int +afr_fallocate_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_fallocate_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fallocate, local->fd, + local->cont.fallocate.mode, local->cont.fallocate.offset, + local->cont.fallocate.len, local->xdata_req); + return 0; +} - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict, - op_errno, out); +int +afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_frame_t *transaction_frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int op_errno = ENOMEM; - priv = this->private; + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - if (afr_is_split_brain (this, fd->inode)) { - op_errno = EIO; - goto out; - } + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - QUORUM_CHECK(fsetxattr,out); + local->cont.fallocate.mode = mode; + local->cont.fallocate.offset = offset; + local->cont.fallocate.len = len; - AFR_LOCAL_ALLOC_OR_GOTO (local, out); + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - goto out; - } + if (!local->xdata_req) + goto out; - transaction_frame->local = local; + local->op = GF_FOP_FALLOCATE; - local->op_ret = -1; + local->transaction.wind = afr_fallocate_wind; + local->transaction.unwind = afr_fallocate_unwind; - local->cont.fsetxattr.dict = dict_ref (dict); - local->cont.fsetxattr.flags = flags; + local->transaction.main_frame = frame; - local->transaction.fop = afr_fsetxattr_wind; - local->transaction.done = afr_fsetxattr_done; - local->transaction.unwind = afr_fsetxattr_unwind; + local->transaction.start = local->cont.fallocate.offset; + local->transaction.len = 0; - local->fd = fd_ref (fd); + afr_fix_open(fd, this); - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - ret = 0; + return 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } /* }}} */ - -/* {{{ removexattr */ - +/* {{{ discard */ int -afr_removexattr_unwind (call_frame_t *frame, xlator_t *this) +afr_discard_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (removexattr, main_frame, - local->op_ret, local->op_errno, - NULL); - } + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_discard_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int call_count = -1; - int need_unwind = 0; + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); +} - local = frame->local; - priv = this->private; +int +afr_discard_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_discard_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->discard, local->fd, + local->cont.discard.offset, local->cont.discard.len, + local->xdata_req); + return 0; +} - LOCK (&frame->lock); - { - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; +int +afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - if (local->success_count == priv->wait_count) { - need_unwind = 1; - } - } + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - if (need_unwind) - local->transaction.unwind (frame, this); + local->cont.discard.offset = offset; + local->cont.discard.len = len; - call_count = afr_frame_return (frame); + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - if (call_count == 0) { - local->transaction.resume (frame, this); - } + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - return 0; -} + if (!local->xdata_req) + goto out; + local->op = GF_FOP_DISCARD; -int32_t -afr_removexattr_wind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; + local->transaction.wind = afr_discard_wind; + local->transaction.unwind = afr_discard_unwind; - local = frame->local; - priv = this->private; + local->transaction.main_frame = frame; - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); + local->transaction.start = local->cont.discard.offset; + local->transaction.len = 0; - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } + afr_fix_open(fd, this); - local->call_count = call_count; + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->removexattr, - &local->loc, - local->cont.removexattr.name, - NULL); - - if (!--call_count) - break; - } - } + return 0; +out: + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } +/* {{{ zerofill */ int -afr_removexattr_done (call_frame_t *frame, xlator_t *this) +afr_zerofill_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); + local = frame->local; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = 0; + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); +} - VALIDATE_OR_GOTO (this, out); +int +afr_zerofill_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_zerofill_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->zerofill, local->fd, + local->cont.zerofill.offset, local->cont.zerofill.len, + local->xdata_req); + return 0; +} - GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*", - name, op_errno, out); +int +afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*", - name, op_errno, out); + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - priv = this->private; + local->cont.zerofill.offset = offset; + local->cont.zerofill.len = len; - QUORUM_CHECK(removexattr,out); + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); - local = transaction_frame->local; + if (!local->xdata_req) + goto out; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + local->op = GF_FOP_ZEROFILL; - local->cont.removexattr.name = gf_strdup (name); + local->transaction.wind = afr_zerofill_wind; + local->transaction.unwind = afr_zerofill_unwind; - local->transaction.fop = afr_removexattr_wind; - local->transaction.done = afr_removexattr_done; - local->transaction.unwind = afr_removexattr_unwind; + local->transaction.main_frame = frame; - loc_copy (&local->loc, loc); + local->transaction.start = local->cont.zerofill.offset; + local->transaction.len = len; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + afr_fix_open(fd, this); - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - ret = 0; + return 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } -/* ffremovexattr */ -int -afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; - - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); +/* }}} */ - if (main_frame) { - AFR_STACK_UNWIND (fremovexattr, main_frame, - local->op_ret, local->op_errno, - NULL); - } - return 0; +int32_t +afr_xattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, xattr, xdata); } - int -afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_xattrop_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int call_count = -1; - int need_unwind = 0; - - local = frame->local; - priv = this->private; - - LOCK (&frame->lock); - { - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; - - if (local->success_count == priv->wait_count) { - need_unwind = 1; - } - } - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - if (need_unwind) - local->transaction.unwind (frame, this); + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_xattrop_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->xattrop, &local->loc, + local->cont.xattrop.optype, local->cont.xattrop.xattr, + local->xdata_req); + return 0; +} - call_count = afr_frame_return (frame); +int +afr_xattrop_unwind(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - if (call_count == 0) { - local->transaction.resume (frame, this); - } + local = frame->local; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(xattrop, main_frame, local->op_ret, local->op_errno, + local->xattr_rsp, local->xdata_rsp); + return 0; +} int32_t -afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this) +afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int i = 0; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - local = frame->local; - priv = this->private; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } + local->cont.xattrop.xattr = dict_ref(xattr); + local->cont.xattrop.optype = optype; + if (xdata) + local->xdata_req = dict_ref(xdata); - local->call_count = call_count; + local->transaction.wind = afr_xattrop_wind; + local->transaction.unwind = afr_xattrop_unwind; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) { - STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fremovexattr, - local->fd, - local->cont.removexattr.name, - NULL); - - if (!--call_count) - break; - } - } + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - return 0; -} + local->op = GF_FOP_XATTROP; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; -int -afr_fremovexattr_done (call_frame_t *frame, xlator_t *this) -{ - afr_local_t * local = frame->local; - - local->transaction.unwind (frame, this); + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - AFR_STACK_DESTROY (frame); + return 0; +out: + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL); + return 0; } +int32_t +afr_fxattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, xattr, xdata); +} int -afr_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) +afr_fxattrop_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_ret = -1; - int op_errno = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_fxattrop_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fxattrop, local->fd, + local->cont.xattrop.optype, local->cont.xattrop.xattr, + local->xdata_req); + return 0; +} - VALIDATE_OR_GOTO (this, out); +int +afr_fxattrop_unwind(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*", - name, op_errno, out); + local = frame->local; - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*", - name, op_errno, out); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) + return 0; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this->private, out); + AFR_STACK_UNWIND(fxattrop, main_frame, local->op_ret, local->op_errno, + local->xattr_rsp, local->xdata_rsp); + return 0; +} - priv = this->private; - if (afr_is_split_brain (this, fd->inode)) { - op_errno = EIO; - goto out; - } +int32_t +afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + local->cont.xattrop.xattr = dict_ref(xattr); + local->cont.xattrop.optype = optype; + if (xdata) + local->xdata_req = dict_ref(xdata); + + local->transaction.wind = afr_fxattrop_wind; + local->transaction.unwind = afr_fxattrop_unwind; + + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; + + local->op = GF_FOP_FXATTROP; + + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; + + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; +out: + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - QUORUM_CHECK(fremovexattr, out); + AFR_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL); + return 0; +} - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - goto out; - } +int +afr_fsync_unwind(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - AFR_LOCAL_ALLOC_OR_GOTO (local, out); + local = frame->local; - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - transaction_frame->local = local; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) + return 0; - local->op_ret = -1; + AFR_STACK_UNWIND(fsync, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); - local->cont.removexattr.name = gf_strdup (name); + return 0; +} - local->transaction.fop = afr_fremovexattr_wind; - local->transaction.done = afr_fremovexattr_done; - local->transaction.unwind = afr_fremovexattr_unwind; +int +afr_fsync_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); +} - local->fd = fd_ref (fd); +int +afr_fsync_wind(call_frame_t *frame, xlator_t *this, int subvol) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local = frame->local; + priv = this->private; - op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (op_ret < 0) { - op_errno = -op_ret; - goto out; - } + STACK_WIND_COOKIE(frame, afr_fsync_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fsync, local->fd, + local->cont.fsync.datasync, local->xdata_req); + return 0; +} - op_ret = 0; +int +afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int32_t op_errno = ENOMEM; + int8_t last_fsync = 0; + + AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + if (xdata) { + local->xdata_req = dict_copy_with_ref(xdata, NULL); + if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { + if (last_fsync) { + local->transaction.disable_delayed_post_op = _gf_true; + } + } + } else { + local->xdata_req = dict_new(); + } + + if (!local->xdata_req) + goto out; + + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; + + local->op = GF_FOP_FSYNC; + local->cont.fsync.datasync = datasync; + + if (afr_fd_has_witnessed_unstable_write(this, fd->inode)) { + /* don't care. we only wanted to CLEAR the bit */ + } + + local->transaction.wind = afr_fsync_wind; + local->transaction.unwind = afr_fsync_unwind; + + local->transaction.main_frame = frame; + + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (op_ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); - } + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - return 0; + AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); + + return 0; } |
