From 6d3739292b7b51d2ddbab75b5f884fb38925b943 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Thu, 16 Jan 2014 16:14:36 -0800 Subject: cluster/afr: refactor - Remove client side self-healing completely (opendir, openfd, lookup) - Re-work readdir-failover to work reliably in case of NFS - Remove unused/dead lock recovery code - Consistently use xdata in both calls and callbacks in all FOPs - Per-inode event generation, used to force inode ctx refresh - Implement dirty flag support (in place of pending counts) - Eliminate inode ctx structure, use read subvol bits + event_generation - Implement inode ctx refreshing based on event generation - Provide backward compatibility in transactions - remove unused variables and functions - make code more consistent in style and pattern - regularize and clean up inode-write transaction code - regularize and clean up dir-write transaction code - regularize and clean up common FOPs - reorganize transaction framework code - skip setting xattrs in pending dict if nothing is pending - re-write self-healing code using syncops - re-write simpler self-heal-daemon Change-Id: I1e4080c9796c8a2815c2dab4be3073f389d614a8 BUG: 1021686 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/6010 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-transaction.c | 1457 ++++++++++++----------------- 1 file changed, 610 insertions(+), 847 deletions(-) (limited to 'xlators/cluster/afr/src/afr-transaction.c') diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 20306e469..f974fdb59 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -18,188 +18,130 @@ #include +gf_boolean_t +afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this); + +gf_boolean_t +afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this); + +int +afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume); -#define LOCKED_NO 0x0 /* no lock held */ -#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path - of RENAME */ -#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */ -afr_fd_ctx_t * -__afr_fd_ctx_get (fd_t *fd, xlator_t *this) +int +__afr_txn_write_fop (call_frame_t *frame, xlator_t *this) { - uint64_t ctx = 0; - int ret = 0; - afr_fd_ctx_t *fd_ctx = NULL; - int i = 0; + afr_local_t *local = NULL; afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + local = frame->local; priv = this->private; - ret = __fd_ctx_get (fd, this, &ctx); - - if (ret < 0 && fd_is_anonymous (fd)) { - ret = __afr_fd_ctx_set (this, fd); - if (ret < 0) - goto out; - - ret = __fd_ctx_get (fd, this, &ctx); - if (ret < 0) - goto out; + call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count); - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - for (i = 0; i < priv->child_count; i++) - fd_ctx->opened_on[i] = AFR_FD_OPENED; + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; } - fd_ctx = (afr_fd_ctx_t *)(long) ctx; -out: - return fd_ctx; -} - + local->call_count = call_count; -afr_fd_ctx_t * -afr_fd_ctx_get (fd_t *fd, xlator_t *this) -{ - afr_fd_ctx_t *fd_ctx = NULL; + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + local->transaction.wind (frame, this, i); - LOCK(&fd->lock); - { - fd_ctx = __afr_fd_ctx_get (fd, this); + if (!--call_count) + break; + } } - UNLOCK(&fd->lock); - return fd_ctx; + return 0; } -static void -afr_save_lk_owner (call_frame_t *frame) +int +__afr_txn_write_done (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; + afr_local_t *local = NULL; local = frame->local; - local->saved_lk_owner = frame->root->lk_owner; + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; } -static void -afr_restore_lk_owner (call_frame_t *frame) +call_frame_t* +afr_transaction_detach_fop_frame (call_frame_t *frame) { - afr_local_t * local = NULL; + afr_local_t * local = NULL; + call_frame_t *fop_frame = NULL; local = frame->local; - frame->root->lk_owner = local->saved_lk_owner; -} - -static void -__mark_all_pending (int32_t *pending[], int child_count, - afr_transaction_type type) -{ - int i = 0; - int j = 0; - - for (i = 0; i < child_count; i++) { - j = afr_index_for_transaction_type (type); - pending[i][j] = hton32 (1); + LOCK (&frame->lock); + { + fop_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; } + UNLOCK (&frame->lock); + + return fop_frame; } static void -__mark_child_dead (int32_t *pending[], int child_count, int child, - afr_transaction_type type) +afr_save_lk_owner (call_frame_t *frame) { - int j = 0; + afr_local_t * local = NULL; - j = afr_index_for_transaction_type (type); + local = frame->local; - pending[child][j] = 0; + local->saved_lk_owner = frame->root->lk_owner; } static void -__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index) +afr_restore_lk_owner (call_frame_t *frame) { - afr_local_t *local = NULL; - afr_fd_ctx_t *fd_ctx = NULL; + afr_local_t * local = NULL; local = frame->local; - if (!local->fd) - return; - - fd_ctx = afr_fd_ctx_get (local->fd, this); - - if (!fd_ctx) - goto out; - - LOCK (&local->fd->lock); - { - if (local->transaction.type == AFR_DATA_TRANSACTION) - fd_ctx->pre_op_done[child_index]++; - } - UNLOCK (&local->fd->lock); -out: - return; -} - -static void -__mark_non_participant_children (int32_t *pending[], int child_count, - unsigned char *participants, - afr_transaction_type type) -{ - int i = 0; - int j = 0; - - j = afr_index_for_transaction_type (type); - for (i = 0; i < child_count; i++) { - if (!participants[i]) - pending[i][j] = 0; - } + frame->root->lk_owner = local->saved_lk_owner; } - void -__mark_all_success (int32_t *pending[], int child_count, - afr_transaction_type type) +__mark_all_success (call_frame_t *frame, xlator_t *this) { - int i; - int j; - - for (i = 0; i < child_count; i++) { - j = afr_index_for_transaction_type (type); - pending[i][j] = hton32 (-1); - } -} + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i; -void -_set_all_child_errno (int *child_errno, unsigned int child_count) -{ - int i = 0; + local = frame->local; + priv = this->private; - for (i = 0; i < child_count; i++) - if (child_errno[i] == 0) - child_errno[i] = ENOTCONN; + for (i = 0; i < priv->child_count; i++) { + local->transaction.failed_subvols[i] = 0; + } } -void + +int afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; - afr_private_t *priv = NULL; fd_t *fd = NULL; local = frame->local; - priv = this->private; fd = local->fd; - __mark_all_success (local->pending, priv->child_count, - local->transaction.type); - - _set_all_child_errno (local->child_errno, priv->child_count); - /* Perform fops with the lk-owner from top xlator. * Eg: lk-owner of posix-lk and flush should be same, * flush cant clear the posix-lks without that lk-owner. @@ -208,6 +150,10 @@ afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) frame->root->lk_owner = local->transaction.main_frame->root->lk_owner; + if (local->pre_op_compat) + /* old mode, pre-op was done as afr_changelog_do() + just now, before OP */ + afr_changelog_pre_op_update (frame, this); /* The wake up needs to happen independent of what type of fop arrives here. If it was @@ -220,6 +166,8 @@ afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) if (fd) afr_delayed_changelog_wake_up (this, fd); local->transaction.fop (frame, this); + + return 0; } @@ -285,39 +233,28 @@ __fop_changelog_needed (call_frame_t *frame, xlator_t *this) return op_ret; } + int -afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending, - int child, afr_xattrop_type_t op) +afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending) { int i = 0; int ret = 0; + int pending_zero[AFR_NUM_CHANGE_LOGS] = {0, }; - if (op == LOCAL_FIRST) { - ret = dict_set_static_bin (xattr, priv->pending_key[child], - pending[child], - AFR_NUM_CHANGE_LOGS * sizeof (int32_t)); - if (ret) - goto out; - } for (i = 0; i < priv->child_count; i++) { - if (i == child) - continue; + if (!memcmp (pending_zero, pending[i], sizeof (pending_zero))) + /* don't set xattrs for non-pending servers */ + continue; + ret = dict_set_static_bin (xattr, priv->pending_key[i], - pending[i], - AFR_NUM_CHANGE_LOGS * sizeof (int32_t)); + pending[i], + AFR_NUM_CHANGE_LOGS * sizeof (int)); /* 3 = data+metadata+entry */ - if (ret < 0) - goto out; - } - if (op == LOCAL_LAST) { - ret = dict_set_static_bin (xattr, priv->pending_key[child], - pending[child], - AFR_NUM_CHANGE_LOGS * sizeof (int32_t)); if (ret) - goto out; + break; } -out: + return ret; } @@ -346,102 +283,34 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type) /* {{{ pending */ -int32_t -afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr, - dict_t *xdata) + +int +afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this) { + afr_local_t *local = NULL; + afr_private_t *priv = NULL; afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int call_count = -1; - priv = this->private; - local = frame->local; + local = frame->local; + priv = this->private; int_lock = &local->internal_lock; - LOCK (&frame->lock); - { - call_count = --local->call_count; - } - UNLOCK (&frame->lock); - - if (call_count == 0) { - if (local->transaction.resume_stub) { - call_resume (local->transaction.resume_stub); - local->transaction.resume_stub = NULL; - } + if (local->transaction.resume_stub) { + call_resume (local->transaction.resume_stub); + local->transaction.resume_stub = NULL; + } - if (afr_lock_server_count (priv, local->transaction.type) == 0) { - local->transaction.done (frame, this); - } else { - int_lock->lock_cbk = local->transaction.done; - afr_unlock (frame, this); - } - } + if (afr_lock_server_count (priv, local->transaction.type) == 0) { + local->transaction.done (frame, this); + } else { + int_lock->lock_cbk = local->transaction.done; + afr_unlock (frame, this); + } - return 0; + return 0; } -void -afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this, - inode_t *inode, afr_transaction_type type) -{ - int i = -1; - int count = 0; - int read_child = -1; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int **pending = NULL; - int idx = 0; - int32_t *stale_children = NULL; - int32_t *fresh_children = NULL; - gf_boolean_t rm_stale_children = _gf_false; - - idx = afr_index_for_transaction_type (type); - - priv = this->private; - local = frame->local; - pending = local->pending; - - if (local->op_ret < 0) - goto out; - fresh_children = local->fresh_children; - read_child = afr_inode_get_read_ctx (this, inode, fresh_children); - if (read_child < 0) { - gf_log (this->name, GF_LOG_DEBUG, "Possible split-brain " - "for %s", uuid_utoa (inode->gfid)); - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (!afr_is_child_present (fresh_children, - priv->child_count, i)) - continue; - if (pending[i][idx]) - continue; - /* child is down or op failed on it */ - if (!stale_children) - stale_children = afr_children_create (priv->child_count); - if (!stale_children) - goto out; - - rm_stale_children = _gf_true; - stale_children[count++] = i; - gf_log (this->name, GF_LOG_DEBUG, "Removing stale child " - "%d for %s", i, uuid_utoa (inode->gfid)); - } - - if (!rm_stale_children) - goto out; - - afr_inode_rm_stale_children (this, inode, stale_children); -out: - GF_FREE (stale_children); - return; -} - afr_inodelk_t* afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom) { @@ -478,423 +347,468 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock) return locked_nodes; } + int -afr_changelog_pre_op_call_count (afr_transaction_type type, - afr_internal_lock_t *int_lock, - unsigned int child_count) +afr_changelog_call_count (afr_transaction_type type, + unsigned char *pre_op_subvols, + unsigned int child_count) { - int call_count = 0; - unsigned char *locked_nodes = NULL; + int call_count = 0; - locked_nodes = afr_locked_nodes_get (type, int_lock); - GF_ASSERT (locked_nodes); + call_count = AFR_COUNT(pre_op_subvols, child_count); - call_count = afr_locked_children_count (locked_nodes, child_count); if (type == AFR_ENTRY_RENAME_TRANSACTION) call_count *= 2; return call_count; } -int -afr_changelog_post_op_call_count (afr_transaction_type type, - unsigned char *pre_op, - unsigned int child_count) -{ - int call_count = 0; - call_count = afr_pre_op_done_children_count (pre_op, child_count); - if (type == AFR_ENTRY_RENAME_TRANSACTION) - call_count *= 2; +gf_boolean_t +afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; - return call_count; -} + local = frame->local; + priv = this->private; -void -afr_compute_txn_changelog (afr_local_t *local, afr_private_t *priv) -{ - int i = 0; - int index = 0; - int32_t postop = 0; - int32_t preop = 1; - int32_t **txn_changelog = NULL; - - txn_changelog = local->transaction.txn_changelog; - index = afr_index_for_transaction_type (local->transaction.type); for (i = 0; i < priv->child_count; i++) { - postop = ntoh32 (local->pending[i][index]); - txn_changelog[i][index] = hton32 (postop + preop); + if (local->transaction.failed_subvols[i]) + return _gf_false; } -} -afr_xattrop_type_t -afr_get_postop_xattrop_type (int32_t **pending, int optimized, int child, - afr_transaction_type type) -{ - int index = 0; - afr_xattrop_type_t op = LOCAL_LAST; - - index = afr_index_for_transaction_type (type); - if (optimized && !pending[child][index]) - op = LOCAL_FIRST; - return op; + return _gf_true; } + void -afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr, - int optimized, int child) +afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this) { - int32_t **txn_changelog = NULL; - int32_t **changelog = NULL; - afr_private_t *priv = NULL; - int ret = 0; - afr_xattrop_type_t op = LOCAL_LAST; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int op_errno = 0; + int i_errno = 0; + gf_boolean_t matching_errors = _gf_true; + int i = 0; - priv = this->private; - txn_changelog = local->transaction.txn_changelog; - op = afr_get_postop_xattrop_type (local->pending, optimized, child, - local->transaction.type); - if (optimized) - changelog = txn_changelog; - else - changelog = local->pending; - ret = afr_set_pending_dict (priv, xattr, changelog, child, op); - if (ret < 0) - gf_log (this->name, GF_LOG_INFO, - "failed to set pending entry"); + priv = this->private; + local = frame->local; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret != -1) { + /* Operation succeeded on at least on subvol, + so it is not a failed-everywhere situation. + */ + matching_errors = _gf_false; + break; + } + i_errno = local->replies[i].op_errno; + + if (i_errno == ENOTCONN) { + /* ENOTCONN is not a symmetric error. We do not + know if the operation was performed on the + backend or not. + */ + matching_errors = _gf_false; + break; + } + + if (!op_errno) { + op_errno = i_errno; + } else if (op_errno != i_errno) { + /* Mismatching op_errno's */ + matching_errors = _gf_false; + break; + } + } + + if (matching_errors) + __mark_all_success (frame, this); } -gf_boolean_t -afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this) +int +afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int index = -1; - int i = 0; + afr_private_t * priv = this->private; + int i = 0; + int ret = 0; + int idx = 0; + afr_local_t * local = NULL; + dict_t *xattr = NULL; + int nothing_failed = 1; + gf_boolean_t need_undirty = _gf_false; local = frame->local; - priv = this->private; + idx = afr_index_for_transaction_type (local->transaction.type); - index = afr_index_for_transaction_type (local->transaction.type); + nothing_failed = afr_txn_nothing_failed (frame, this); - for (i = 0; i < priv->child_count; i++) { - if (local->pending[i][index] == 0) - return _gf_false; - } + if (afr_changelog_pre_op_uninherit (frame, this)) + need_undirty = _gf_false; + else + need_undirty = _gf_true; - return _gf_true; -} + if (nothing_failed && !need_undirty) { + afr_changelog_post_op_done (frame, this); + goto out; + } -static void -afr_dir_fop_handle_all_fop_failures (call_frame_t *frame) -{ - xlator_t *this = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + xattr = dict_new (); + if (!xattr) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done (frame, this); + goto out; + } - this = frame->this; - local = frame->local; - priv = this->private; + if (need_undirty) { + local->dirty[idx] = hton32(-1); - if ((local->transaction.type != AFR_ENTRY_TRANSACTION) && - (local->transaction.type != AFR_ENTRY_RENAME_TRANSACTION)) - return; + ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done (frame, this); + goto out; + } - if (local->op_ret >= 0) - goto out; + } + + if (!nothing_failed) { + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.failed_subvols[i]) + local->pending[i][idx] = hton32(1); + } + ret = afr_set_pending_dict (priv, xattr, local->pending); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done (frame, this); + goto out; + } + + } - __mark_all_success (local->pending, priv->child_count, - local->transaction.type); + afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done); out: - return; + if (xattr) + dict_unref (xattr); + + return 0; } -static void -afr_data_handle_quota_errors (call_frame_t *frame, xlator_t *this) + +gf_boolean_t +afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this) { - int i = 0; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - gf_boolean_t all_quota_failures = _gf_false; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + int i = 0; + gf_boolean_t ret = _gf_false; + afr_fd_ctx_t *fd_ctx = NULL; + int type = 0; - local = frame->local; - priv = this->private; - if (local->transaction.type != AFR_DATA_TRANSACTION) - return; - /* - * Idea is to not leave the file in FOOL-FOOL scenario in case on - * all the bricks data transaction failed with EDQUOT to avoid - * increasing un-necessary load of self-heals in the system. - */ - all_quota_failures = _gf_true; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i] && - (local->child_errno[i] != EDQUOT)) { - all_quota_failures = _gf_false; - break; - } - } - if (all_quota_failures) - __mark_all_success (local->pending, priv->child_count, - local->transaction.type); + local = frame->local; + priv = this->private; + fd = local->fd; + + type = afr_index_for_transaction_type (local->transaction.type); + if (type != AFR_DATA_TRANSACTION) + return !local->transaction.dirtied; + + if (!fd) + return !local->transaction.dirtied; + + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) + return _gf_false; + + if (local->transaction.no_uninherit) + return _gf_false; + + /* This function must be idempotent. So check if we + were called before and return the same answer again. + + It is important to keep this function idempotent for + the call in afr_changelog_post_op_safe() to not have + side effects on the call from afr_changelog_post_op_now() + */ + if (local->transaction.uninherit_done) + return local->transaction.uninherit_value; + + LOCK(&fd->lock); + { + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] != + fd_ctx->pre_op_done[type][i]) { + ret = !local->transaction.dirtied; + goto unlock; + } + } + + if (fd_ctx->inherited[type]) { + ret = _gf_true; + fd_ctx->inherited[type]--; + } else if (fd_ctx->on_disk[type]) { + ret = _gf_false; + fd_ctx->on_disk[type]--; + } else { + /* ASSERT */ + ret = _gf_false; + } + + if (!fd_ctx->inherited[type] && !fd_ctx->on_disk[type]) { + for (i = 0; i < priv->child_count; i++) + fd_ctx->pre_op_done[type][i] = 0; + } + } +unlock: + UNLOCK(&fd->lock); + + local->transaction.uninherit_done = _gf_true; + local->transaction.uninherit_value = ret; + + return ret; } -int -afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) + +gf_boolean_t +afr_changelog_pre_op_inherit (call_frame_t *frame, xlator_t *this) { - afr_private_t * priv = this->private; - afr_internal_lock_t *int_lock = NULL; - int i = 0; - int call_count = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + int i = 0; + gf_boolean_t ret = _gf_false; + afr_fd_ctx_t *fd_ctx = NULL; + int type = 0; - afr_local_t * local = NULL; - afr_fd_ctx_t *fdctx = NULL; - dict_t **xattr = NULL; - int piggyback = 0; - int nothing_failed = 1; + local = frame->local; + priv = this->private; + fd = local->fd; - local = frame->local; - int_lock = &local->internal_lock; + if (local->transaction.type != AFR_DATA_TRANSACTION) + return _gf_false; - __mark_non_participant_children (local->pending, priv->child_count, - local->transaction.pre_op, - local->transaction.type); + type = afr_index_for_transaction_type (local->transaction.type); - afr_data_handle_quota_errors (frame, this); - afr_dir_fop_handle_all_fop_failures (frame); + if (!fd) + return _gf_false; - if (local->fd) - afr_transaction_rm_stale_children (frame, this, - local->fd->inode, - local->transaction.type); + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) + return _gf_false; - xattr = alloca (priv->child_count * sizeof (*xattr)); - memset (xattr, 0, (priv->child_count * sizeof (*xattr))); - for (i = 0; i < priv->child_count; i++) { - xattr[i] = dict_new (); - } + LOCK(&fd->lock); + { + if (!fd_ctx->on_disk[type]) { + /* nothing to inherit yet */ + ret = _gf_false; + goto unlock; + } - call_count = afr_changelog_post_op_call_count (local->transaction.type, - local->transaction.pre_op, - priv->child_count); - local->call_count = call_count; + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] != + fd_ctx->pre_op_done[type][i]) { + /* either inherit exactly, or don't */ + ret = _gf_false; + goto unlock; + } + } - if (local->fd) - fdctx = afr_fd_ctx_get (local->fd, this); + fd_ctx->inherited[type]++; - if (call_count == 0) { - /* no child is up */ - int_lock->lock_cbk = local->transaction.done; - afr_unlock (frame, this); - goto out; - } + ret = _gf_true; - nothing_failed = afr_txn_nothing_failed (frame, this); + local->transaction.inherited = _gf_true; + } +unlock: + UNLOCK(&fd->lock); - afr_compute_txn_changelog (local , priv); + return ret; +} - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) - continue; - if (local->transaction.type != AFR_DATA_TRANSACTION) - afr_set_postop_dict (local, this, xattr[i], - local->optimistic_change_log, i); - switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - { - if (!fdctx) { - afr_set_postop_dict (local, this, xattr[i], - 0, i); - STACK_WIND (frame, afr_changelog_post_op_cbk, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->loc, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - break; - } +gf_boolean_t +afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int i = 0; + gf_boolean_t ret = _gf_false; + int type = 0; - /* local->transaction.postop_piggybacked[] was - precomputed in is_piggyback_postop() when called from - afr_changelog_post_op_safe() - */ + local = frame->local; + priv = this->private; + fd = local->fd; - piggyback = 0; - if (local->transaction.postop_piggybacked[i]) - piggyback = 1; + if (!fd) + return _gf_false; - afr_set_postop_dict (local, this, xattr[i], - piggyback, i); + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) + return _gf_false; - if (nothing_failed && piggyback) { - afr_changelog_post_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], NULL); - } else { - STACK_WIND_COOKIE (frame, - afr_changelog_post_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - } - break; - case AFR_METADATA_TRANSACTION: - { - if (nothing_failed && local->optimistic_change_log) { - afr_changelog_post_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - break; - } + if (local->transaction.inherited) + /* was already inherited in afr_changelog_pre_op */ + return _gf_false; - if (local->fd) - STACK_WIND (frame, afr_changelog_post_op_cbk, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - else - STACK_WIND (frame, afr_changelog_post_op_cbk, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->loc, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - break; + if (!local->transaction.dirtied) + return _gf_false; - case AFR_ENTRY_RENAME_TRANSACTION: - { - if (nothing_failed && local->optimistic_change_log) { - afr_changelog_post_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - } else { - STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.new_parent_loc, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - call_count--; - } + if (!afr_txn_nothing_failed (frame, this)) + return _gf_false; - /* - set it again because previous stack_wind - might have already returned (think of case - where subvolume is posix) and would have - used the dict as placeholder for return - value - */ + type = afr_index_for_transaction_type (local->transaction.type); - afr_set_postop_dict (local, this, xattr[i], - local->optimistic_change_log, i); + ret = _gf_false; - /* fall through */ + LOCK(&fd->lock); + { + if (!fd_ctx->on_disk[type]) { + for (i = 0; i < priv->child_count; i++) + fd_ctx->pre_op_done[type][i] = + local->transaction.pre_op[i]; + } else { + for (i = 0; i < priv->child_count; i++) + if (fd_ctx->pre_op_done[type][i] != + local->transaction.pre_op[i]) { + local->transaction.no_uninherit = 1; + goto unlock; + } + } + fd_ctx->on_disk[type]++; + + ret = _gf_true; + } +unlock: + UNLOCK(&fd->lock); - case AFR_ENTRY_TRANSACTION: - { - if (nothing_failed && local->optimistic_change_log) { - afr_changelog_post_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - break; - } + return ret; +} - if (local->fd) - STACK_WIND (frame, afr_changelog_post_op_cbk, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - else - STACK_WIND (frame, afr_changelog_post_op_cbk, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.parent_loc, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - break; - } - if (!--call_count) - break; - } +int +afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + afr_local_t *local = NULL; + int call_count = -1; -out: - for (i = 0; i < priv->child_count; i++) { - dict_unref (xattr[i]); - } + local = frame->local; + + if (op_ret == -1) + afr_transaction_fop_failed (frame, this, (long) cookie); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + local->transaction.changelog_resume (frame, this); return 0; } -int32_t -afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr, - dict_t *xdata) +int +afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume) { - afr_local_t * local = NULL; - afr_private_t * priv = this->private; - int call_count = -1; - int child_index = (long) cookie; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; - local = frame->local; + local = frame->local; + priv = this->private; - LOCK (&frame->lock); - { - switch (op_ret) { - case 0: - __mark_pre_op_done_on_fd (frame, this, child_index); - //fallthrough we need to mark the pre_op - case 1: - local->transaction.pre_op[child_index] = 1; - /* special op_ret for piggyback */ - break; - case -1: - if (op_errno == ENOTSUP) { - gf_log (this->name, GF_LOG_ERROR, - "xattrop not supported by %s", - priv->children[child_index]->name); - local->op_ret = -1; - - } else if (!child_went_down (op_ret, op_errno)) { - gf_log (this->name, GF_LOG_ERROR, - "xattrop failed on child %s: %s", - priv->children[child_index]->name, - strerror (op_errno)); + call_count = afr_changelog_call_count (local->transaction.type, + local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + changelog_resume (frame, this); + return 0; + } + + local->call_count = call_count; + + local->transaction.changelog_resume = changelog_resume; + + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.pre_op[i]) + continue; + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + if (!local->fd) { + STACK_WIND_COOKIE (frame, afr_changelog_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->loc, + GF_XATTROP_ADD_ARRAY, xattr, + NULL); + } else { + STACK_WIND_COOKIE (frame, afr_changelog_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fxattrop, + local->fd, + GF_XATTROP_ADD_ARRAY, xattr, + NULL); } - local->op_errno = op_errno; - break; - } + break; + case AFR_ENTRY_RENAME_TRANSACTION: - call_count = --local->call_count; - } - UNLOCK (&frame->lock); + STACK_WIND_COOKIE (frame, afr_changelog_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.new_parent_loc, + GF_XATTROP_ADD_ARRAY, xattr, + NULL); + call_count--; - if (call_count == 0) { - if ((local->op_ret == -1) && - (local->op_errno == ENOTSUP)) { - local->transaction.resume (frame, this); - } else { - afr_transaction_perform_fop (frame, this); - } + /* fall through */ + + case AFR_ENTRY_TRANSACTION: + if (local->fd) + STACK_WIND_COOKIE (frame, afr_changelog_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fxattrop, + local->fd, + GF_XATTROP_ADD_ARRAY, xattr, + NULL); + else + STACK_WIND_COOKIE (frame, afr_changelog_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.parent_loc, + GF_XATTROP_ADD_ARRAY, xattr, + NULL); + break; + } + + if (!--call_count) + break; } - return 0; + return 0; } + int afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) { @@ -902,206 +816,122 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) int i = 0; int ret = 0; int call_count = 0; - dict_t **xattr = NULL; - afr_fd_ctx_t *fdctx = NULL; + int op_errno = 0; afr_local_t *local = NULL; - int piggyback = 0; afr_internal_lock_t *int_lock = NULL; unsigned char *locked_nodes = NULL; + unsigned char *pending_subvols = NULL; + int idx = -1; + gf_boolean_t pre_nop = _gf_true; + dict_t *xdata_req = NULL; local = frame->local; int_lock = &local->internal_lock; - - xattr = alloca (priv->child_count * sizeof (*xattr)); - memset (xattr, 0, (priv->child_count * sizeof (*xattr))); - - for (i = 0; i < priv->child_count; i++) { - xattr[i] = dict_new (); - } - - call_count = afr_changelog_pre_op_call_count (local->transaction.type, - int_lock, - priv->child_count); - if (call_count == 0) { - local->internal_lock.lock_cbk = - local->transaction.done; - afr_unlock (frame, this); - goto out; - } - - local->call_count = call_count; - - __mark_all_pending (local->pending, priv->child_count, - local->transaction.type); - - if (local->fd) - fdctx = afr_fd_ctx_get (local->fd, this); + idx = afr_index_for_transaction_type (local->transaction.type); locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock); - for (i = 0; i < priv->child_count; i++) { - if (!locked_nodes[i]) - continue; - ret = afr_set_pending_dict (priv, xattr[i], local->pending, - i, LOCAL_FIRST); - - if (ret < 0) - gf_log (this->name, GF_LOG_INFO, - "failed to set pending entry"); + pending_subvols = alloca0 (priv->child_count); - switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - { - if (!fdctx) { - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &(local->loc), - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - break; - } + for (i = 0; i < priv->child_count; i++) { + if (locked_nodes[i]) { + local->transaction.pre_op[i] = 1; + call_count++; + } else { + pending_subvols[i] = 1; + } + } - LOCK (&local->fd->lock); - { - piggyback = 0; - if (fdctx->pre_op_done[i]) { - fdctx->pre_op_piggyback[i]++; - piggyback = 1; - fdctx->hit++; - } else { - fdctx->miss++; - } - } - UNLOCK (&local->fd->lock); + /* TBD: quorum check w/ call_count */ - afr_set_delayed_post_op (frame, this); + if (call_count == 0) { + op_errno = ENOTCONN; + goto err; + } - if (piggyback) - afr_changelog_pre_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - else - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - break; - case AFR_METADATA_TRANSACTION: - { - if (local->optimistic_change_log) { - afr_changelog_pre_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - break; - } + xdata_req = dict_new(); + if (!xdata_req) { + op_errno = ENOMEM; + goto err; + } - if (local->fd) - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - else - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &(local->loc), - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - break; + pre_nop = _gf_true; + + if (afr_changelog_pre_op_inherit (frame, this)) + goto next; + + if (call_count < priv->child_count) { + /* For subvols we are not performing operation on, + mark them as pending up-front along with the FOP + so that we can safely defer unmarking dirty until + later. + */ + for (i = 0; i < priv->child_count; i++) { + if (pending_subvols[i]) + local->pending[i][idx] = hton32(1); + } + ret = afr_set_pending_dict (priv, xdata_req, + local->pending); + if (ret < 0) { + op_errno = ENOMEM; + goto err; + } + pre_nop = _gf_false; + } - case AFR_ENTRY_RENAME_TRANSACTION: - { - if (local->optimistic_change_log) { - afr_changelog_pre_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - } else { - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.new_parent_loc, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } + if (call_count > 1 && + (local->transaction.type == AFR_DATA_TRANSACTION || + !local->optimistic_change_log)) { + + /* If we are performing change on only one subvol, no + need to mark dirty, because we are setting the pending + counts already anyways + */ + local->dirty[idx] = hton32(1); + + ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + op_errno = ENOMEM; + goto err; + } + + pre_nop = _gf_false; + local->transaction.dirtied = 1; + } - call_count--; - } + if (pre_nop) + goto next; + if (!local->pre_op_compat) { + dict_copy (xdata_req, local->xdata_req); + goto next; + } - /* - set it again because previous stack_wind - might have already returned (think of case - where subvolume is posix) and would have - used the dict as placeholder for return - value - */ + afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop); - ret = afr_set_pending_dict (priv, xattr[i], local->pending, - i, LOCAL_FIRST); + if (xdata_req) + dict_unref (xdata_req); - if (ret < 0) - gf_log (this->name, GF_LOG_INFO, - "failed to set pending entry"); + return 0; +next: + afr_transaction_perform_fop (frame, this); - /* fall through */ + if (xdata_req) + dict_unref (xdata_req); - case AFR_ENTRY_TRANSACTION: - { - if (local->optimistic_change_log) { - afr_changelog_pre_op_cbk (frame, (void *)(long)i, - this, 1, 0, xattr[i], - NULL); - break; - } + return 0; +err: + local->internal_lock.lock_cbk = local->transaction.done; + local->op_ret = -1; + local->op_errno = op_errno; - if (local->fd) - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - else - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.parent_loc, - GF_XATTROP_ADD_ARRAY, xattr[i], - NULL); - } - break; - } + afr_unlock (frame, this); - if (!--call_count) - break; - } -out: - for (i = 0; i < priv->child_count; i++) { - dict_unref (xattr[i]); - } + if (xdata_req) + dict_unref (xdata_req); - return 0; + return 0; } @@ -1365,15 +1195,15 @@ afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this) } gf_boolean_t -afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this) +afr_are_multiple_fds_opened (fd_t *fd, xlator_t *this) { - afr_inode_ctx_t *ictx = NULL; + afr_fd_ctx_t *fd_ctx = NULL; - if (!inode) { + if (!fd) { /* If false is returned, it may keep on taking eager-lock * which may lead to starvation, so return true to avoid that. */ - gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode"); + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid fd"); return _gf_true; } /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock @@ -1383,32 +1213,22 @@ afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this) * if open-fd-count is > 1 */ - ictx = afr_inode_ctx_get (inode, this); - if (!ictx) + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) return _gf_true; - if (ictx->open_fd_count > 1) + if (fd_ctx->open_fd_count > 1) return _gf_true; return _gf_false; } -gf_boolean_t -afr_any_fops_failed (afr_local_t *local, afr_private_t *priv) -{ - if (local->success_count != priv->child_count) - return _gf_true; - return _gf_false; -} gf_boolean_t is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; gf_boolean_t res = _gf_false; - afr_private_t *priv = NULL; - - priv = this->private; local = frame->local; if (!local) @@ -1418,10 +1238,10 @@ is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) goto out; //Mark pending changelog ASAP - if (afr_any_fops_failed (local, priv)) + if (!afr_txn_nothing_failed (frame, this)) goto out; - if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this)) + if (local->fd && afr_are_multiple_fds_opened (local->fd, this)) goto out; res = _gf_true; @@ -1445,58 +1265,6 @@ afr_delayed_changelog_wake_up_cbk (void *data) } -/* - Check if the frame is destined to get optimized away - with changelog piggybacking -*/ -static gf_boolean_t -is_piggyback_post_op (call_frame_t *frame, fd_t *fd) -{ - afr_fd_ctx_t *fdctx = NULL; - afr_local_t *local = NULL; - gf_boolean_t piggyback = _gf_true; - afr_private_t *priv = NULL; - int i = 0; - - priv = frame->this->private; - local = frame->local; - fdctx = afr_fd_ctx_get (fd, frame->this); - - LOCK(&fd->lock); - { - piggyback = _gf_true; - - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) - continue; - if (fdctx->pre_op_piggyback[i]) { - fdctx->pre_op_piggyback[i]--; - local->transaction.postop_piggybacked[i] = 1; - } else { - /* For at least _one_ subvolume we cannot - piggyback on the changelog, and have to - perform a hard POST-OP and therefore fsync - if necesssary - */ - piggyback = _gf_false; - GF_ASSERT (fdctx->pre_op_done[i]); - fdctx->pre_op_done[i]--; - } - } - } - UNLOCK(&fd->lock); - - if (!afr_txn_nothing_failed (frame, frame->this)) { - /* something failed in this transaction, - we will be performing a hard post-op - */ - return _gf_false; - } - - return piggyback; -} - - /* SET operation */ int afr_fd_report_unstable_write (xlator_t *this, fd_t *fd) @@ -1521,7 +1289,7 @@ afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd) afr_fd_ctx_t *fdctx = NULL; gf_boolean_t witness = _gf_false; - fdctx = afr_fd_ctx_get (fd, this); + fdctx = afr_fd_ctx_get (fd, this); if (!fdctx) return _gf_true; @@ -1551,10 +1319,10 @@ afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv = this->private; local = frame->local; - if (afr_fop_failed (op_ret, op_errno)) { + if (op_ret != 0) { /* Failure of fsync() is as good as failure of previous write(). So treat it like one. - */ + */ gf_log (this->name, GF_LOG_WARNING, "fsync(%s) failed on subvolume %s. Transaction was %s", uuid_utoa (local->fd->inode->gfid), @@ -1562,14 +1330,14 @@ afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_fop_list[local->op]); afr_transaction_fop_failed (frame, this, child_index); - } + } - call_count = afr_frame_return (frame); + call_count = afr_frame_return (frame); - if (call_count == 0) - afr_changelog_post_op_now (frame, this); + if (call_count == 0) + afr_changelog_post_op_now (frame, this); - return 0; + return 0; } @@ -1580,14 +1348,13 @@ afr_changelog_fsync (call_frame_t *frame, xlator_t *this) int i = 0; int call_count = 0; afr_private_t *priv = NULL; - dict_t *xdata = NULL; - GF_UNUSED int ret = -1; + dict_t *xdata = NULL; + GF_UNUSED int ret = -1; local = frame->local; priv = this->private; - call_count = afr_pre_op_done_children_count (local->transaction.pre_op, - priv->child_count); + call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count); if (!call_count) { /* will go straight to unlock */ @@ -1597,30 +1364,30 @@ afr_changelog_fsync (call_frame_t *frame, xlator_t *this) local->call_count = call_count; - xdata = dict_new(); - if (xdata) - ret = dict_set_int32 (xdata, "batch-fsync", 1); + xdata = dict_new(); + if (xdata) + ret = dict_set_int32 (xdata, "batch-fsync", 1); for (i = 0; i < priv->child_count; i++) { if (!local->transaction.pre_op[i]) continue; STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->fsync, local->fd, - 1, xdata); + (void *) (long) i, priv->children[i], + priv->children[i]->fops->fsync, local->fd, + 1, xdata); if (!--call_count) break; } - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref (xdata); return 0; } - int +int afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; @@ -1634,7 +1401,8 @@ afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this) return 0; } - if (is_piggyback_post_op (frame, local->fd)) { + if (afr_changelog_pre_op_uninherit (frame, this) && + afr_txn_nothing_failed (frame, this)) { /* just detected that this post-op is about to be optimized away as a new write() has already piggybacked on this frame's changelog. @@ -1733,7 +1501,7 @@ out: if (prev_frame) { local = prev_frame->local; local->transaction.resume_stub = stub; - afr_changelog_post_op_safe (prev_frame, this); + afr_changelog_post_op_now (prev_frame, this); } else if (stub) { call_resume (stub); } @@ -1779,13 +1547,9 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) int afr_transaction_resume (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; - afr_private_t *priv = NULL; local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; if (local->transaction.eager_lock_on) { /* We don't need to retain "local" in the @@ -1800,15 +1564,17 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this) afr_restore_lk_owner (frame); + afr_handle_symmetric_errors (frame, this); + + if (!local->pre_op_compat) + /* new mode, pre-op was done along + with OP */ + afr_changelog_pre_op_update (frame, this); + if (__fop_changelog_needed (frame, this)) { afr_changelog_post_op (frame, this); } else { - if (afr_lock_server_count (priv, local->transaction.type) == 0) { - local->transaction.done (frame, this); - } else { - int_lock->lock_cbk = local->transaction.done; - afr_unlock (frame, this); - } + afr_changelog_post_op_done (frame, this); } return 0; @@ -1824,13 +1590,10 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index) { afr_local_t * local = NULL; - afr_private_t * priv = NULL; local = frame->local; - priv = this->private; - __mark_child_dead (local->pending, priv->child_count, - child_index, local->transaction.type); + local->transaction.failed_subvols[child_index] = 1; } @@ -1878,7 +1641,7 @@ afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this) if (!fdctx) return; - if (afr_are_multiple_fds_opened (local->fd->inode, this)) + if (afr_are_multiple_fds_opened (local->fd, this)) return; /* * Once full file lock is acquired in eager-lock phase, overlapping -- cgit