From c539e23023abe743770287439ebe81989a732728 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Fri, 9 Dec 2016 07:14:17 +0000 Subject: afr: allow I/O when favorite-child-policy is enabled Problem: Currently, I/O on a split-brained file fails even when the favorite-child-policy is set until the self-heal is complete. Fix: If a valid 'source' is found using the set favorite-child-policy, inspect and reset the afr pending xattrs on the 'sinks' (inside appropriate locks), refresh the inode and then proceed with the read or write transaction. The resetting itself happens in the self-heal code and hence can also happen in the client side background-heal or by the shd's index-heal in addition to the txn code path explained above. When it happens in via heal, we also add checks in undo-pending to not reset the sink xattrs again. > Reviewed-on: http://review.gluster.org/15673 > Tested-by: Pranith Kumar Karampuri > Smoke: Gluster Build System > Reviewed-by: Pranith Kumar Karampuri > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System Change-Id: Ic8c1317720cb26bd114b6fe6af4e58c73b864626 BUG: 1378547 Signed-off-by: Ravishankar N Reported-by: Simon Turcotte-Langevin Reviewed-on: http://review.gluster.org/16091 NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Smoke: Gluster Build System Reviewed-by: Niels de Vos --- xlators/cluster/afr/src/afr-common.c | 205 ++++++++++++++++++++++- xlators/cluster/afr/src/afr-read-txn.c | 13 +- xlators/cluster/afr/src/afr-self-heal-common.c | 159 ++++++++++++++---- xlators/cluster/afr/src/afr-self-heal-data.c | 21 ++- xlators/cluster/afr/src/afr-self-heal-entry.c | 3 + xlators/cluster/afr/src/afr-self-heal-metadata.c | 15 +- xlators/cluster/afr/src/afr-self-heal.h | 24 ++- xlators/cluster/afr/src/afr-transaction.c | 9 +- 8 files changed, 388 insertions(+), 61 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f4d1d63c3d5..2d21f2e426c 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -972,6 +972,13 @@ ret: return -err; } +int +afr_fav_child_reset_sink_xattrs (void *opaque); + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame, + void *opaque); + gf_boolean_t afr_selfheal_enabled (xlator_t *this) { @@ -987,6 +994,82 @@ afr_selfheal_enabled (xlator_t *this) return data || priv->metadata_self_heal || priv->entry_self_heal; } + +int +afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + + call_frame_t *heal_frame = NULL; + afr_local_t *heal_local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + int event_generation = 0; + int read_subvol = -1; + int op_errno = ENOMEM; + int ret = 0; + + local = frame->local; + inode = local->inode; + priv = this->private; + + if (err) + goto refresh_done; + + if (local->op == GF_FOP_LOOKUP) + goto refresh_done; + + ret = afr_inode_get_readable (frame, inode, this, local->readable, + &event_generation, + local->transaction.type); + + if (ret == -EIO || !event_generation) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = -EIO; + goto refresh_done; + } + read_subvol = afr_sh_get_fav_by_policy (this, local->replies, + inode, NULL); + if (read_subvol == -1) { + err = -EIO; + goto refresh_done; + } + + heal_frame = copy_frame (frame); + if (!heal_frame) { + err = -EIO; + goto refresh_done; + } + heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + heal_local = AFR_FRAME_INIT (heal_frame, op_errno); + if (!heal_local) { + err = -EIO; + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->xdata_req = dict_new(); + if (!heal_local->xdata_req) { + err = -EIO; + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->heal_frame = frame; + ret = synctask_new (this->ctx->env, + afr_fav_child_reset_sink_xattrs, + afr_fav_child_reset_sink_xattrs_cbk, + heal_frame, + heal_frame); + return 0; + } + +refresh_done: + afr_local_replies_wipe (local, this->private); + local->refreshfn (frame, this, err); + + return 0; +} + int afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) { @@ -1005,8 +1088,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) err = afr_inode_refresh_err (frame, this); - afr_local_replies_wipe (local, this->private); - if (ret && afr_selfheal_enabled (this) && start_heal) { heal_frame = copy_frame (frame); if (!heal_frame) @@ -1026,7 +1107,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) } refresh_done: - local->refreshfn (frame, this, err); + afr_txn_refresh_done (frame, this, err); return 0; } @@ -4775,6 +4856,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; afr_private_t *priv = this->private; @@ -4783,6 +4865,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -4799,6 +4882,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, ret = __afr_selfheal_metadata_prepare (frame, this, inode, locked_on, sources, sinks, healed_sinks, + undid_pending, locked_replies, pending); *msh = afr_decide_heal_info (priv, sources, ret); @@ -4822,6 +4906,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; afr_private_t *priv = NULL; fd_t *fd = NULL; struct afr_reply *locked_replies = NULL; @@ -4835,6 +4920,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); /* Heal-info does an open() on the file being examined so that the * current eager-lock holding client, if present, at some point sees @@ -4874,6 +4960,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, ret = __afr_selfheal_data_prepare (frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, pflag); *dsh = afr_decide_heal_info (priv, sources, ret); @@ -5409,3 +5496,115 @@ afr_get_msg_id (char *op_type) return AFR_MSG_ADD_BRICK_STATUS; return -1; } + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame, + void *opaque) +{ + + call_frame_t *txn_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *heal_local = NULL; + xlator_t *this = NULL; + + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + local = txn_frame->local; + this = txn_frame->this; + + /* Refresh the inode agan and proceed with the transaction.*/ + afr_inode_refresh (txn_frame, this, local->inode, NULL, + local->refreshfn); + + if (heal_frame) + AFR_STACK_DESTROY (heal_frame); + + return 0; +} + +int +afr_fav_child_reset_sink_xattrs (void *opaque) +{ + call_frame_t *heal_frame = NULL; + call_frame_t *txn_frame = NULL; + xlator_t *this = NULL; + gf_boolean_t d_spb = _gf_false; + gf_boolean_t m_spb = _gf_false; + afr_local_t *heal_local = NULL; + afr_local_t *txn_local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + unsigned char *locked_on = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + int ret = 0; + + heal_frame = (call_frame_t *) opaque; + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + txn_local = txn_frame->local; + this = txn_frame->this; + inode = txn_local->inode; + priv = this->private; + locked_on = alloca0 (priv->child_count); + sources = alloca0 (priv->child_count); + sinks = alloca0 (priv->child_count); + healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); + locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); + + ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, + AFR_DATA_TRANSACTION, &d_spb); + + ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, + AFR_METADATA_TRANSACTION, &m_spb); + + /* Take appropriate locks and reset sink xattrs. */ + if (d_spb) { + ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, + 0, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto data_unlock; + ret = __afr_selfheal_data_prepare (heal_frame, this, + inode, locked_on, + sources, sinks, + healed_sinks, + undid_pending, + locked_replies, + NULL); + } +data_unlock: + afr_selfheal_uninodelk (heal_frame, this, inode, this->name, + 0, 0, locked_on); + } + + if (m_spb) { + memset (locked_on, 0, sizeof (*locked_on) * priv->child_count); + memset (undid_pending, 0, + sizeof (*undid_pending) * priv->child_count); + ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, + LLONG_MAX-1, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare (heal_frame, this, + inode, locked_on, + sources, sinks, + healed_sinks, + undid_pending, + locked_replies, + NULL); + + } +mdata_unlock: + afr_selfheal_uninodelk (heal_frame, this, inode, this->name, + LLONG_MAX-1, 0, locked_on); + } + + return ret; + +} diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 74749f029c8..1c7b4f93d7c 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -64,7 +64,6 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) { afr_local_t *local = NULL; int read_subvol = 0; - int event_generation = 0; inode_t *inode = NULL; int ret = -1; int spb_choice = -1; @@ -76,18 +75,12 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) local->op_errno = -err; local->op_ret = -1; read_subvol = -1; + gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, + "Failing %s on gfid %s: split-brain observed.", + gf_fop_list[local->op], uuid_utoa (inode->gfid)); goto readfn; } - ret = afr_inode_get_readable (frame, inode, this, local->readable, - &event_generation, - local->transaction.type); - - if (ret == -EIO || !event_generation) - /* Even after refresh, we don't have a good - read subvolume. Time to bail */ - AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); - read_subvol = afr_read_subvol_select_by_policy (inode, this, local->readable, NULL); if (read_subvol == -1) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index faee8dbb89b..40cfd01f4e9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -144,8 +144,10 @@ err: int afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, afr_transaction_type type, - struct afr_reply *replies, unsigned char *locked_on) + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -213,6 +215,10 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, and inspected on. */ continue; + if (undid_pending[i]) + /* We already unset the pending xattrs in + * _afr_fav_child_reset_sink_xattrs(). */ + continue; xattr = afr_selfheal_output_xattr (this, local->need_full_crawl, type, output_dirty, @@ -734,6 +740,42 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode) return fav_child; } +int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str) +{ + afr_private_t *priv = NULL; + int fav_child = -1; + + priv = this->private; + switch (priv->fav_child_policy) { + case AFR_FAV_CHILD_BY_SIZE: + fav_child = afr_sh_fav_by_size (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "SIZE"; + break; + case AFR_FAV_CHILD_BY_CTIME: + fav_child = afr_sh_fav_by_ctime (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "CTIME"; + break; + case AFR_FAV_CHILD_BY_MTIME: + fav_child = afr_sh_fav_by_mtime (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "MTIME"; + break; + case AFR_FAV_CHILD_BY_MAJORITY: + fav_child = afr_sh_fav_by_majority (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "MAJORITY"; + break; + case AFR_FAV_CHILD_NONE: + default: + break; + } + + return fav_child; +} int afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, @@ -755,24 +797,9 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, time_t time; priv = this->private; - if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) { - fav_child = afr_sh_fav_by_majority (this, replies, inode); - if (fav_child >= 0) - policy_str = "MAJORITY"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) { - fav_child = afr_sh_fav_by_mtime (this, replies, inode); - if (fav_child >= 0) - policy_str = "MTIME"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) { - fav_child = afr_sh_fav_by_ctime (this, replies, inode); - if (fav_child >= 0) - policy_str = "CTIME"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) { - fav_child = afr_sh_fav_by_size (this, replies, inode); - if (fav_child >= 0) - policy_str = "SIZE"; - } + fav_child = afr_sh_get_fav_by_policy (this, replies, inode, + &policy_str); if (fav_child > priv->child_count - 1) { gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) " @@ -828,6 +855,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, dict_t *xdata_req = NULL; int heal_op = -1; int ret = -1; + int source = -1; local = frame->local; priv = this->private; @@ -837,27 +865,96 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, if (ret) goto autoheal; - ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, + source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, sources, sinks, healed_sinks, locked_on, replies, type, heal_op); - return ret; + return source; autoheal: /* Automatically heal if fav_child_policy is set. */ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) { - ret = afr_mark_split_brain_source_sinks_by_policy (frame, this, - inode, - sources, - sinks, + source = afr_mark_split_brain_source_sinks_by_policy (frame, + this, + inode, + sources, + sinks, healed_sinks, - locked_on, - replies, - type); + locked_on, + replies, + type); + if (source != -1) { + ret = dict_set_int32 (xdata_req, "fav-child-policy", 1); + if (ret) + return -1; + } } - return ret; + return source; +} + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int *input_dirty = NULL; + int **input_matrix = NULL; + int *output_dirty = NULL; + int **output_matrix = NULL; + dict_t *xattr = NULL; + dict_t *xdata = NULL; + int i = 0; + + priv = this->private; + local = frame->local; + + if (!dict_get (local->xdata_req, "fav-child-policy")) + return 0; + + xdata = dict_new(); + if (!xdata) + return -1; + + input_dirty = alloca0 (priv->child_count * sizeof (int)); + input_matrix = ALLOC_MATRIX (priv->child_count, int); + output_dirty = alloca0 (priv->child_count * sizeof (int)); + output_matrix = ALLOC_MATRIX (priv->child_count, int); + + afr_selfheal_extract_xattr (this, replies, type, input_dirty, + input_matrix); + + for (i = 0; i < priv->child_count; i++) { + if (i == source || !healed_sinks[i]) + continue; + output_dirty[i] = -input_dirty[i]; + output_matrix[i][source] = -input_matrix[i][source]; + } + + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] || !locked_on[i]) + continue; + xattr = afr_selfheal_output_xattr (this, _gf_false, type, + output_dirty, output_matrix, + i, NULL); + + afr_selfheal_post_op (frame, this, inode, i, xattr, xdata); + + undid_pending[i] = 1; + dict_unref (xattr); + } + + if (xdata) + dict_unref (xdata); + + return 0; } gf_boolean_t @@ -1895,11 +1992,15 @@ afr_selfheal (xlator_t *this, uuid_t gfid) { int ret = -1; call_frame_t *frame = NULL; + afr_local_t *local = NULL; frame = afr_frame_create (this); if (!frame) return ret; + local = frame->local; + local->xdata_req = dict_new(); + ret = afr_selfheal_do (frame, this, gfid); if (frame) diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 4becfb835e8..1b3b1ca0af1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -579,6 +579,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness) { @@ -599,6 +600,12 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, AFR_DATA_TRANSACTION); if (source < 0) return -EIO; + + _afr_fav_child_reset_sink_xattrs (frame, this, inode, source, + healed_sinks, undid_pending, + AFR_DATA_TRANSACTION, + locked_on, replies); + return source; } @@ -638,6 +645,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *pflag) { int ret = -1; @@ -673,8 +681,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, source = __afr_selfheal_data_finalize_source (frame, this, inode, sources, sinks, healed_sinks, - locked_on, replies, - witness); + locked_on, undid_pending, + replies, witness); if (source < 0) return -EIO; @@ -692,6 +700,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *sinks = NULL; unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; int source = -1; gf_boolean_t did_sh = _gf_true; @@ -703,6 +712,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -722,9 +732,8 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = __afr_selfheal_data_prepare (frame, this, fd->inode, data_lock, sources, sinks, - healed_sinks, - locked_replies, - NULL); + healed_sinks, undid_pending, + locked_replies, NULL); if (ret < 0) goto unlock; @@ -783,7 +792,7 @@ restore_time: } ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, healed_sinks, - AFR_DATA_TRANSACTION, + undid_pending, AFR_DATA_TRANSACTION, locked_replies, data_lock); skip_undo_pending: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 573b5435eae..9ab566b8242 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -897,6 +897,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *data_lock = NULL; unsigned char *postop_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -908,6 +909,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); postop_lock = alloca0 (priv->child_count); @@ -980,6 +982,7 @@ unlock: ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, healed_sinks, + undid_pending, AFR_ENTRY_TRANSACTION, locked_replies, postop_lock); } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 85aaca7cefd..a67717bf9d4 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -202,6 +202,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, unsigned char *locked_on, struct afr_reply *replies) { @@ -223,8 +224,14 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, healed_sinks, locked_on, replies, AFR_METADATA_TRANSACTION); - if (source >= 0) + if (source >= 0) { + _afr_fav_child_reset_sink_xattrs (frame, this, inode, + source, healed_sinks, + undid_pending, + AFR_METADATA_TRANSACTION, + locked_on, replies); return source; + } /* If this is a directory mtime/ctime only split brain use the most recent */ @@ -304,6 +311,7 @@ int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *pflag) { int ret = -1; @@ -358,6 +366,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i source = __afr_selfheal_metadata_finalize_source (frame, this, inode, sources, sinks, healed_sinks, + undid_pending, locked_on, replies); if (source < 0) @@ -375,6 +384,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) unsigned char *sinks = NULL; unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; gf_boolean_t did_sh = _gf_true; int source = -1; @@ -384,6 +394,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -399,6 +410,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = __afr_selfheal_metadata_prepare (frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, NULL); if (ret < 0) goto unlock; @@ -417,6 +429,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_undo_pending (frame, this, inode, sources, sinks, healed_sinks, + undid_pending, AFR_METADATA_TRANSACTION, locked_replies, data_lock); } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 1705c967f60..500227abe24 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -167,8 +167,10 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, int afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, afr_transaction_type type, - struct afr_reply *replies, unsigned char *locked_on); + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on); int afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir, @@ -228,6 +230,19 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, struct afr_reply *replies, afr_transaction_type type); +int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str); + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies); + int afr_get_child_index_from_name (xlator_t *this, char *name); @@ -239,8 +254,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, - struct afr_reply *replies, - gf_boolean_t *flag); + unsigned char *undid_pending, + struct afr_reply *replies, gf_boolean_t *flag); int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, @@ -248,6 +263,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *flag); int diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 4d0ed943a81..9cb735ea7fa 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2261,19 +2261,12 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) if (err) { local->op_errno = -err; local->op_ret = -1; - goto fail; - } - ret = afr_inode_get_readable (frame, local->inode, this, - local->readable, NULL, - local->transaction.type); - if (ret < 0) { gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, "Failing %s on gfid %s: split-brain observed.", gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); - local->op_ret = -1; - local->op_errno = -ret; goto fail; } + afr_transaction_start (frame, this); return 0; fail: -- cgit