From a07ddd8fcc8dcdcf7ccfa61211d258f13b9f9229 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Sat, 26 Nov 2016 21:24:01 +0530 Subject: afr: allow I/O when favorite-child-policy is enabled Problem: Currently, I/O on a split-brained file fails even when the favorite-child-policy is set until the self-heal is complete. Fix: If a valid 'source' is found using the set favorite-child-policy, inspect and reset the afr pending xattrs on the 'sinks' (inside appropriate locks), refresh the inode and then proceed with the read or write transaction. The resetting itself happens in the self-heal code and hence can also happen in the client side background-heal or by the shd's index-heal in addition to the txn code path explained above. When it happens in via heal, we also add checks in undo-pending to not reset the sink xattrs again. Change-Id: Ic8c1317720cb26bd114b6fe6af4e58c73b864626 BUG: 1386188 Signed-off-by: Ravishankar N Reported-by: Simon Turcotte-Langevin Reviewed-on: http://review.gluster.org/15673 Tested-by: Pranith Kumar Karampuri Smoke: Gluster Build System Reviewed-by: Pranith Kumar Karampuri NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System --- xlators/cluster/afr/src/afr-common.c | 205 ++++++++++++++++++++++- xlators/cluster/afr/src/afr-read-txn.c | 13 +- xlators/cluster/afr/src/afr-self-heal-common.c | 159 ++++++++++++++---- xlators/cluster/afr/src/afr-self-heal-data.c | 20 ++- xlators/cluster/afr/src/afr-self-heal-entry.c | 3 + xlators/cluster/afr/src/afr-self-heal-metadata.c | 15 +- xlators/cluster/afr/src/afr-self-heal.h | 24 ++- xlators/cluster/afr/src/afr-transaction.c | 9 +- 8 files changed, 387 insertions(+), 61 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 7173e8b032e..024f0f5f589 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -53,6 +53,13 @@ afr_quorum_errno (afr_private_t *priv) return EROFS; } +int +afr_fav_child_reset_sink_xattrs (void *opaque); + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame, + void *opaque); + gf_boolean_t afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) @@ -1011,6 +1018,82 @@ afr_selfheal_enabled (xlator_t *this) return data || priv->metadata_self_heal || priv->entry_self_heal; } + +int +afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + + call_frame_t *heal_frame = NULL; + afr_local_t *heal_local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + int event_generation = 0; + int read_subvol = -1; + int op_errno = ENOMEM; + int ret = 0; + + local = frame->local; + inode = local->inode; + priv = this->private; + + if (err) + goto refresh_done; + + if (local->op == GF_FOP_LOOKUP) + goto refresh_done; + + ret = afr_inode_get_readable (frame, inode, this, local->readable, + &event_generation, + local->transaction.type); + + if (ret == -EIO || !event_generation) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = -EIO; + goto refresh_done; + } + read_subvol = afr_sh_get_fav_by_policy (this, local->replies, + inode, NULL); + if (read_subvol == -1) { + err = -EIO; + goto refresh_done; + } + + heal_frame = copy_frame (frame); + if (!heal_frame) { + err = -EIO; + goto refresh_done; + } + heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + heal_local = AFR_FRAME_INIT (heal_frame, op_errno); + if (!heal_local) { + err = -EIO; + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->xdata_req = dict_new(); + if (!heal_local->xdata_req) { + err = -EIO; + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->heal_frame = frame; + ret = synctask_new (this->ctx->env, + afr_fav_child_reset_sink_xattrs, + afr_fav_child_reset_sink_xattrs_cbk, + heal_frame, + heal_frame); + return 0; + } + +refresh_done: + afr_local_replies_wipe (local, this->private); + local->refreshfn (frame, this, err); + + return 0; +} + int afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) { @@ -1029,8 +1112,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) err = afr_inode_refresh_err (frame, this); - afr_local_replies_wipe (local, this->private); - if (ret && afr_selfheal_enabled (this) && start_heal) { heal_frame = copy_frame (frame); if (!heal_frame) @@ -1047,7 +1128,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) } refresh_done: - local->refreshfn (frame, this, err); + afr_txn_refresh_done (frame, this, err); return 0; } @@ -5110,6 +5191,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; afr_private_t *priv = this->private; @@ -5118,6 +5200,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -5134,6 +5217,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, ret = __afr_selfheal_metadata_prepare (frame, this, inode, locked_on, sources, sinks, healed_sinks, + undid_pending, locked_replies, pending); *msh = afr_decide_heal_info (priv, sources, ret); @@ -5157,6 +5241,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; afr_private_t *priv = NULL; fd_t *fd = NULL; struct afr_reply *locked_replies = NULL; @@ -5170,6 +5255,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); /* Heal-info does an open() on the file being examined so that the * current eager-lock holding client, if present, at some point sees @@ -5209,6 +5295,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, ret = __afr_selfheal_data_prepare (frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, pflag); *dsh = afr_decide_heal_info (priv, sources, ret); @@ -5796,3 +5883,115 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata, if (newloc_xdata) dict_unref (newloc_xdata); } + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame, + void *opaque) +{ + + call_frame_t *txn_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *heal_local = NULL; + xlator_t *this = NULL; + + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + local = txn_frame->local; + this = txn_frame->this; + + /* Refresh the inode agan and proceed with the transaction.*/ + afr_inode_refresh (txn_frame, this, local->inode, NULL, + local->refreshfn); + + if (heal_frame) + AFR_STACK_DESTROY (heal_frame); + + return 0; +} + +int +afr_fav_child_reset_sink_xattrs (void *opaque) +{ + call_frame_t *heal_frame = NULL; + call_frame_t *txn_frame = NULL; + xlator_t *this = NULL; + gf_boolean_t d_spb = _gf_false; + gf_boolean_t m_spb = _gf_false; + afr_local_t *heal_local = NULL; + afr_local_t *txn_local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + unsigned char *locked_on = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + int ret = 0; + + heal_frame = (call_frame_t *) opaque; + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + txn_local = txn_frame->local; + this = txn_frame->this; + inode = txn_local->inode; + priv = this->private; + locked_on = alloca0 (priv->child_count); + sources = alloca0 (priv->child_count); + sinks = alloca0 (priv->child_count); + healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); + locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); + + ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, + AFR_DATA_TRANSACTION, &d_spb); + + ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, + AFR_METADATA_TRANSACTION, &m_spb); + + /* Take appropriate locks and reset sink xattrs. */ + if (d_spb) { + ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, + 0, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto data_unlock; + ret = __afr_selfheal_data_prepare (heal_frame, this, + inode, locked_on, + sources, sinks, + healed_sinks, + undid_pending, + locked_replies, + NULL); + } +data_unlock: + afr_selfheal_uninodelk (heal_frame, this, inode, this->name, + 0, 0, locked_on); + } + + if (m_spb) { + memset (locked_on, 0, sizeof (*locked_on) * priv->child_count); + memset (undid_pending, 0, + sizeof (*undid_pending) * priv->child_count); + ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, + LLONG_MAX-1, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare (heal_frame, this, + inode, locked_on, + sources, sinks, + healed_sinks, + undid_pending, + locked_replies, + NULL); + + } +mdata_unlock: + afr_selfheal_uninodelk (heal_frame, this, inode, this->name, + LLONG_MAX-1, 0, locked_on); + } + + return ret; + +} diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index cb81af42510..26b0f1c2a11 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -64,7 +64,6 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) { afr_local_t *local = NULL; int read_subvol = 0; - int event_generation = 0; inode_t *inode = NULL; int ret = -1; int spb_choice = -1; @@ -76,18 +75,12 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) local->op_errno = -err; local->op_ret = -1; read_subvol = -1; + gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, + "Failing %s on gfid %s: split-brain observed.", + gf_fop_list[local->op], uuid_utoa (inode->gfid)); goto readfn; } - ret = afr_inode_get_readable (frame, inode, this, local->readable, - &event_generation, - local->transaction.type); - - if (ret == -EIO || !event_generation) - /* Even after refresh, we don't have a good - read subvolume. Time to bail */ - AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); - read_subvol = afr_read_subvol_select_by_policy (inode, this, local->readable, NULL); if (read_subvol == -1) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b6720ccfa5c..c3b62e3781a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -145,8 +145,10 @@ err: int afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, afr_transaction_type type, - struct afr_reply *replies, unsigned char *locked_on) + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -214,6 +216,10 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, and inspected on. */ continue; + if (undid_pending[i]) + /* We already unset the pending xattrs in + * _afr_fav_child_reset_sink_xattrs(). */ + continue; xattr = afr_selfheal_output_xattr (this, local->need_full_crawl, type, output_dirty, @@ -735,6 +741,42 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode) return fav_child; } +int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str) +{ + afr_private_t *priv = NULL; + int fav_child = -1; + + priv = this->private; + switch (priv->fav_child_policy) { + case AFR_FAV_CHILD_BY_SIZE: + fav_child = afr_sh_fav_by_size (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "SIZE"; + break; + case AFR_FAV_CHILD_BY_CTIME: + fav_child = afr_sh_fav_by_ctime (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "CTIME"; + break; + case AFR_FAV_CHILD_BY_MTIME: + fav_child = afr_sh_fav_by_mtime (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "MTIME"; + break; + case AFR_FAV_CHILD_BY_MAJORITY: + fav_child = afr_sh_fav_by_majority (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "MAJORITY"; + break; + case AFR_FAV_CHILD_NONE: + default: + break; + } + + return fav_child; +} int afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, @@ -756,24 +798,9 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, time_t time; priv = this->private; - if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) { - fav_child = afr_sh_fav_by_majority (this, replies, inode); - if (fav_child >= 0) - policy_str = "MAJORITY"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) { - fav_child = afr_sh_fav_by_mtime (this, replies, inode); - if (fav_child >= 0) - policy_str = "MTIME"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) { - fav_child = afr_sh_fav_by_ctime (this, replies, inode); - if (fav_child >= 0) - policy_str = "CTIME"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) { - fav_child = afr_sh_fav_by_size (this, replies, inode); - if (fav_child >= 0) - policy_str = "SIZE"; - } + fav_child = afr_sh_get_fav_by_policy (this, replies, inode, + &policy_str); if (fav_child > priv->child_count - 1) { gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) " @@ -829,6 +856,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, dict_t *xdata_req = NULL; int heal_op = -1; int ret = -1; + int source = -1; local = frame->local; priv = this->private; @@ -838,27 +866,96 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, if (ret) goto autoheal; - ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, + source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, sources, sinks, healed_sinks, locked_on, replies, type, heal_op); - return ret; + return source; autoheal: /* Automatically heal if fav_child_policy is set. */ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) { - ret = afr_mark_split_brain_source_sinks_by_policy (frame, this, - inode, - sources, - sinks, + source = afr_mark_split_brain_source_sinks_by_policy (frame, + this, + inode, + sources, + sinks, healed_sinks, - locked_on, - replies, - type); + locked_on, + replies, + type); + if (source != -1) { + ret = dict_set_int32 (xdata_req, "fav-child-policy", 1); + if (ret) + return -1; + } } - return ret; + return source; +} + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int *input_dirty = NULL; + int **input_matrix = NULL; + int *output_dirty = NULL; + int **output_matrix = NULL; + dict_t *xattr = NULL; + dict_t *xdata = NULL; + int i = 0; + + priv = this->private; + local = frame->local; + + if (!dict_get (local->xdata_req, "fav-child-policy")) + return 0; + + xdata = dict_new(); + if (!xdata) + return -1; + + input_dirty = alloca0 (priv->child_count * sizeof (int)); + input_matrix = ALLOC_MATRIX (priv->child_count, int); + output_dirty = alloca0 (priv->child_count * sizeof (int)); + output_matrix = ALLOC_MATRIX (priv->child_count, int); + + afr_selfheal_extract_xattr (this, replies, type, input_dirty, + input_matrix); + + for (i = 0; i < priv->child_count; i++) { + if (i == source || !healed_sinks[i]) + continue; + output_dirty[i] = -input_dirty[i]; + output_matrix[i][source] = -input_matrix[i][source]; + } + + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] || !locked_on[i]) + continue; + xattr = afr_selfheal_output_xattr (this, _gf_false, type, + output_dirty, output_matrix, + i, NULL); + + afr_selfheal_post_op (frame, this, inode, i, xattr, xdata); + + undid_pending[i] = 1; + dict_unref (xattr); + } + + if (xdata) + dict_unref (xdata); + + return 0; } gf_boolean_t @@ -1906,11 +2003,15 @@ afr_selfheal (xlator_t *this, uuid_t gfid) { int ret = -1; call_frame_t *frame = NULL; + afr_local_t *local = NULL; frame = afr_frame_create (this); if (!frame) return ret; + local = frame->local; + local->xdata_req = dict_new(); + ret = afr_selfheal_do (frame, this, gfid); if (frame) diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index fbbbd192323..d032284926c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -580,6 +580,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness) { @@ -603,6 +604,11 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, "file=%s", this->name, uuid_utoa(inode->gfid)); return -EIO; } + + _afr_fav_child_reset_sink_xattrs (frame, this, inode, source, + healed_sinks, undid_pending, + AFR_DATA_TRANSACTION, + locked_on, replies); return source; } @@ -642,6 +648,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *pflag) { int ret = -1; @@ -677,8 +684,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, source = __afr_selfheal_data_finalize_source (frame, this, inode, sources, sinks, healed_sinks, - locked_on, replies, - witness); + locked_on, undid_pending, + replies, witness); if (source < 0) return -EIO; @@ -696,6 +703,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *sinks = NULL; unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; int source = -1; gf_boolean_t did_sh = _gf_true; @@ -707,6 +715,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -726,9 +735,8 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = __afr_selfheal_data_prepare (frame, this, fd->inode, data_lock, sources, sinks, - healed_sinks, - locked_replies, - NULL); + healed_sinks, undid_pending, + locked_replies, NULL); if (ret < 0) goto unlock; @@ -787,7 +795,7 @@ restore_time: } ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, healed_sinks, - AFR_DATA_TRANSACTION, + undid_pending, AFR_DATA_TRANSACTION, locked_replies, data_lock); skip_undo_pending: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index a0e361ab987..d8fe5422372 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -913,6 +913,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *data_lock = NULL; unsigned char *postop_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -924,6 +925,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); postop_lock = alloca0 (priv->child_count); @@ -996,6 +998,7 @@ unlock: ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, healed_sinks, + undid_pending, AFR_ENTRY_TRANSACTION, locked_replies, postop_lock); } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 9dfe4a14e8c..5839ddc2e0f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -203,6 +203,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, unsigned char *locked_on, struct afr_reply *replies) { @@ -224,8 +225,14 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, healed_sinks, locked_on, replies, AFR_METADATA_TRANSACTION); - if (source >= 0) + if (source >= 0) { + _afr_fav_child_reset_sink_xattrs (frame, this, inode, + source, healed_sinks, + undid_pending, + AFR_METADATA_TRANSACTION, + locked_on, replies); return source; + } /* If this is a directory mtime/ctime only split brain use the most recent */ @@ -308,6 +315,7 @@ int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *pflag) { int ret = -1; @@ -362,6 +370,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i source = __afr_selfheal_metadata_finalize_source (frame, this, inode, sources, sinks, healed_sinks, + undid_pending, locked_on, replies); if (source < 0) @@ -379,6 +388,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) unsigned char *sinks = NULL; unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; gf_boolean_t did_sh = _gf_true; int source = -1; @@ -388,6 +398,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -403,6 +414,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = __afr_selfheal_metadata_prepare (frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, NULL); if (ret < 0) goto unlock; @@ -421,6 +433,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_undo_pending (frame, this, inode, sources, sinks, healed_sinks, + undid_pending, AFR_METADATA_TRANSACTION, locked_replies, data_lock); } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index ec5337e60b2..80b7f3a125d 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -167,8 +167,10 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, int afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, afr_transaction_type type, - struct afr_reply *replies, unsigned char *locked_on); + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on); int afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir, @@ -228,6 +230,19 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, struct afr_reply *replies, afr_transaction_type type); +int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str); + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies); + int afr_get_child_index_from_name (xlator_t *this, char *name); @@ -239,8 +254,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, - struct afr_reply *replies, - gf_boolean_t *flag); + unsigned char *undid_pending, + struct afr_reply *replies, gf_boolean_t *flag); int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, @@ -248,6 +263,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *flag); int diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 4906921ca6a..e6878eb35ff 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2527,19 +2527,12 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) if (err) { local->op_errno = -err; local->op_ret = -1; - goto fail; - } - ret = afr_inode_get_readable (frame, local->inode, this, - local->readable, NULL, - local->transaction.type); - if (ret < 0) { gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, "Failing %s on gfid %s: split-brain observed.", gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); - local->op_ret = -1; - local->op_errno = -ret; goto fail; } + afr_transaction_start (frame, this); return 0; fail: -- cgit