summary | refs | log | tree | commit | diff | stats
path: root/xlators
diff options
context:
space:
mode:
author Ravishankar N <ravishankar@redhat.com> 2016-12-09 06:09:14 +0000
committer Pranith Kumar Karampuri <pkarampu@redhat.com> 2016-12-12 22:25:22 -0800
commit 8d36a3cb838b8ff277a4b41f56cd380325cb7837 (patch)
tree 56b90447aeb8368d6f98b308f13a23f29d21a530 /xlators
parent c4eae872b8988ce41f03dd1e7a146ae7dcb0f963 (diff)
afr: allow I/O when favorite-child-policy is enabled
Problem:
Currently, I/O on a split-brained file fails until the self-heal is complete, even when the favorite-child-policy is set.

Fix:
If a valid 'source' is found using the set favorite-child-policy, inspect and reset the afr pending xattrs on the 'sinks' (inside appropriate locks), refresh the inode and then proceed with the read or write transaction.

The resetting itself happens in the self-heal code and hence can also happen in the client side background-heal or by the shd's index-heal, in addition to the txn code path explained above. When it happens via heal, we also add checks in undo-pending to not reset the sink xattrs again.

> Reviewed-on: http://review.gluster.org/15673
> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>

Change-Id: Ic8c1317720cb26bd114b6fe6af4e58c73b864626
BUG: 1403121
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Simon Turcotte-Langevin <simon.turcotte-langevin@ubisoft.com>
Reviewed-on: http://review.gluster.org/16088
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 205
-rw-r--r-- xlators/cluster/afr/src/afr-read-txn.c 13
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 159
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 20
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 3
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 15
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h 24
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 9
8 files changed, 387 insertions, 61 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 176dcbfa13b..ac1da31dc76 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -53,6 +53,13 @@ afr_quorum_errno (afr_private_t *priv)
return EROFS;
}
+int
+afr_fav_child_reset_sink_xattrs (void *opaque);
+
+int
+afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame,
+ void *opaque);
+
gf_boolean_t
afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,
int32_t *op_errno)
@@ -1011,6 +1018,82 @@ afr_selfheal_enabled (xlator_t *this)
return data || priv->metadata_self_heal || priv->entry_self_heal;
}
+
+int
+afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int op_errno = ENOMEM;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable (frame, inode, this, local->readable,
+ &event_generation,
+ local->transaction.type);
+
+ if (ret == -EIO || !event_generation) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = -EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy (this, local->replies,
+ inode, NULL);
+ if (read_subvol == -1) {
+ err = -EIO;
+ goto refresh_done;
+ }
+
+ heal_frame = copy_frame (frame);
+ if (!heal_frame) {
+ err = -EIO;
+ goto refresh_done;
+ }
+ heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
+ if (!heal_local) {
+ err = -EIO;
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = -EIO;
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->heal_frame = frame;
+ ret = synctask_new (this->ctx->env,
+ afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk,
+ heal_frame,
+ heal_frame);
+ return 0;
+ }
+
+refresh_done:
+ afr_local_replies_wipe (local, this->private);
+ local->refreshfn (frame, this, err);
+
+ return 0;
+}
+
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
@@ -1029,8 +1112,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
err = afr_inode_refresh_err (frame, this);
- afr_local_replies_wipe (local, this->private);
-
if (ret && afr_selfheal_enabled (this) && start_heal) {
heal_frame = copy_frame (frame);
if (!heal_frame)
@@ -1050,7 +1131,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
}
refresh_done:
- local->refreshfn (frame, this, err);
+ afr_txn_refresh_done (frame, this, err);
return 0;
}
@@ -5132,6 +5213,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
afr_private_t *priv = this->private;
@@ -5140,6 +5222,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -5156,6 +5239,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
ret = __afr_selfheal_metadata_prepare (frame, this, inode,
locked_on, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies,
pending);
*msh = afr_decide_heal_info (priv, sources, ret);
@@ -5179,6 +5263,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
afr_private_t *priv = NULL;
fd_t *fd = NULL;
struct afr_reply *locked_replies = NULL;
@@ -5192,6 +5277,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
/* Heal-info does an open() on the file being examined so that the
* current eager-lock holding client, if present, at some point sees
@@ -5231,6 +5317,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
ret = __afr_selfheal_data_prepare (frame, this, inode,
data_lock, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies,
pflag);
*dsh = afr_decide_heal_info (priv, sources, ret);
@@ -5818,3 +5905,115 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
if (newloc_xdata)
dict_unref (newloc_xdata);
}
+
+int
+afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame,
+ void *opaque)
+{
+
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
+
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
+
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh (txn_frame, this, local->inode, NULL,
+ local->refreshfn);
+
+ if (heal_frame)
+ AFR_STACK_DESTROY (heal_frame);
+
+ return 0;
+}
+
+int
+afr_fav_child_reset_sink_xattrs (void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *) opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain (txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain (txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name,
+ 0, 0, locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare (heal_frame, this,
+ inode, locked_on,
+ sources, sinks,
+ healed_sinks,
+ undid_pending,
+ locked_replies,
+ NULL);
+ }
+data_unlock:
+ afr_selfheal_uninodelk (heal_frame, this, inode, this->name,
+ 0, 0, locked_on);
+ }
+
+ if (m_spb) {
+ memset (locked_on, 0, sizeof (*locked_on) * priv->child_count);
+ memset (undid_pending, 0,
+ sizeof (*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name,
+ LLONG_MAX-1, 0, locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare (heal_frame, this,
+ inode, locked_on,
+ sources, sinks,
+ healed_sinks,
+ undid_pending,
+ locked_replies,
+ NULL);
+
+ }
+mdata_unlock:
+ afr_selfheal_uninodelk (heal_frame, this, inode, this->name,
+ LLONG_MAX-1, 0, locked_on);
+ }
+
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index cb81af42510..26b0f1c2a11 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -64,7 +64,6 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
{
afr_local_t *local = NULL;
int read_subvol = 0;
- int event_generation = 0;
inode_t *inode = NULL;
int ret = -1;
int spb_choice = -1;
@@ -76,18 +75,12 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
local->op_errno = -err;
local->op_ret = -1;
read_subvol = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,
+ "Failing %s on gfid %s: split-brain observed.",
+ gf_fop_list[local->op], uuid_utoa (inode->gfid));
goto readfn;
}
- ret = afr_inode_get_readable (frame, inode, this, local->readable,
- &event_generation,
- local->transaction.type);
-
- if (ret == -EIO || !event_generation)
- /* Even after refresh, we don't have a good
- read subvolume. Time to bail */
- AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
-
read_subvol = afr_read_subvol_select_by_policy (inode, this,
local->readable, NULL);
if (read_subvol == -1)
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 858880860e6..17e15d760c6 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -145,8 +145,10 @@ err:
int
afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, afr_transaction_type type,
- struct afr_reply *replies, unsigned char *locked_on)
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -214,6 +216,10 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
and inspected on.
*/
continue;
+ if (undid_pending[i])
+ /* We already unset the pending xattrs in
+ * _afr_fav_child_reset_sink_xattrs(). */
+ continue;
xattr = afr_selfheal_output_xattr (this, local->need_full_crawl,
type, output_dirty,
@@ -735,6 +741,42 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)
return fav_child;
}
+int
+afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+
+ priv = this->private;
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ fav_child = afr_sh_fav_by_size (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "SIZE";
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ fav_child = afr_sh_fav_by_ctime (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "CTIME";
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ fav_child = afr_sh_fav_by_mtime (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "MTIME";
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ fav_child = afr_sh_fav_by_majority (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "MAJORITY";
+ break;
+ case AFR_FAV_CHILD_NONE:
+ default:
+ break;
+ }
+
+ return fav_child;
+}
int
afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
@@ -756,24 +798,9 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
time_t time;
priv = this->private;
- if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) {
- fav_child = afr_sh_fav_by_majority (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "MAJORITY";
- } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) {
- fav_child = afr_sh_fav_by_mtime (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "MTIME";
- } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) {
- fav_child = afr_sh_fav_by_ctime (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "CTIME";
- } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) {
- fav_child = afr_sh_fav_by_size (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "SIZE";
- }
+ fav_child = afr_sh_get_fav_by_policy (this, replies, inode,
+ &policy_str);
if (fav_child > priv->child_count - 1) {
gf_msg (this->name, GF_LOG_ERROR, 0,
AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) "
@@ -829,6 +856,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
dict_t *xdata_req = NULL;
int heal_op = -1;
int ret = -1;
+ int source = -1;
local = frame->local;
priv = this->private;
@@ -838,27 +866,96 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
if (ret)
goto autoheal;
- ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,
+ source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,
sources, sinks,
healed_sinks,
locked_on, replies,
type, heal_op);
- return ret;
+ return source;
autoheal:
/* Automatically heal if fav_child_policy is set. */
if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
- ret = afr_mark_split_brain_source_sinks_by_policy (frame, this,
- inode,
- sources,
- sinks,
+ source = afr_mark_split_brain_source_sinks_by_policy (frame,
+ this,
+ inode,
+ sources,
+ sinks,
healed_sinks,
- locked_on,
- replies,
- type);
+ locked_on,
+ replies,
+ type);
+ if (source != -1) {
+ ret = dict_set_int32 (xdata_req, "fav-child-policy", 1);
+ if (ret)
+ return -1;
+ }
}
- return ret;
+ return source;
+}
+
+int
+_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!dict_get (local->xdata_req, "fav-child-policy"))
+ return 0;
+
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
+
+ input_dirty = alloca0 (priv->child_count * sizeof (int));
+ input_matrix = ALLOC_MATRIX (priv->child_count, int);
+ output_dirty = alloca0 (priv->child_count * sizeof (int));
+ output_matrix = ALLOC_MATRIX (priv->child_count, int);
+
+ afr_selfheal_extract_xattr (this, replies, type, input_dirty,
+ input_matrix);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
+ output_dirty[i] = -input_dirty[i];
+ output_matrix[i][source] = -input_matrix[i][source];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] || !locked_on[i])
+ continue;
+ xattr = afr_selfheal_output_xattr (this, _gf_false, type,
+ output_dirty, output_matrix,
+ i, NULL);
+
+ afr_selfheal_post_op (frame, this, inode, i, xattr, xdata);
+
+ undid_pending[i] = 1;
+ dict_unref (xattr);
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
}
gf_boolean_t
@@ -1906,11 +2003,15 @@ afr_selfheal (xlator_t *this, uuid_t gfid)
{
int ret = -1;
call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
frame = afr_frame_create (this);
if (!frame)
return ret;
+ local = frame->local;
+ local->xdata_req = dict_new();
+
ret = afr_selfheal_do (frame, this, gfid);
if (frame)
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index fbbbd192323..d032284926c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -580,6 +580,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
unsigned char *sinks,
unsigned char *healed_sinks,
unsigned char *locked_on,
+ unsigned char *undid_pending,
struct afr_reply *replies,
uint64_t *witness)
{
@@ -603,6 +604,11 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
"file=%s", this->name, uuid_utoa(inode->gfid));
return -EIO;
}
+
+ _afr_fav_child_reset_sink_xattrs (frame, this, inode, source,
+ healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION,
+ locked_on, replies);
return source;
}
@@ -642,6 +648,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
inode_t *inode, unsigned char *locked_on,
unsigned char *sources, unsigned char *sinks,
unsigned char *healed_sinks,
+ unsigned char *undid_pending,
struct afr_reply *replies, gf_boolean_t *pflag)
{
int ret = -1;
@@ -677,8 +684,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
source = __afr_selfheal_data_finalize_source (frame, this, inode,
sources, sinks,
healed_sinks,
- locked_on, replies,
- witness);
+ locked_on, undid_pending,
+ replies, witness);
if (source < 0)
return -EIO;
@@ -696,6 +703,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
unsigned char *sinks = NULL;
unsigned char *data_lock = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
int source = -1;
gf_boolean_t did_sh = _gf_true;
@@ -707,6 +715,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -726,9 +735,8 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = __afr_selfheal_data_prepare (frame, this, fd->inode,
data_lock, sources, sinks,
- healed_sinks,
- locked_replies,
- NULL);
+ healed_sinks, undid_pending,
+ locked_replies, NULL);
if (ret < 0)
goto unlock;
@@ -787,7 +795,7 @@ restore_time:
}
ret = afr_selfheal_undo_pending (frame, this, fd->inode,
sources, sinks, healed_sinks,
- AFR_DATA_TRANSACTION,
+ undid_pending, AFR_DATA_TRANSACTION,
locked_replies, data_lock);
skip_undo_pending:
afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a0e361ab987..d8fe5422372 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -913,6 +913,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
unsigned char *data_lock = NULL;
unsigned char *postop_lock = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -924,6 +925,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
postop_lock = alloca0 (priv->child_count);
@@ -996,6 +998,7 @@ unlock:
ret = afr_selfheal_undo_pending (frame, this, fd->inode,
sources, sinks, healed_sinks,
+ undid_pending,
AFR_ENTRY_TRANSACTION,
locked_replies, postop_lock);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 9dfe4a14e8c..5839ddc2e0f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -203,6 +203,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
unsigned char *sources,
unsigned char *sinks,
unsigned char *healed_sinks,
+ unsigned char *undid_pending,
unsigned char *locked_on,
struct afr_reply *replies)
{
@@ -224,8 +225,14 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
healed_sinks,
locked_on, replies,
AFR_METADATA_TRANSACTION);
- if (source >= 0)
+ if (source >= 0) {
+ _afr_fav_child_reset_sink_xattrs (frame, this, inode,
+ source, healed_sinks,
+ undid_pending,
+ AFR_METADATA_TRANSACTION,
+ locked_on, replies);
return source;
+ }
/* If this is a directory mtime/ctime only split brain
use the most recent */
@@ -308,6 +315,7 @@ int
__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *locked_on, unsigned char *sources,
unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
struct afr_reply *replies, gf_boolean_t *pflag)
{
int ret = -1;
@@ -362,6 +370,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i
source = __afr_selfheal_metadata_finalize_source (frame, this, inode,
sources, sinks,
healed_sinks,
+ undid_pending,
locked_on, replies);
if (source < 0)
@@ -379,6 +388,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
unsigned char *sinks = NULL;
unsigned char *data_lock = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
gf_boolean_t did_sh = _gf_true;
int source = -1;
@@ -388,6 +398,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -403,6 +414,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = __afr_selfheal_metadata_prepare (frame, this, inode,
data_lock, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies, NULL);
if (ret < 0)
goto unlock;
@@ -421,6 +433,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_undo_pending (frame, this, inode, sources,
sinks, healed_sinks,
+ undid_pending,
AFR_METADATA_TRANSACTION,
locked_replies, data_lock);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 1705c967f60..500227abe24 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -167,8 +167,10 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
int
afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, afr_transaction_type type,
- struct afr_reply *replies, unsigned char *locked_on);
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on);
int
afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir,
@@ -229,6 +231,19 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
afr_transaction_type type);
int
+afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str);
+
+int
+_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies);
+
+int
afr_get_child_index_from_name (xlator_t *this, char *name);
gf_boolean_t
@@ -239,8 +254,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
inode_t *inode, unsigned char *locked_on,
unsigned char *sources,
unsigned char *sinks, unsigned char *healed_sinks,
- struct afr_reply *replies,
- gf_boolean_t *flag);
+ unsigned char *undid_pending,
+ struct afr_reply *replies, gf_boolean_t *flag);
int
__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
@@ -248,6 +263,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
unsigned char *sources,
unsigned char *sinks,
unsigned char *healed_sinks,
+ unsigned char *undid_pending,
struct afr_reply *replies,
gf_boolean_t *flag);
int
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index a8979cc6184..bf4ea944e23 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2581,19 +2581,12 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
if (err) {
local->op_errno = -err;
local->op_ret = -1;
- goto fail;
- }
- ret = afr_inode_get_readable (frame, local->inode, this,
- local->readable, NULL,
- local->transaction.type);
- if (ret < 0) {
gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN,
"Failing %s on gfid %s: split-brain observed.",
gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
- local->op_ret = -1;
- local->op_errno = -ret;
goto fail;
}
+
afr_transaction_start (frame, this);
return 0;
fail: