summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/bugs/replicate/bug-1386188-sbrain-fav-child.t82
-rw-r--r--xlators/cluster/afr/src/afr-common.c205
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c13
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c159
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c20
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c15
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h24
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c9
9 files changed, 469 insertions, 61 deletions
diff --git a/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
new file mode 100644
index 00000000000..d049d95ef9a
--- /dev/null
+++ b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/data.txt
+TEST touch $M0/mdata.txt
+
+#Create data and metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024
+TEST setfattr -n user.value -v value1 $M0/mdata.txt
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024
+TEST setfattr -n user.value -v value2 $M0/mdata.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+## Check that the file still in split-brain,
+ ## I/O fails
+ cat $M0/data.txt > /dev/null
+ EXPECT "1" echo $?
+ ## pending xattrs blame each other.
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/data.txt)
+ brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST [ $brick0_pending -ne "000000000000000000000000" ]
+ TEST [ $brick1_pending -ne "000000000000000000000000" ]
+
+ ## I/O fails
+ getfattr -n user.value $M0/mdata.txt
+ EXPECT "1" echo $?
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/mdata.txt)
+ brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt)
+ TEST [ $brick0_pending -ne "000000000000000000000000" ]
+ TEST [ $brick1_pending -ne "000000000000000000000000" ]
+
+## Let us use mtime as fav-child policy. So brick0 will be source.
+ # Set dirty (data part) on the sink brick to check if it is reset later along with the pending xattr.
+ TEST setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/${V0}1/data.txt
+ # Set dirty (metadata part) on the sink brick to check if it is reset later along with the pending xattr.
+ TEST setfattr -n trusted.afr.dirty -v 0x000000000000000100000000 $B0/${V0}1/mdata.txt
+
+ TEST $CLI volume set $V0 favorite-child-policy mtime
+
+ # Reading the file should be allowed and sink brick xattrs must be reset.
+ cat $M0/data.txt > /dev/null
+ EXPECT "0" echo $?
+ TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+ TEST [ $brick1_dirty -eq "000000000000000000000000" ]
+ TEST [ $brick1_pending -eq "000000000000000000000000" ]
+
+ # Accessing the file should be allowed and sink brick xattrs must be reset.
+ EXPECT "value2" echo $(getfattr --only-values -n user.value $M0/mdata.txt)
+ TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+ TEST [ $brick1_dirty -eq "000000000000000000000000" ]
+ TEST [ $brick1_pending -eq "000000000000000000000000" ]
+
+#Enable shd and heal the file.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 7173e8b032e..024f0f5f589 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -53,6 +53,13 @@ afr_quorum_errno (afr_private_t *priv)
return EROFS;
}
+int
+afr_fav_child_reset_sink_xattrs (void *opaque);
+
+int
+afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame,
+ void *opaque);
+
gf_boolean_t
afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,
int32_t *op_errno)
@@ -1011,6 +1018,82 @@ afr_selfheal_enabled (xlator_t *this)
return data || priv->metadata_self_heal || priv->entry_self_heal;
}
+
+int
+afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int op_errno = ENOMEM;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable (frame, inode, this, local->readable,
+ &event_generation,
+ local->transaction.type);
+
+ if (ret == -EIO || !event_generation) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = -EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy (this, local->replies,
+ inode, NULL);
+ if (read_subvol == -1) {
+ err = -EIO;
+ goto refresh_done;
+ }
+
+ heal_frame = copy_frame (frame);
+ if (!heal_frame) {
+ err = -EIO;
+ goto refresh_done;
+ }
+ heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
+ if (!heal_local) {
+ err = -EIO;
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = -EIO;
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->heal_frame = frame;
+ ret = synctask_new (this->ctx->env,
+ afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk,
+ heal_frame,
+ heal_frame);
+ return 0;
+ }
+
+refresh_done:
+ afr_local_replies_wipe (local, this->private);
+ local->refreshfn (frame, this, err);
+
+ return 0;
+}
+
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
@@ -1029,8 +1112,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
err = afr_inode_refresh_err (frame, this);
- afr_local_replies_wipe (local, this->private);
-
if (ret && afr_selfheal_enabled (this) && start_heal) {
heal_frame = copy_frame (frame);
if (!heal_frame)
@@ -1047,7 +1128,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
}
refresh_done:
- local->refreshfn (frame, this, err);
+ afr_txn_refresh_done (frame, this, err);
return 0;
}
@@ -5110,6 +5191,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
afr_private_t *priv = this->private;
@@ -5118,6 +5200,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -5134,6 +5217,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
ret = __afr_selfheal_metadata_prepare (frame, this, inode,
locked_on, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies,
pending);
*msh = afr_decide_heal_info (priv, sources, ret);
@@ -5157,6 +5241,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
afr_private_t *priv = NULL;
fd_t *fd = NULL;
struct afr_reply *locked_replies = NULL;
@@ -5170,6 +5255,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
/* Heal-info does an open() on the file being examined so that the
* current eager-lock holding client, if present, at some point sees
@@ -5209,6 +5295,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
ret = __afr_selfheal_data_prepare (frame, this, inode,
data_lock, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies,
pflag);
*dsh = afr_decide_heal_info (priv, sources, ret);
@@ -5796,3 +5883,115 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
if (newloc_xdata)
dict_unref (newloc_xdata);
}
+
+int
+afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame,
+ void *opaque)
+{
+
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
+
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
+
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh (txn_frame, this, local->inode, NULL,
+ local->refreshfn);
+
+ if (heal_frame)
+ AFR_STACK_DESTROY (heal_frame);
+
+ return 0;
+}
+
+int
+afr_fav_child_reset_sink_xattrs (void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *) opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain (txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain (txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name,
+ 0, 0, locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare (heal_frame, this,
+ inode, locked_on,
+ sources, sinks,
+ healed_sinks,
+ undid_pending,
+ locked_replies,
+ NULL);
+ }
+data_unlock:
+ afr_selfheal_uninodelk (heal_frame, this, inode, this->name,
+ 0, 0, locked_on);
+ }
+
+ if (m_spb) {
+ memset (locked_on, 0, sizeof (*locked_on) * priv->child_count);
+ memset (undid_pending, 0,
+ sizeof (*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name,
+ LLONG_MAX-1, 0, locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare (heal_frame, this,
+ inode, locked_on,
+ sources, sinks,
+ healed_sinks,
+ undid_pending,
+ locked_replies,
+ NULL);
+
+ }
+mdata_unlock:
+ afr_selfheal_uninodelk (heal_frame, this, inode, this->name,
+ LLONG_MAX-1, 0, locked_on);
+ }
+
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index cb81af42510..26b0f1c2a11 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -64,7 +64,6 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
{
afr_local_t *local = NULL;
int read_subvol = 0;
- int event_generation = 0;
inode_t *inode = NULL;
int ret = -1;
int spb_choice = -1;
@@ -76,18 +75,12 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
local->op_errno = -err;
local->op_ret = -1;
read_subvol = -1;
+ gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,
+ "Failing %s on gfid %s: split-brain observed.",
+ gf_fop_list[local->op], uuid_utoa (inode->gfid));
goto readfn;
}
- ret = afr_inode_get_readable (frame, inode, this, local->readable,
- &event_generation,
- local->transaction.type);
-
- if (ret == -EIO || !event_generation)
- /* Even after refresh, we don't have a good
- read subvolume. Time to bail */
- AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
-
read_subvol = afr_read_subvol_select_by_policy (inode, this,
local->readable, NULL);
if (read_subvol == -1)
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index b6720ccfa5c..c3b62e3781a 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -145,8 +145,10 @@ err:
int
afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, afr_transaction_type type,
- struct afr_reply *replies, unsigned char *locked_on)
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -214,6 +216,10 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
and inspected on.
*/
continue;
+ if (undid_pending[i])
+ /* We already unset the pending xattrs in
+ * _afr_fav_child_reset_sink_xattrs(). */
+ continue;
xattr = afr_selfheal_output_xattr (this, local->need_full_crawl,
type, output_dirty,
@@ -735,6 +741,42 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)
return fav_child;
}
+int
+afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+
+ priv = this->private;
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ fav_child = afr_sh_fav_by_size (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "SIZE";
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ fav_child = afr_sh_fav_by_ctime (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "CTIME";
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ fav_child = afr_sh_fav_by_mtime (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "MTIME";
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ fav_child = afr_sh_fav_by_majority (this, replies, inode);
+ if (policy_str && fav_child >= 0)
+ *policy_str = "MAJORITY";
+ break;
+ case AFR_FAV_CHILD_NONE:
+ default:
+ break;
+ }
+
+ return fav_child;
+}
int
afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
@@ -756,24 +798,9 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
time_t time;
priv = this->private;
- if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) {
- fav_child = afr_sh_fav_by_majority (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "MAJORITY";
- } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) {
- fav_child = afr_sh_fav_by_mtime (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "MTIME";
- } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) {
- fav_child = afr_sh_fav_by_ctime (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "CTIME";
- } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) {
- fav_child = afr_sh_fav_by_size (this, replies, inode);
- if (fav_child >= 0)
- policy_str = "SIZE";
- }
+ fav_child = afr_sh_get_fav_by_policy (this, replies, inode,
+ &policy_str);
if (fav_child > priv->child_count - 1) {
gf_msg (this->name, GF_LOG_ERROR, 0,
AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) "
@@ -829,6 +856,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
dict_t *xdata_req = NULL;
int heal_op = -1;
int ret = -1;
+ int source = -1;
local = frame->local;
priv = this->private;
@@ -838,27 +866,96 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
if (ret)
goto autoheal;
- ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,
+ source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,
sources, sinks,
healed_sinks,
locked_on, replies,
type, heal_op);
- return ret;
+ return source;
autoheal:
/* Automatically heal if fav_child_policy is set. */
if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
- ret = afr_mark_split_brain_source_sinks_by_policy (frame, this,
- inode,
- sources,
- sinks,
+ source = afr_mark_split_brain_source_sinks_by_policy (frame,
+ this,
+ inode,
+ sources,
+ sinks,
healed_sinks,
- locked_on,
- replies,
- type);
+ locked_on,
+ replies,
+ type);
+ if (source != -1) {
+ ret = dict_set_int32 (xdata_req, "fav-child-policy", 1);
+ if (ret)
+ return -1;
+ }
}
- return ret;
+ return source;
+}
+
+int
+_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!dict_get (local->xdata_req, "fav-child-policy"))
+ return 0;
+
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
+
+ input_dirty = alloca0 (priv->child_count * sizeof (int));
+ input_matrix = ALLOC_MATRIX (priv->child_count, int);
+ output_dirty = alloca0 (priv->child_count * sizeof (int));
+ output_matrix = ALLOC_MATRIX (priv->child_count, int);
+
+ afr_selfheal_extract_xattr (this, replies, type, input_dirty,
+ input_matrix);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
+ output_dirty[i] = -input_dirty[i];
+ output_matrix[i][source] = -input_matrix[i][source];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] || !locked_on[i])
+ continue;
+ xattr = afr_selfheal_output_xattr (this, _gf_false, type,
+ output_dirty, output_matrix,
+ i, NULL);
+
+ afr_selfheal_post_op (frame, this, inode, i, xattr, xdata);
+
+ undid_pending[i] = 1;
+ dict_unref (xattr);
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
}
gf_boolean_t
@@ -1906,11 +2003,15 @@ afr_selfheal (xlator_t *this, uuid_t gfid)
{
int ret = -1;
call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
frame = afr_frame_create (this);
if (!frame)
return ret;
+ local = frame->local;
+ local->xdata_req = dict_new();
+
ret = afr_selfheal_do (frame, this, gfid);
if (frame)
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index fbbbd192323..d032284926c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -580,6 +580,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
unsigned char *sinks,
unsigned char *healed_sinks,
unsigned char *locked_on,
+ unsigned char *undid_pending,
struct afr_reply *replies,
uint64_t *witness)
{
@@ -603,6 +604,11 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
"file=%s", this->name, uuid_utoa(inode->gfid));
return -EIO;
}
+
+ _afr_fav_child_reset_sink_xattrs (frame, this, inode, source,
+ healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION,
+ locked_on, replies);
return source;
}
@@ -642,6 +648,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
inode_t *inode, unsigned char *locked_on,
unsigned char *sources, unsigned char *sinks,
unsigned char *healed_sinks,
+ unsigned char *undid_pending,
struct afr_reply *replies, gf_boolean_t *pflag)
{
int ret = -1;
@@ -677,8 +684,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
source = __afr_selfheal_data_finalize_source (frame, this, inode,
sources, sinks,
healed_sinks,
- locked_on, replies,
- witness);
+ locked_on, undid_pending,
+ replies, witness);
if (source < 0)
return -EIO;
@@ -696,6 +703,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
unsigned char *sinks = NULL;
unsigned char *data_lock = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
int source = -1;
gf_boolean_t did_sh = _gf_true;
@@ -707,6 +715,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -726,9 +735,8 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = __afr_selfheal_data_prepare (frame, this, fd->inode,
data_lock, sources, sinks,
- healed_sinks,
- locked_replies,
- NULL);
+ healed_sinks, undid_pending,
+ locked_replies, NULL);
if (ret < 0)
goto unlock;
@@ -787,7 +795,7 @@ restore_time:
}
ret = afr_selfheal_undo_pending (frame, this, fd->inode,
sources, sinks, healed_sinks,
- AFR_DATA_TRANSACTION,
+ undid_pending, AFR_DATA_TRANSACTION,
locked_replies, data_lock);
skip_undo_pending:
afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a0e361ab987..d8fe5422372 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -913,6 +913,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
unsigned char *data_lock = NULL;
unsigned char *postop_lock = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -924,6 +925,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
postop_lock = alloca0 (priv->child_count);
@@ -996,6 +998,7 @@ unlock:
ret = afr_selfheal_undo_pending (frame, this, fd->inode,
sources, sinks, healed_sinks,
+ undid_pending,
AFR_ENTRY_TRANSACTION,
locked_replies, postop_lock);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 9dfe4a14e8c..5839ddc2e0f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -203,6 +203,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
unsigned char *sources,
unsigned char *sinks,
unsigned char *healed_sinks,
+ unsigned char *undid_pending,
unsigned char *locked_on,
struct afr_reply *replies)
{
@@ -224,8 +225,14 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
healed_sinks,
locked_on, replies,
AFR_METADATA_TRANSACTION);
- if (source >= 0)
+ if (source >= 0) {
+ _afr_fav_child_reset_sink_xattrs (frame, this, inode,
+ source, healed_sinks,
+ undid_pending,
+ AFR_METADATA_TRANSACTION,
+ locked_on, replies);
return source;
+ }
/* If this is a directory mtime/ctime only split brain
use the most recent */
@@ -308,6 +315,7 @@ int
__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *locked_on, unsigned char *sources,
unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
struct afr_reply *replies, gf_boolean_t *pflag)
{
int ret = -1;
@@ -362,6 +370,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i
source = __afr_selfheal_metadata_finalize_source (frame, this, inode,
sources, sinks,
healed_sinks,
+ undid_pending,
locked_on, replies);
if (source < 0)
@@ -379,6 +388,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
unsigned char *sinks = NULL;
unsigned char *data_lock = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
gf_boolean_t did_sh = _gf_true;
int source = -1;
@@ -388,6 +398,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
data_lock = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -403,6 +414,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = __afr_selfheal_metadata_prepare (frame, this, inode,
data_lock, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies, NULL);
if (ret < 0)
goto unlock;
@@ -421,6 +433,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_undo_pending (frame, this, inode, sources,
sinks, healed_sinks,
+ undid_pending,
AFR_METADATA_TRANSACTION,
locked_replies, data_lock);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index ec5337e60b2..80b7f3a125d 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -167,8 +167,10 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
int
afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *sources, unsigned char *sinks,
- unsigned char *healed_sinks, afr_transaction_type type,
- struct afr_reply *replies, unsigned char *locked_on);
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on);
int
afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir,
@@ -229,6 +231,19 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
afr_transaction_type type);
int
+afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str);
+
+int
+_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies);
+
+int
afr_get_child_index_from_name (xlator_t *this, char *name);
gf_boolean_t
@@ -239,8 +254,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,
inode_t *inode, unsigned char *locked_on,
unsigned char *sources,
unsigned char *sinks, unsigned char *healed_sinks,
- struct afr_reply *replies,
- gf_boolean_t *flag);
+ unsigned char *undid_pending,
+ struct afr_reply *replies, gf_boolean_t *flag);
int
__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
@@ -248,6 +263,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,
unsigned char *sources,
unsigned char *sinks,
unsigned char *healed_sinks,
+ unsigned char *undid_pending,
struct afr_reply *replies,
gf_boolean_t *flag);
int
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 4906921ca6a..e6878eb35ff 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2527,19 +2527,12 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
if (err) {
local->op_errno = -err;
local->op_ret = -1;
- goto fail;
- }
- ret = afr_inode_get_readable (frame, local->inode, this,
- local->readable, NULL,
- local->transaction.type);
- if (ret < 0) {
gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN,
"Failing %s on gfid %s: split-brain observed.",
gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
- local->op_ret = -1;
- local->op_errno = -ret;
goto fail;
}
+
afr_transaction_start (frame, this);
return 0;
fail: