diff options
| -rw-r--r-- | tests/bugs/replicate/bug-1386188-sbrain-fav-child.t | 82 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 205 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 13 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 159 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 20 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 15 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 24 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 9 | 
9 files changed, 469 insertions, 61 deletions
diff --git a/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t new file mode 100644 index 00000000000..d049d95ef9a --- /dev/null +++ b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t @@ -0,0 +1,82 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST touch $M0/data.txt +TEST touch $M0/mdata.txt + +#Create data and metadata split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024 +TEST setfattr -n user.value -v value1 $M0/mdata.txt +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024 +TEST setfattr -n user.value -v value2 $M0/mdata.txt + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +## Check that the file still in split-brain, +  ## I/O fails +  cat $M0/data.txt > /dev/null +  EXPECT "1" echo $? +  ## pending xattrs blame each other. +  brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/data.txt) +  brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt) +  TEST [ $brick0_pending -ne "000000000000000000000000" ] +  TEST [ $brick1_pending -ne "000000000000000000000000" ] + +  ## I/O fails +  getfattr -n user.value $M0/mdata.txt +  EXPECT "1" echo $? +  brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/mdata.txt) +  brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt) +  TEST [ $brick0_pending -ne "000000000000000000000000" ] +  TEST [ $brick1_pending -ne "000000000000000000000000" ] + +## Let us use mtime as fav-child policy. So brick0 will be source. +   # Set dirty (data part) on the sink brick to check if it is reset later along with the pending xattr. +   TEST setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/${V0}1/data.txt +   # Set dirty (metadata part) on the sink brick to check if it is reset later along with the pending xattr. +   TEST setfattr -n trusted.afr.dirty -v 0x000000000000000100000000 $B0/${V0}1/mdata.txt + +   TEST $CLI volume set $V0 favorite-child-policy mtime + +   # Reading the file should be allowed and sink brick xattrs must be reset. +   cat $M0/data.txt > /dev/null +   EXPECT "0" echo $? +   TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt) +   TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt) +   TEST [ $brick1_dirty -eq "000000000000000000000000" ] +   TEST [ $brick1_pending -eq "000000000000000000000000" ] + +   # Accessing the file should be allowed and sink brick xattrs must be reset. +   EXPECT "value2" echo $(getfattr --only-values -n user.value  $M0/mdata.txt) +   TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt) +   TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt) +   TEST [ $brick1_dirty -eq "000000000000000000000000" ] +   TEST [ $brick1_pending -eq "000000000000000000000000" ] + +#Enable shd and heal the file. +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT 0 get_pending_heal_count $V0 +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 7173e8b032e..024f0f5f589 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -53,6 +53,13 @@ afr_quorum_errno (afr_private_t *priv)          return EROFS;  } +int +afr_fav_child_reset_sink_xattrs (void *opaque); + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame, +                                     void *opaque); +  gf_boolean_t  afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,                                 int32_t *op_errno) @@ -1011,6 +1018,82 @@ afr_selfheal_enabled (xlator_t *this)  	return data || priv->metadata_self_heal || priv->entry_self_heal;  } + +int +afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + +        call_frame_t *heal_frame = NULL; +        afr_local_t *heal_local = NULL; +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        inode_t *inode = NULL; +        int event_generation = 0; +        int read_subvol = -1; +        int op_errno = ENOMEM; +        int ret = 0; + +        local = frame->local; +        inode = local->inode; +        priv = this->private; + +        if (err) +                goto refresh_done; + +        if (local->op == GF_FOP_LOOKUP) +                goto refresh_done; + +        ret = afr_inode_get_readable (frame, inode, this, local->readable, +                                      &event_generation, +                                      local->transaction.type); + +        if (ret == -EIO || !event_generation) { +                /* No readable subvolume even after refresh ==> splitbrain.*/ +                if (!priv->fav_child_policy) { +                        err = -EIO; +                        goto refresh_done; +                } +                read_subvol = afr_sh_get_fav_by_policy (this, local->replies, +                                                        inode, NULL); +                if (read_subvol == -1) { +                        err = -EIO; +                        goto refresh_done; +                } + +                heal_frame = copy_frame (frame); +                if (!heal_frame) { +                        err = -EIO; +                        goto refresh_done; +                } +                heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; +                heal_local = AFR_FRAME_INIT (heal_frame, op_errno); +                if (!heal_local) { +                        err = -EIO; +                        AFR_STACK_DESTROY (heal_frame); +                        goto refresh_done; +                } +                heal_local->xdata_req = dict_new(); +                if (!heal_local->xdata_req) { +                        err = -EIO; +                        AFR_STACK_DESTROY (heal_frame); +                        goto refresh_done; +                } +                heal_local->heal_frame = frame; +                ret = synctask_new (this->ctx->env, +                                    afr_fav_child_reset_sink_xattrs, +                                    afr_fav_child_reset_sink_xattrs_cbk, +                                    heal_frame, +                                    heal_frame); +                return 0; +        } + +refresh_done: +        afr_local_replies_wipe (local, this->private); +        local->refreshfn (frame, this, err); + +        return 0; +} +  int  afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)  { @@ -1029,8 +1112,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)  	err = afr_inode_refresh_err (frame, this); -        afr_local_replies_wipe (local, this->private); -  	if (ret && afr_selfheal_enabled (this) && start_heal) {                  heal_frame = copy_frame (frame);                  if (!heal_frame) @@ -1047,7 +1128,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)          }  refresh_done: -        local->refreshfn (frame, this, err); +        afr_txn_refresh_done (frame, this, err);  	return 0;  } @@ -5110,6 +5191,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,          unsigned char *sources = NULL;          unsigned char *sinks = NULL;          unsigned char *healed_sinks = NULL; +        unsigned char *undid_pending = NULL;          struct afr_reply *locked_replies = NULL;          afr_private_t *priv = this->private; @@ -5118,6 +5200,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,          sources = alloca0 (priv->child_count);          sinks = alloca0 (priv->child_count);          healed_sinks = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count);          locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -5134,6 +5217,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,                  ret = __afr_selfheal_metadata_prepare (frame, this, inode,                                                         locked_on, sources,                                                         sinks, healed_sinks, +                                                       undid_pending,                                                         locked_replies,                                                         pending);                  *msh = afr_decide_heal_info (priv, sources, ret); @@ -5157,6 +5241,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,          unsigned char *sources = NULL;          unsigned char *sinks = NULL;          unsigned char *healed_sinks = NULL; +        unsigned char *undid_pending = NULL;          afr_private_t   *priv = NULL;          fd_t          *fd = NULL;          struct afr_reply *locked_replies = NULL; @@ -5170,6 +5255,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,          sources = alloca0 (priv->child_count);          sinks = alloca0 (priv->child_count);          healed_sinks = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count);          /* Heal-info does an open() on the file being examined so that the           * current eager-lock holding client, if present, at some point sees @@ -5209,6 +5295,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,                          ret = __afr_selfheal_data_prepare (frame, this, inode,                                                             data_lock, sources,                                                             sinks, healed_sinks, +                                                           undid_pending,                                                             locked_replies,                                                             pflag);                          *dsh = afr_decide_heal_info (priv, sources, ret); @@ -5796,3 +5883,115 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata,          if (newloc_xdata)                  dict_unref (newloc_xdata);  } + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame, +                                     void *opaque) +{ + +        call_frame_t *txn_frame = NULL; +        afr_local_t *local = NULL; +        afr_local_t *heal_local = NULL; +        xlator_t *this = NULL; + +        heal_local = heal_frame->local; +        txn_frame = heal_local->heal_frame; +        local = txn_frame->local; +        this = txn_frame->this; + +        /* Refresh the inode agan and proceed with the transaction.*/ +        afr_inode_refresh (txn_frame, this, local->inode, NULL, +                           local->refreshfn); + +        if (heal_frame) +                AFR_STACK_DESTROY (heal_frame); + +        return 0; +} + +int +afr_fav_child_reset_sink_xattrs (void *opaque) +{ +        call_frame_t *heal_frame = NULL; +        call_frame_t *txn_frame = NULL; +        xlator_t *this = NULL; +        gf_boolean_t d_spb = _gf_false; +        gf_boolean_t m_spb = _gf_false; +        afr_local_t *heal_local = NULL; +        afr_local_t *txn_local = NULL; +        afr_private_t *priv = NULL; +        inode_t *inode  = NULL; +        unsigned char *locked_on = NULL; +        unsigned char *sources = NULL; +        unsigned char *sinks = NULL; +        unsigned char *healed_sinks = NULL; +        unsigned char *undid_pending = NULL; +        struct afr_reply *locked_replies = NULL; +        int ret = 0; + +        heal_frame = (call_frame_t *) opaque; +        heal_local = heal_frame->local; +        txn_frame = heal_local->heal_frame; +        txn_local = txn_frame->local; +        this = txn_frame->this; +        inode = txn_local->inode; +        priv = this->private; +        locked_on = alloca0 (priv->child_count); +        sources = alloca0 (priv->child_count); +        sinks = alloca0 (priv->child_count); +        healed_sinks = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count); +        locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); + +        ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, +                                   AFR_DATA_TRANSACTION, &d_spb); + +        ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, +                                   AFR_METADATA_TRANSACTION, &m_spb); + +        /* Take appropriate locks and reset sink xattrs. */ +        if (d_spb) { +                ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, +                                            0, 0, locked_on); +                { +                        if (ret < AFR_SH_MIN_PARTICIPANTS) +                                goto data_unlock; +                        ret = __afr_selfheal_data_prepare (heal_frame, this, +                                                           inode, locked_on, +                                                           sources, sinks, +                                                           healed_sinks, +                                                           undid_pending, +                                                           locked_replies, +                                                           NULL); +                } +data_unlock: +                afr_selfheal_uninodelk (heal_frame, this, inode, this->name, +                                        0, 0, locked_on); +        } + +        if (m_spb) { +                memset (locked_on, 0, sizeof (*locked_on) * priv->child_count); +                memset (undid_pending, 0, +                        sizeof (*undid_pending) * priv->child_count); +                ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, +                                            LLONG_MAX-1, 0, locked_on); +                { +                        if (ret < AFR_SH_MIN_PARTICIPANTS) +                                goto mdata_unlock; +                        ret = __afr_selfheal_metadata_prepare (heal_frame, this, +                                                               inode, locked_on, +                                                               sources, sinks, +                                                               healed_sinks, +                                                               undid_pending, +                                                               locked_replies, +                                                               NULL); + +                } +mdata_unlock: +                afr_selfheal_uninodelk (heal_frame, this, inode, this->name, +                                        LLONG_MAX-1, 0, locked_on); +        } + +        return ret; + +} diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index cb81af42510..26b0f1c2a11 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -64,7 +64,6 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)  {  	afr_local_t *local = NULL;  	int read_subvol = 0; -	int event_generation = 0;  	inode_t *inode = NULL;  	int ret = -1;          int spb_choice = -1; @@ -76,18 +75,12 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)                  local->op_errno = -err;                  local->op_ret = -1;                  read_subvol = -1; +                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, +                        "Failing %s on gfid %s: split-brain observed.", +                        gf_fop_list[local->op], uuid_utoa (inode->gfid));                  goto readfn;          } -	ret = afr_inode_get_readable (frame, inode, this, local->readable, -			              &event_generation, -				      local->transaction.type); - -	if (ret == -EIO || !event_generation) -		/* Even after refresh, we don't have a good -		   read subvolume. Time to bail */ -                AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); -  	read_subvol = afr_read_subvol_select_by_policy (inode, this,  							local->readable, NULL);  	if (read_subvol == -1) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b6720ccfa5c..c3b62e3781a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -145,8 +145,10 @@ err:  int  afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,  			   unsigned char *sources, unsigned char *sinks, -			   unsigned char *healed_sinks, afr_transaction_type type, -			   struct afr_reply *replies, unsigned char *locked_on) +			   unsigned char *healed_sinks, +                           unsigned char *undid_pending, +                           afr_transaction_type type, struct afr_reply *replies, +                           unsigned char *locked_on)  {  	afr_private_t *priv = NULL;          afr_local_t *local = NULL; @@ -214,6 +216,10 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,  			   and inspected on.  			*/  			continue; +                if (undid_pending[i]) +                        /* We already unset the pending xattrs in +                         * _afr_fav_child_reset_sink_xattrs(). */ +                        continue;  		xattr = afr_selfheal_output_xattr (this, local->need_full_crawl,                                                     type, output_dirty, @@ -735,6 +741,42 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)          return fav_child;  } +int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, +                          inode_t *inode, char **policy_str) +{ +        afr_private_t *priv = NULL; +        int fav_child = -1; + +        priv = this->private; +        switch (priv->fav_child_policy) { +        case AFR_FAV_CHILD_BY_SIZE: +                fav_child = afr_sh_fav_by_size (this, replies, inode); +                if (policy_str && fav_child >= 0) +                        *policy_str = "SIZE"; +                break; +        case AFR_FAV_CHILD_BY_CTIME: +                fav_child = afr_sh_fav_by_ctime (this, replies, inode); +                if (policy_str && fav_child >= 0) +                        *policy_str = "CTIME"; +                break; +        case AFR_FAV_CHILD_BY_MTIME: +                fav_child = afr_sh_fav_by_mtime (this, replies, inode); +                if (policy_str && fav_child >= 0) +                        *policy_str = "MTIME"; +                break; +        case AFR_FAV_CHILD_BY_MAJORITY: +                fav_child = afr_sh_fav_by_majority (this, replies, inode); +                if (policy_str && fav_child >= 0) +                        *policy_str = "MAJORITY"; +                break; +        case AFR_FAV_CHILD_NONE: +        default: +                break; +        } + +        return fav_child; +}  int  afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, @@ -756,24 +798,9 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,          time_t time;          priv = this->private; -        if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY)  { -                fav_child = afr_sh_fav_by_majority (this, replies, inode); -                if (fav_child >= 0) -                        policy_str = "MAJORITY"; -        } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME)  { -                fav_child = afr_sh_fav_by_mtime (this, replies, inode); -                if (fav_child >= 0) -                        policy_str = "MTIME"; -        } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME)  { -                fav_child = afr_sh_fav_by_ctime (this, replies, inode); -                if (fav_child >= 0) -                        policy_str = "CTIME"; -        } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE)  { -                fav_child = afr_sh_fav_by_size (this, replies, inode); -                if (fav_child >= 0) -                        policy_str = "SIZE"; -        } +        fav_child = afr_sh_get_fav_by_policy (this, replies, inode, +                                              &policy_str);          if (fav_child > priv->child_count - 1) {                  gf_msg (this->name, GF_LOG_ERROR, 0,                          AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) " @@ -829,6 +856,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,          dict_t *xdata_req = NULL;          int heal_op = -1;          int ret = -1; +        int source = -1;          local = frame->local;          priv = this->private; @@ -838,27 +866,96 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,          if (ret)                  goto autoheal; -        ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, +        source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,                                                              sources, sinks,                                                              healed_sinks,                                                              locked_on, replies,                                                              type, heal_op); -        return ret; +        return source;  autoheal:          /* Automatically heal if fav_child_policy is set. */          if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) { -                ret = afr_mark_split_brain_source_sinks_by_policy (frame, this, -                                                                   inode, -                                                                   sources, -                                                                   sinks, +                source = afr_mark_split_brain_source_sinks_by_policy (frame, +                                                                      this, +                                                                      inode, +                                                                      sources, +                                                                      sinks,                                                                     healed_sinks, -                                                                   locked_on, -                                                                   replies, -                                                                   type); +                                                                      locked_on, +                                                                      replies, +                                                                      type); +                if (source != -1) { +                        ret = dict_set_int32 (xdata_req, "fav-child-policy", 1); +                        if (ret) +                                return -1; +                }          } -        return ret; +        return source; +} + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, +                                  inode_t *inode, int source, +                                  unsigned char *healed_sinks, +                                  unsigned char *undid_pending, +                                  afr_transaction_type type, +                                  unsigned char *locked_on, +                                  struct afr_reply *replies) +{ +        afr_private_t *priv = NULL; +        afr_local_t *local = NULL; +        int *input_dirty = NULL; +        int **input_matrix = NULL; +	int *output_dirty = NULL; +	int **output_matrix = NULL; +        dict_t *xattr = NULL; +        dict_t *xdata = NULL; +        int i = 0; + +        priv = this->private; +        local = frame->local; + +        if (!dict_get (local->xdata_req, "fav-child-policy")) +                return 0; + +        xdata = dict_new(); +        if (!xdata) +                return -1; + +        input_dirty = alloca0 (priv->child_count * sizeof (int)); +	input_matrix = ALLOC_MATRIX (priv->child_count, int); +	output_dirty = alloca0 (priv->child_count * sizeof (int)); +	output_matrix = ALLOC_MATRIX (priv->child_count, int); + +        afr_selfheal_extract_xattr (this, replies, type, input_dirty, +                                    input_matrix); + +        for (i = 0; i < priv->child_count; i++) { +                if (i == source || !healed_sinks[i]) +                        continue; +                output_dirty[i] = -input_dirty[i]; +                output_matrix[i][source] = -input_matrix[i][source]; +        } + +        for (i = 0; i < priv->child_count; i++) { +                if (!healed_sinks[i] || !locked_on[i]) +                        continue; +                xattr = afr_selfheal_output_xattr (this, _gf_false, type, +                                                   output_dirty, output_matrix, +                                                   i, NULL); + +                afr_selfheal_post_op (frame, this, inode, i, xattr, xdata); + +                undid_pending[i] = 1; +                dict_unref (xattr); +        } + +        if (xdata) +                dict_unref (xdata); + +        return 0;  }  gf_boolean_t @@ -1906,11 +2003,15 @@ afr_selfheal (xlator_t *this, uuid_t gfid)  {          int           ret   = -1;  	call_frame_t *frame = NULL; +        afr_local_t *local = NULL;  	frame = afr_frame_create (this);  	if (!frame)  		return ret; +        local = frame->local; +        local->xdata_req = dict_new(); +          ret = afr_selfheal_do (frame, this, gfid);  	if (frame) diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index fbbbd192323..d032284926c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -580,6 +580,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,                                       unsigned char *sinks,  				     unsigned char *healed_sinks,  				     unsigned char *locked_on, +				     unsigned char *undid_pending,  				     struct afr_reply *replies,                                       uint64_t *witness)  { @@ -603,6 +604,11 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,                                   "file=%s", this->name, uuid_utoa(inode->gfid));                          return -EIO;                  } + +                _afr_fav_child_reset_sink_xattrs (frame, this, inode, source, +                                                 healed_sinks, undid_pending, +                                                 AFR_DATA_TRANSACTION, +                                                 locked_on, replies);                  return source;  	} @@ -642,6 +648,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,                               inode_t *inode, unsigned char *locked_on,                               unsigned char *sources, unsigned char *sinks,                               unsigned char *healed_sinks, +                             unsigned char *undid_pending,  			     struct afr_reply *replies, gf_boolean_t *pflag)  {  	int ret = -1; @@ -677,8 +684,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,  	source = __afr_selfheal_data_finalize_source (frame, this, inode,                                                        sources, sinks,                                                        healed_sinks, -                                                      locked_on, replies, -                                                      witness); +                                                      locked_on, undid_pending, +                                                      replies, witness);  	if (source < 0)  		return -EIO; @@ -696,6 +703,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,  	unsigned char *sinks = NULL;  	unsigned char *data_lock = NULL;  	unsigned char *healed_sinks = NULL; +	unsigned char *undid_pending = NULL;  	struct afr_reply *locked_replies = NULL;  	int source = -1;          gf_boolean_t did_sh = _gf_true; @@ -707,6 +715,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,  	sinks = alloca0 (priv->child_count);  	healed_sinks = alloca0 (priv->child_count);  	data_lock = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count);  	locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -726,9 +735,8 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,  		ret = __afr_selfheal_data_prepare (frame, this, fd->inode,                                                     data_lock, sources, sinks, -                                                   healed_sinks, -						   locked_replies, -                                                   NULL); +                                                   healed_sinks, undid_pending, +                                                   locked_replies, NULL);  		if (ret < 0)  			goto unlock; @@ -787,7 +795,7 @@ restore_time:          }          ret = afr_selfheal_undo_pending (frame, this, fd->inode,                                           sources, sinks, healed_sinks, -                                         AFR_DATA_TRANSACTION, +                                         undid_pending, AFR_DATA_TRANSACTION,                                           locked_replies, data_lock);  skip_undo_pending:  	afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index a0e361ab987..d8fe5422372 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -913,6 +913,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,  	unsigned char          *data_lock             = NULL;          unsigned char          *postop_lock           = NULL;  	unsigned char          *healed_sinks          = NULL; +	unsigned char          *undid_pending         = NULL;  	struct afr_reply       *locked_replies        = NULL;          afr_local_t            *local                 = NULL;  	afr_private_t          *priv                  = NULL; @@ -924,6 +925,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,  	sources = alloca0 (priv->child_count);  	sinks = alloca0 (priv->child_count);  	healed_sinks = alloca0 (priv->child_count); +	undid_pending = alloca0 (priv->child_count);  	data_lock = alloca0 (priv->child_count);          postop_lock = alloca0 (priv->child_count); @@ -996,6 +998,7 @@ unlock:                  ret = afr_selfheal_undo_pending (frame, this, fd->inode,                                                   sources, sinks, healed_sinks, +                                                 undid_pending,                                                   AFR_ENTRY_TRANSACTION,                                                   locked_replies, postop_lock);          } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 9dfe4a14e8c..5839ddc2e0f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -203,6 +203,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,                                           unsigned char *sources,                                           unsigned char *sinks,  					 unsigned char *healed_sinks, +					 unsigned char *undid_pending,  					 unsigned char *locked_on,  					 struct afr_reply *replies)  { @@ -224,8 +225,14 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,                                                              healed_sinks,                                                              locked_on, replies,                                                        AFR_METADATA_TRANSACTION); -                if (source >= 0) +                if (source >= 0) { +                        _afr_fav_child_reset_sink_xattrs (frame, this, inode, +                                                         source, healed_sinks, +                                                         undid_pending, +                                                       AFR_METADATA_TRANSACTION, +                                                         locked_on, replies);                          return source; +                }  		/* If this is a directory mtime/ctime only split brain  		   use the most recent */ @@ -308,6 +315,7 @@ int  __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,  				 unsigned char *locked_on, unsigned char *sources,  				 unsigned char *sinks, unsigned char *healed_sinks, +                                 unsigned char *undid_pending,  				 struct afr_reply *replies, gf_boolean_t *pflag)  {  	int ret = -1; @@ -362,6 +370,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i  	source = __afr_selfheal_metadata_finalize_source (frame, this, inode,                                                            sources, sinks,                                                            healed_sinks, +                                                          undid_pending,                                                            locked_on, replies);  	if (source < 0) @@ -379,6 +388,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)  	unsigned char *sinks = NULL;  	unsigned char *data_lock = NULL;  	unsigned char *healed_sinks = NULL; +	unsigned char *undid_pending = NULL;  	struct afr_reply *locked_replies = NULL;          gf_boolean_t did_sh = _gf_true;  	int source = -1; @@ -388,6 +398,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)  	sources = alloca0 (priv->child_count);  	sinks = alloca0 (priv->child_count);  	healed_sinks = alloca0 (priv->child_count); +	undid_pending = alloca0 (priv->child_count);  	data_lock = alloca0 (priv->child_count);  	locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -403,6 +414,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)  		ret = __afr_selfheal_metadata_prepare (frame, this, inode,                                                         data_lock, sources,                                                         sinks, healed_sinks, +                                                       undid_pending,  						       locked_replies, NULL);  		if (ret < 0)  			goto unlock; @@ -421,6 +433,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)                  ret = afr_selfheal_undo_pending (frame, this, inode, sources,                                                   sinks, healed_sinks, +                                                 undid_pending,                                                   AFR_METADATA_TRANSACTION,                                                   locked_replies, data_lock);  	} diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index ec5337e60b2..80b7f3a125d 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -167,8 +167,10 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,  int  afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,  			   unsigned char *sources, unsigned char *sinks, -			   unsigned char *healed_sinks, afr_transaction_type type, -			   struct afr_reply *replies, unsigned char *locked_on); +			   unsigned char *healed_sinks, +                           unsigned char *undid_pending, +                           afr_transaction_type type, struct afr_reply *replies, +                           unsigned char *locked_on);  int  afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir, @@ -229,6 +231,19 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,                                     afr_transaction_type type);  int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, +                          inode_t *inode, char **policy_str); + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, +                                 inode_t *inode, int source, +                                 unsigned char *healed_sinks, +                                 unsigned char *undid_pending, +                                 afr_transaction_type type, +                                 unsigned char *locked_on, +                                 struct afr_reply *replies); + +int  afr_get_child_index_from_name (xlator_t *this, char *name);  gf_boolean_t @@ -239,8 +254,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this,                               inode_t *inode, unsigned char *locked_on,                               unsigned char *sources,                               unsigned char *sinks, unsigned char *healed_sinks, -                             struct afr_reply *replies, -                             gf_boolean_t *flag); +                             unsigned char *undid_pending, +                             struct afr_reply *replies, gf_boolean_t *flag);  int  __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, @@ -248,6 +263,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this,                                   unsigned char *sources,                                   unsigned char *sinks,                                   unsigned char *healed_sinks, +                                 unsigned char *undid_pending,                                   struct afr_reply *replies,                                   gf_boolean_t *flag);  int diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 4906921ca6a..e6878eb35ff 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2527,19 +2527,12 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)          if (err) {                  local->op_errno = -err;                  local->op_ret = -1; -                goto fail; -        } -	ret = afr_inode_get_readable (frame, local->inode, this, -                                      local->readable, NULL, -				      local->transaction.type); -        if (ret < 0) {                  gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN,                          "Failing %s on gfid %s: split-brain observed.",                          gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); -                local->op_ret = -1; -                local->op_errno = -ret;                  goto fail;          } +          afr_transaction_start (frame, this);          return 0;  fail:  | 
