From 76788178ba725442c95541e37e56d4a83da2bb78 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 27 Sep 2018 17:26:52 +0530 Subject: afr: fix incorrect reporting of directory split-brain Backport of https://review.gluster.org/#/c/glusterfs/+/21135/ Problem: When a directory has dirty xattrs due to failed post-ops or when replace/reset brick is performed, AFR does a conservative merge as expected, but heal-info reports it as split-brain because there are no clear sources. Fix: Modify pending flag to contain information about pending heals and split-brains. For directories, if spit-brain flag is not set,just show them as needing heal and not being in split-brain. Fixes: bz#1633625 Change-Id: I09ef821f6887c87d315ae99e6b1de05103cd9383 BUG: 1633625 Signed-off-by: Ravishankar N --- tests/afr.rc | 2 +- .../bugs/replicate/bug-1626994-info-split-brain.t | 62 ++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 14 ++--- xlators/cluster/afr/src/afr-self-heal-common.c | 6 ++- xlators/cluster/afr/src/afr-self-heal-data.c | 2 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 2 +- xlators/cluster/afr/src/afr-self-heal-metadata.c | 3 +- xlators/cluster/afr/src/afr-self-heal.h | 8 +-- xlators/cluster/afr/src/afr.h | 3 ++ 9 files changed, 85 insertions(+), 17 deletions(-) create mode 100644 tests/bugs/replicate/bug-1626994-info-split-brain.t diff --git a/tests/afr.rc b/tests/afr.rc index bdf4075a233..1fd0310d361 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -2,7 +2,7 @@ function create_brick_xattrop_entry { local xattrop_dir=$(afr_get_index_path $1) - local base_entry=`ls $xattrop_dir` + local base_entry=`ls $xattrop_dir|grep xattrop` local gfid_str local params=`echo "$@" | cut -d' ' -f2-` echo $params diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t new file mode 100644 index 00000000000..86bfecb1a9e --- /dev/null +++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t @@ -0,0 +1,62 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +# Test to check dirs having dirty xattr do not show up in info split-brain. + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +# Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 +EXPECT "1" count_index_entries $B0/${V0}2 + +TEST mkdir $M0/dirty_dir +TEST mkdir $M0/pending_dir + +# Set dirty xattrs on all bricks to simulate the case where entry transaction +# succeeded only the pre-op phase. +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir +create_brick_xattrop_entry $B0/${V0}0 dirty_dir +# Should not show up as split-brain. +EXPECT "0" afr_get_split_brain_count $V0 + +# replace/reset brick case where the new brick has dirty and the other 2 bricks +# blame it should not be reported as split-brain. +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0 +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1 +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2 +create_brick_xattrop_entry $B0/${V0}0 "/" +# Should not show up as split-brain. +EXPECT "0" afr_get_split_brain_count $V0 + +# Set pending xattrs on all bricks blaming each other to simulate the case of +# entry split-brain. +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir +create_brick_xattrop_entry $B0/${V0}0 pending_dir +# Should show up as split-brain. +EXPECT "1" afr_get_split_brain_count $V0 + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 613f9d1db75..8ab7e24ae59 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5700,7 +5700,7 @@ out: int afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t *msh, - gf_boolean_t *pending) + unsigned char *pending) { int ret = -1; unsigned char *locked_on = NULL; @@ -5749,7 +5749,7 @@ out: int afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t *dsh, - gf_boolean_t *pflag) + unsigned char *pflag) { int ret = -1; unsigned char *data_lock = NULL; @@ -5810,7 +5810,7 @@ out: int afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this, inode_t *inode, - gf_boolean_t *esh, gf_boolean_t *pflag) + gf_boolean_t *esh, unsigned char *pflag) { int ret = -1; int source = -1; @@ -5861,7 +5861,7 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this, sinks, healed_sinks, locked_replies, &source, pflag); - if ((ret == 0) && source < 0) + if ((ret == 0) && (*pflag & PFLAG_SBRAIN)) ret = -EIO; *esh = afr_decide_heal_info (priv, sources, ret); } @@ -5884,7 +5884,7 @@ afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid, gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, gf_boolean_t *metadata_selfheal, - gf_boolean_t *pending) + unsigned char *pending) { int ret = -1; @@ -5954,7 +5954,7 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc) gf_boolean_t data_selfheal = _gf_false; gf_boolean_t metadata_selfheal = _gf_false; gf_boolean_t entry_selfheal = _gf_false; - gf_boolean_t pending = _gf_false; + unsigned char pending = 0; dict_t *dict = NULL; int ret = -1; int op_errno = 0; @@ -5974,7 +5974,7 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc) goto out; } - if (pending) { + if (pending & PFLAG_PENDING) { size = strlen ("-pending") + 1; gf_asprintf (&substr, "-pending"); if (!substr) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 99871b24685..c2d5058081c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1539,7 +1539,7 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, afr_transaction_type type, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, uint64_t *witness, - gf_boolean_t *pflag) + unsigned char *pflag) { afr_private_t *priv = NULL; int i = 0; @@ -1567,7 +1567,7 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, for (i = 0; i < priv->child_count; i++) { for (j = 0; j < priv->child_count; j++) if (matrix[i][j]) - *pflag = _gf_true; + *pflag |= PFLAG_PENDING; if (*pflag) break; } @@ -1649,6 +1649,8 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, if (locked_on[i]) sinks[i] = 1; } + if (pflag) + *pflag |= PFLAG_SBRAIN; } /* One more class of witness similar to dirty in v2 is where no pending diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index d368be776b0..49e99e99b86 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -611,7 +611,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *pflag) + struct afr_reply *replies, unsigned char *pflag) { int ret = -1; int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index f6d3a8ae5cb..9f597af7dee 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -496,7 +496,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, struct afr_reply *replies, int *source_p, - gf_boolean_t *pflag) + unsigned char *pflag) { int ret = -1; int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 199f8961480..50f8888f110 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -318,7 +318,8 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *pflag) + struct afr_reply *replies, + unsigned char *pflag) { int ret = -1; int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 15e6c392dea..7b4905da065 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -172,7 +172,7 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, afr_transaction_type type, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, uint64_t *witness, - gf_boolean_t *flag); + unsigned char *flag); int afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol, int idx, dict_t *xdata); @@ -286,7 +286,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *flag); + struct afr_reply *replies, unsigned char *flag); int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, @@ -296,7 +296,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, unsigned char *healed_sinks, unsigned char *undid_pending, struct afr_reply *replies, - gf_boolean_t *flag); + unsigned char *flag); int __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, @@ -304,7 +304,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sinks, unsigned char *healed_sinks, struct afr_reply *replies, int *source_p, - gf_boolean_t *flag); + unsigned char *flag); int afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index db6f61eb4bd..36333a3cfaa 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -36,6 +36,9 @@ #define ARBITER_BRICK_INDEX 2 +#define PFLAG_PENDING (1 << 0) +#define PFLAG_SBRAIN (1 << 1) + typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol); -- cgit