| -rwxr-xr-x | tests/bugs/bug-802417.t                        | 98 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 50 |
2 files changed, 142 insertions, 6 deletions
diff --git a/tests/bugs/bug-802417.t b/tests/bugs/bug-802417.t
new file mode 100755
index 00000000..c69af1d4
--- /dev/null
+++ b/tests/bugs/bug-802417.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function write_file()
+{
+	path="$1"; shift
+	echo "$*" > "$path"
+}
+
+function check_xattrs()
+{
+	result=""
+
+	for observer in 0 1 2; do
+		obs_path=${B0}/${V0}-$observer/a_file
+		for target in 0 1 2; do
+			tgt_xattr="trusted.afr.${V0}-client-$target"
+			actual=$(afr_get_changelog_xattr $obs_path $tgt_xattr)
+			if [ $observer -ne 2 -a $target -eq 2 ]; then
+				expected=0x000000020000000000000000
+			else
+				expected=0x000000000000000000000000
+			fi
+			if [ "$actual" = "$expected" ]; then
+				result="${result}y"
+			else
+				result="${result}n"
+			fi
+		done
+	done
+
+	echo $result
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizably stale data.
+TEST write_file $M0/a_file "old_data"
+
+## Kill two of the bricks and write some newer data.
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST write_file $M0/a_file "new_data"
+
+## Bring all the bricks up and kill one so we do a partial self-heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 2
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST ls -l ${M0}/a_file
+
+EXPECT "yyyyyyyyy" check_xattrs
+
+if [ "$EXIT_EARLY" = "1" ]; then
+	exit 0;
+fi
+
+## Finish up
+TEST umount $M0;
+TEST $CLI volume stop $V0;
EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
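A note on what check_xattrs is comparing: assuming the conventional AFR changelog layout, where each trusted.afr.<vol>-client-N value is 12 bytes holding three big-endian 32-bit counters (data, metadata, entry operations), the expected value 0x000000020000000000000000 decodes as "two pending data operations, nothing else", which is what the two surviving bricks should hold against the brick that stayed down. A minimal standalone sketch of that decoding; decode_afr_changelog is a made-up name, not part of the patch or of the AFR code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Decode a 12-byte AFR changelog value into its three counters. */
static void
decode_afr_changelog (const uint8_t *buf)
{
        const char *names[3] = { "data", "metadata", "entry" };
        uint32_t    count    = 0;
        int         i        = 0;

        for (i = 0; i < 3; i++) {
                memcpy (&count, buf + i * 4, sizeof (count));
                printf ("%s=%u ", names[i], ntohl (count));
        }
        printf ("\n");
}

int
main (void)
{
        /* The test's expected value for the stale target:
         * 0x000000020000000000000000. */
        uint8_t stale[12] = { 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 };

        decode_afr_changelog (stale); /* prints: data=2 metadata=0 entry=0 */
        return 0;
}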
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index a0dfa59d..b5b42aec 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -833,14 +833,46 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
                          int32_t *delta_matrix[], unsigned char success[],
                          int child_count, afr_transaction_type type)
 {
-        int i = 0;
-        int j = 0;
+        int     tgt     = 0;
+        int     src     = 0;
+        int     value   = 0;
 
         afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
                                   xattr, type, priv->child_count);
 
-        for (i = 0; i < priv->child_count; i++)
-                for (j = 0; j < priv->child_count; j++)
-                        delta_matrix[i][j] = -delta_matrix[i][j];
+
+        /*
+         * The algorithm here has two parts.  First, for each subvol indexed
+         * as tgt, we try to figure out what count everyone should have for it.
+         * If the self-heal succeeded, that's easy; the value is zero.
+         * Otherwise, the value is the maximum of the succeeding nodes' counts.
+         * Once we know the value, we loop through (possibly for a second time)
+         * setting each count to the difference so that when we're done all
+         * succeeding nodes will have the same count for tgt.
+         */
+        for (tgt = 0; tgt < priv->child_count; ++tgt) {
+                value = 0;
+                if (!success[tgt]) {
+                        /* Find the maximum. */
+                        for (src = 0; src < priv->child_count; ++src) {
+                                if (!success[src]) {
+                                        continue;
+                                }
+                                if (delta_matrix[src][tgt] > value) {
+                                        value = delta_matrix[src][tgt];
+                                }
+                        }
+                }
+                /* Force everyone who succeeded to the chosen value. */
+                for (src = 0; src < priv->child_count; ++src) {
+                        if (success[src]) {
+                                delta_matrix[src][tgt] = value
+                                                       - delta_matrix[src][tgt];
+                        }
+                        else {
+                                delta_matrix[src][tgt] = 0;
+                        }
+                }
+        }
 }
 
@@ -867,8 +899,14 @@ afr_sh_delta_to_xattr (xlator_t *this,
 
                         pending = GF_CALLOC (sizeof (int32_t), 3,
                                              gf_afr_mt_int32_t);
-                        if (!pending)
+                        if (!pending) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to allocate pending entry "
+                                        "for %s[%d] on %s",
+                                        priv->pending_key[j], type,
+                                        priv->children[i]->name);
                                 continue;
+                        }
 
                         /* 3 = data+metadata+entry */
                         k = afr_index_for_transaction_type (type);
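The block comment added to afr_sh_pending_to_delta is the heart of the change: for each target subvolume, pick the value every succeeding node should converge on (zero if that subvolume healed, otherwise the maximum pending count any succeeding node holds for it), then store the difference so the later xattrop brings everyone to that value. The sketch below runs the same loop outside the xlator; the fixed 3x3 matrix, the success flags, and the driver in main are invented to mirror the test's scenario (bricks 0 and 1 up, brick 2 down) and are not part of the patch:

#include <stdint.h>
#include <stdio.h>

#define CHILD_COUNT 3

/* Same two-part pass as the patched afr_sh_pending_to_delta, minus
 * the xlator plumbing. */
static void
pending_to_delta (int32_t delta[CHILD_COUNT][CHILD_COUNT],
                  const unsigned char success[CHILD_COUNT])
{
        int     tgt   = 0;
        int     src   = 0;
        int32_t value = 0;

        for (tgt = 0; tgt < CHILD_COUNT; ++tgt) {
                value = 0;
                if (!success[tgt]) {
                        /* Converge on the maximum count held by any
                         * succeeding node. */
                        for (src = 0; src < CHILD_COUNT; ++src) {
                                if (success[src] &&
                                    delta[src][tgt] > value) {
                                        value = delta[src][tgt];
                                }
                        }
                }
                /* Store the difference for succeeding nodes; zero out
                 * the rest. */
                for (src = 0; src < CHILD_COUNT; ++src) {
                        if (success[src]) {
                                delta[src][tgt] = value - delta[src][tgt];
                        }
                        else {
                                delta[src][tgt] = 0;
                        }
                }
        }
}

int
main (void)
{
        /* Bricks 0 and 1 each hold two pending data ops against
         * brick 2, which is still down (success[2] == 0). */
        int32_t       delta[CHILD_COUNT][CHILD_COUNT] = {
                { 0, 0, 2 },
                { 0, 0, 2 },
                { 0, 0, 0 },
        };
        unsigned char success[CHILD_COUNT] = { 1, 1, 0 };
        int           i = 0;
        int           j = 0;

        pending_to_delta (delta, success);

        for (i = 0; i < CHILD_COUNT; ++i) {
                for (j = 0; j < CHILD_COUNT; ++j) {
                        printf ("%2d ", (int) delta[i][j]);
                }
                printf ("\n");
        }
        return 0;
}

The computed delta for the stale column is zero, so the follow-up xattrop leaves the pending count of 2 in place on both surviving bricks; that is exactly the 0x000000020000000000000000 value check_xattrs expects after the partial self-heal.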
