diff options
-rwxr-xr-x | tests/bugs/bug-802417.t | 98 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 50 |
2 files changed, 142 insertions, 6 deletions
# NOTE: the scraped page lost its line breaks and leading whitespace. They are
# restored below from the hunk line counts; indentation and the position of
# blank context lines are reconstructed and may differ from the original commit.
#
# File 1 of 2: new regression test tests/bugs/bug-802417.t.
# The script creates a replica-3 volume from three bricks, switches off
# io-cache, write-behind, stat-prefetch and the self-heal daemon, enables
# data-self-heal and sets background-self-heal-count to 0.  It writes
# "old_data" with all bricks up, kills bricks 1 and 2, writes "new_data",
# force-starts the volume, waits for all three children to report up, kills
# brick 2 again and runs `ls -l` on the file (the script's own comment calls
# this a partial self-heal).
# check_xattrs then reads trusted.afr.<volume>-client-{0,1,2} from each brick's
# copy of the file and prints one y/n per (brick, client) pair.  The expected
# "yyyyyyyyy" means bricks 0 and 1 both hold 0x000000020000000000000000 for
# client-2 (first 32-bit counter equal to 2) and every other value is zero.
diff --git a/tests/bugs/bug-802417.t b/tests/bugs/bug-802417.t
new file mode 100755
index 00000000000..c69af1d47c3
--- /dev/null
+++ b/tests/bugs/bug-802417.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function write_file()
+{
+        path="$1"; shift
+        echo "$*" > "$path"
+}
+
+function check_xattrs()
+{
+        result=""
+
+        for observer in 0 1 2; do
+                obs_path=${B0}/${V0}-$observer/a_file
+                for target in 0 1 2; do
+                        tgt_xattr="trusted.afr.${V0}-client-$target"
+                        actual=$(afr_get_changelog_xattr $obs_path $tgt_xattr)
+                        if [ $observer -ne 2 -a $target -eq 2 ]; then
+                                expected=0x000000020000000000000000
+                        else
+                                expected=0x000000000000000000000000
+                        fi
+                        if [ "$actual" = "$expected" ]; then
+                                result="${result}y"
+                        else
+                                result="${result}n"
+                        fi
+                done
+        done
+
+        echo $result
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizably stale data.
+TEST write_file $M0/a_file "old_data"
+
+## Kill two of the bricks and write some newer data.
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST write_file $M0/a_file "new_data"
+
+## Bring all the bricks up and kill one so we do a partial self-heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 2
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST ls -l ${M0}/a_file
+
+EXPECT "yyyyyyyyy" check_xattrs
+
+if [ "$EXIT_EARLY" = "1" ]; then
+        exit 0;
+fi
+
+## Finish up
+TEST umount $M0;
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
# File 2 of 2: xlators/cluster/afr/src/afr-self-heal-common.c.
# Hunk 1 (afr_sh_pending_to_delta): the old code negated every entry of the
# pending matrix.  The new code works per target column: the chosen value is 0
# when success[tgt] is set, otherwise the largest count (never below 0) held by
# any source with success[src] set.  Each successful source then gets
# (value - its current count) as its delta; every other source gets 0.
# Hunk 2 (afr_sh_delta_to_xattr): a failed GF_CALLOC is now logged at
# GF_LOG_ERROR before the existing `continue`.
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index a0dfa59d9a6..b5b42aecb0b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -833,14 +833,46 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
                          int32_t *delta_matrix[], unsigned char success[],
                          int child_count, afr_transaction_type type)
 {
-        int i = 0;
-        int j = 0;
+        int tgt = 0;
+        int src = 0;
+        int value = 0;
 
         afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
                                   xattr, type, priv->child_count);
-        for (i = 0; i < priv->child_count; i++)
-                for (j = 0; j < priv->child_count; j++)
-                        delta_matrix[i][j] = -delta_matrix[i][j];
+
+        /*
+         * The algorithm here has two parts. First, for each subvol indexed
+         * as tgt, we try to figure out what count everyone should have for it.
+         * If the self-heal succeeded, that's easy; the value is zero.
+         * Otherwise, the value is the maximum of the succeeding nodes' counts.
+         * Once we know the value, we loop through (possibly for a second time)
+         * setting each count to the difference so that when we're done all
+         * succeeding nodes will have the same count for tgt.
+         */
+        for (tgt = 0; tgt < priv->child_count; ++tgt) {
+                value = 0;
+                if (!success[tgt]) {
+                        /* Find the maximum. */
+                        for (src = 0; src < priv->child_count; ++src) {
+                                if (!success[src]) {
+                                        continue;
+                                }
+                                if (delta_matrix[src][tgt] > value) {
+                                        value = delta_matrix[src][tgt];
+                                }
+                        }
+                }
+                /* Force everyone who succeeded to the chosen value. */
+                for (src = 0; src < priv->child_count; ++src) {
+                        if (success[src]) {
+                                delta_matrix[src][tgt] = value
+                                        - delta_matrix[src][tgt];
+                        }
+                        else {
+                                delta_matrix[src][tgt] = 0;
+                        }
+                }
+        }
 }
 
 
@@ -867,8 +899,14 @@ afr_sh_delta_to_xattr (xlator_t *this,
                         pending = GF_CALLOC (sizeof (int32_t), 3,
                                              gf_afr_mt_int32_t);
 
-                        if (!pending)
+                        if (!pending) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to allocate pending entry "
+                                        "for %s[%d] on %s",
+                                        priv->pending_key[j], type,
+                                        priv->children[i]->name);
                                 continue;
+                        }
                         /* 3 = data+metadata+entry */
 
                         k = afr_index_for_transaction_type (type);
|