From 796bed797858d3e9699f0f311e00e2fb34c25be2 Mon Sep 17 00:00:00 2001
From: Ravishankar N
Date: Thu, 6 Jul 2017 19:49:47 +0530
Subject: afr: mark non sources as sinks in metadata heal

Problem:
In a 3-way replica, when the source brick does not have pending xattrs
for the sinks, but the 2 sinks blame each other, metadata heal was not
happening because we were not setting all non-sources as sinks.

Fix: Mark all non-sources as sinks, like it is done in data and entry
heal.

> Reviewed-on: https://review.gluster.org/17717
> Smoke: Gluster Build System
> Reviewed-by: Pranith Kumar Karampuri
> CentOS-regression: Gluster Build System
(cherry picked from commit 77c1ed5fd299914e91ff034d78ef6e3600b9151c)

Change-Id: I534978940f5087302e307fcc810a48ffe898ce08
BUG: 1471611
Signed-off-by: Ravishankar N
Reviewed-on: https://review.gluster.org/17781
Smoke: Gluster Build System
Reviewed-by: Pranith Kumar Karampuri
CentOS-regression: Gluster Build System
---
 .../bug-1468279-source-not-blaming-sinks.t       | 64 ++++++++++++++++++++++
 xlators/cluster/afr/src/afr-self-heal-data.c     |  2 +-
 xlators/cluster/afr/src/afr-self-heal-metadata.c |  6 +-
 3 files changed, 69 insertions(+), 3 deletions(-)
 create mode 100644 tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t

diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
new file mode 100644
index 00000000000..054a4adb90d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+TEST touch $M0/file
+
+# Kill B1, create a pending metadata heal.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST setfattr -n user.xattr -v value1 $M0/file
+EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
+EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
+
+# Kill B2, heal from B3 to B1.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata"
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+# Create another pending metadata heal.
+TEST setfattr -n user.xattr -v value2 $M0/file
+EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
+EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
+
+# Kill B1, heal from B3 to B2.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata"
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+# ALL bricks up again.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+# B1 and B2 blame each other, B3 doesn't blame anyone.
+EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
+EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
+EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
+EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index de527dbf51a..13679608dfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -565,7 +565,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
                                                             healed_sinks, undid_pending,
                                                             AFR_DATA_TRANSACTION,
                                                             locked_on, replies);
-                return source;
+                goto out;
         }
 
         /* No split brain at this point. If we were called from
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index e7653030e45..f23cf8ec6ee 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -231,7 +231,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
                                                             undid_pending,
                                                             AFR_METADATA_TRANSACTION,
                                                             locked_on, replies);
-                return source;
+                goto out;
         }
 
         /* If this is a directory mtime/ctime only split brain
@@ -245,7 +245,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
                         uuid_utoa (replies[source].poststat.ia_gfid));
                 sources[source] = 1;
                 healed_sinks[source] = 0;
-                return source;
+                goto out;
         }
 
         if (!priv->metadata_splitbrain_forced_heal) {
@@ -307,6 +307,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
                 }
         }
 
+out:
+        afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
         return source;
 }
 
-- 
cgit
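For context, the core of the change is that every early "return source;" in
__afr_selfheal_metadata_finalize_source() now falls through the new "out:"
label, where afr_mark_active_sinks() flags every locked, non-source brick as a
sink. The sketch below illustrates only that idea; it is not the GlusterFS
implementation, and the function name mark_non_sources_as_sinks() plus its flat
array arguments are hypothetical simplifications of the sources[], locked_on[]
and healed_sinks[] arrays seen in the patch.

/*
 * Illustrative sketch only: mark each brick that is locked for self-heal
 * but was not picked as a source as a sink to be healed. This is what lets
 * metadata heal proceed when only the two sinks blame each other.
 */
#include <string.h>

static void
mark_non_sources_as_sinks (int child_count, const unsigned char *sources,
                           const unsigned char *locked_on,
                           unsigned char *healed_sinks)
{
        int i;

        /* Start from a clean slate, then flag every locked non-source. */
        memset (healed_sinks, 0, child_count);
        for (i = 0; i < child_count; i++) {
                if (locked_on[i] && !sources[i])
                        healed_sinks[i] = 1;
        }
}

With both sinks flagged this way, the subsequent metadata heal overwrites them
from the source even when the source carries no pending xattrs for them, which
is exactly the scenario the new .t test constructs.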