diff options
author | karthik-us <ksubrahm@redhat.com> | 2017-03-18 13:44:56 +0530 |
---|---|---|
committer | Niels de Vos <ndevos@redhat.com> | 2017-04-07 07:56:55 -0400 |
commit | 50ac9c87020d0e9712491c04afbb208a01c6ecbd (patch) | |
tree | d12bd9ccf0eab2bfc252ada99064d9da4f3ed228 | |
parent | cbded629e9ba2d293832e9ed9e81e0390ef29f72 (diff) |
cluster/afr: Undo pending xattrs only on the up bricks
Problem:
While doing conservative merge, even if a brick is down, it will reset
the pending xattr on that. When that brick comes up, as part of the
heal, it will consider this brick as the source and removes the entries
on the other bricks, which leads to data loss.
Fix:
Undo pending only for the bricks which are up.
> Change-Id: I18436fa0bb1faa5f60531b357dea3f6b20446303
> BUG: 1433571
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
> Reviewed-on: https://review.gluster.org/16913
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Ravishankar N <ravishankar@redhat.com>
(cherry picked from commit f91596e6566c605e70a31a60523d11f78a097c3c)
Change-Id: Id20c9ce53ee59f005d977494903247e2a8024ed1
BUG: 1436231
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://review.gluster.org/16956
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r-- | tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t | 89 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 2 |
2 files changed, 90 insertions, 1 deletions
diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t new file mode 100644 index 00000000000..271abb4fe9a --- /dev/null +++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t @@ -0,0 +1,89 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +# Disable self-heal-daemon, client-side-heal and set quorum-type to none +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.quorum-type none + +#Kill bricks 0 & 1 and create a file to have pending entry for 0 & 1 on brick 2 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo "file 1" >> $M0/f1 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#Kill bricks 1 & 2 and create a file to have pending entry for 1 & 2 on brick 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +echo "file 2" >> $M0/f2 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +#Kill bricks 2 & 0 and create a file to have pending entry for 2 & 0 on brick 1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "file 3" >> $M0/f3 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +#Kill brick 0 and turn on the client side heal and do ls to trigger the heal. +#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0. +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on + +TEST ls $M0 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 + +#Bring back all the bricks and trigger the heal again by doing ls. Now the +#pending xattrs on all the bricks should be 0. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ls $M0 + +TEST cat $M0/f1 +TEST cat $M0/f2 +TEST cat $M0/f3 + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 + +#Check whether all the bricks contains all the 3 files. +EXPECT "3" echo $(ls $B0/${V0}0 | wc -l) +EXPECT "3" echo $(ls $B0/${V0}1 | wc -l) +EXPECT "3" echo $(ls $B0/${V0}2 | wc -l) + +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 6d1ae1312ea..adf5ab20a6c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -247,7 +247,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, output_matrix[i][j] = 1; if (type == AFR_ENTRY_TRANSACTION) full_heal_mtx_out[i][j] = 1; - } else { + } else if (locked_on[j]) { output_matrix[i][j] = -input_matrix[i][j]; if (type == AFR_ENTRY_TRANSACTION) full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j]; |