summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2017-04-19 16:40:05 +0530
committerNiels de Vos <ndevos@redhat.com>2017-04-29 11:26:24 +0000
commita6d313d12c98cf533c6bbb10f491dd2ec48ca89c (patch)
treec0a8dabe09f3d795c85c09653b585544734443dc
parentddf3d4d3785bbd4f964e54ebc72fd445662f21a5 (diff)
afr: don't do a post-op on a brick if op failed
Problem: In afr-v2, self-blaming xattrs are not there by design. But if the FOP failed on a brick due to an error other than ENOTCONN (or even due to ENOTCONN, but we regained connection before postop was wound), we wind the post-op also on the failed brick, leading to setting self-blaming xattrs on that brick. This can lead to undesired results like healing of files in split-brain etc. Fix: If a fop failed on a brick on which pre-op was successful, do not perform post-op on it. This also produces the desired effect of not resetting the dirty xattr on the brick, which is how it should be because if the fop failed on a brick, there is no reason to clear the dirty bit which actually serves as an indication of the failure. > Reviewed-on: https://review.gluster.org/16976 > Smoke: Gluster Build System <jenkins@build.gluster.org> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Change-Id: I5f1caf4d1b39f36cf8093ccef940118638caa9c4 BUG: 1443319 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: https://review.gluster.org/17082 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r--tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t46
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c12
2 files changed, 56 insertions, 2 deletions
diff --git a/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
new file mode 100644
index 00000000000..edfd0d7820d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+NEW_USER=bug1438255
+NEW_UID=1438255
+NEW_GID=1438255
+
+TEST groupadd -o -g ${NEW_GID} ${NEW_USER}-${NEW_GID}
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -K MAIL_DIR=/dev/null ${NEW_USER}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST touch $M0/FILE
+TEST kill_brick $V0 $H0 $B0/${V0}2
+chown $NEW_UID:$NEW_GID $M0/FILE
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# setfattr done as NEW_USER fails on 3rd brick with EPERM but suceeds on
+# the first 2 and hence on the mount.
+su -m bug1438255 -c "setfattr -n user.myattr -v myvalue $M0/FILE"
+TEST [ $? -eq 0 ]
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+# Brick 3 does not have any self-blaming pending xattr.
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
+
+TEST userdel --force ${NEW_USER}
+TEST groupdel ${NEW_USER}-${NEW_GID}
+cleanup
+
+
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 8178fc0d18b..83e25f3a122 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -588,11 +588,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
int
afr_changelog_call_count (afr_transaction_type type,
unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
unsigned int child_count)
{
+ int i = 0;
int call_count = 0;
- call_count = AFR_COUNT(pre_op_subvols, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (pre_op_subvols[i] && !failed_subvols[i]) {
+ call_count++;
+ }
+ }
if (type == AFR_ENTRY_RENAME_TRANSACTION)
call_count *= 2;
@@ -1244,6 +1250,7 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
call_count = afr_changelog_call_count (local->transaction.type,
local->transaction.pre_op,
+ local->transaction.failed_subvols,
priv->child_count);
if (call_count == 0) {
@@ -1257,7 +1264,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
local->transaction.changelog_resume = changelog_resume;
for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
+ if (!local->transaction.pre_op[i] ||
+ local->transaction.failed_subvols[i])
continue;
switch (local->transaction.type) {