summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2016-02-29 05:16:50 +0000
committerJeff Darcy <jdarcy@redhat.com>2016-03-04 09:32:18 -0800
commit8ab87137c855c9a0551a9100c146e4ca76dbbda2 (patch)
treec86167ff1a04f29ae91786df6471e73039242518
parentbf80b9005240811d931090afcfee8ca0b02f8212 (diff)
afr: do not set arbiter as a readable subvol in inode context
Problem: If afr_lookup_done() or afr_read_subvol_select_by_policy() chooses the arbiter brick to serve the stat() data, file size will be reported as zero from the mount, despite other data bricks being available. This can break programs like tar which use the stat info to decide how much to read. Fix: In the inode-context, mark arbiter as a non-readable subvol for both data and metadata. It it to be noted that by making this fix, we are *not* going to serve metadata FOPS anymore from the arbiter brick despite the brick storing the metadata. It makes sense to do this because the ever increasing over-loaded FOPs (getxattr returning stat data etc.) and compound FOPS in gluster will otherwise make it difficult to add checks in code to handle corner cases. Change-Id: Ic60b25d77fd05e0897481b7fcb3716d4f2101001 BUG: 1310171 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reported-by: Mat Clayton <mat@mixcloud.com> Reviewed-on: http://review.gluster.org/13539 Reviewed-by: Anuradha Talur <atalur@redhat.com> Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r--tests/basic/afr/arbiter-mount.t44
-rw-r--r--tests/basic/afr/arbiter.t14
-rw-r--r--xlators/cluster/afr/src/afr-common.c13
3 files changed, 67 insertions, 4 deletions
diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t
new file mode 100644
index 00000000000..37e06bbbfba
--- /dev/null
+++ b/tests/basic/afr/arbiter-mount.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+. $(dirname $0)/../../nfs.rc
+cleanup;
+
+#Check that mounting fails when only arbiter brick is up.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Doing `mount -t glusterfs $H0:$V0 $M0` fails right away but doesn't work on NetBSD
+# So check that stat <mount> fails instead.
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST ! stat $M0
+TEST umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST stat $M0
+TEST umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -eq 0 ]
+TEST umount $N0
+
+cleanup
diff --git a/tests/basic/afr/arbiter.t b/tests/basic/afr/arbiter.t
index df392cc6e23..6bcc5e9ab17 100644
--- a/tests/basic/afr/arbiter.t
+++ b/tests/basic/afr/arbiter.t
@@ -28,9 +28,13 @@ TEST pidof glusterd
TEST mkdir -p $B0/${V0}{0,1,2}
TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
TEST $CLI volume start $V0
-TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
TEST stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
EXPECT "1" cat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
@@ -48,9 +52,11 @@ TEST kill_brick $V0 $H0 $B0/${V0}1
echo "B2 is down, B3 is the only source, writes will fail" >> $M0/file
EXPECT_NOT "0" echo $?
TEST ! cat $M0/file
-# Metadata I/O should still succeed.
-TEST getfattr -n user.name $M0/file
-TEST setfattr -n user.name -v value3 $M0/file
+# Though metadata IO could have been served from arbiter, we do not allow it
+# anymore as FOPS like getfattr could be overloaded to return iatt buffers for
+# use by other translators.
+TEST ! getfattr -n user.name $M0/file
+TEST ! setfattr -n user.name -v value3 $M0/file
#shd should not data self-heal from arbiter to the sinks.
TEST $CLI volume set $V0 cluster.self-heal-daemon on
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index a7b6ee85a61..5e1acf2ebf4 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -691,6 +691,10 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
data_readable[i] = 1;
metadata_readable[i] = 1;
}
+ if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid) {
@@ -1773,9 +1777,14 @@ unwind:
read_subvol = spb_choice;
else
read_subvol = afr_first_up_child (frame, this);
+
}
par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
readable);
+ if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->replies[read_subvol].poststat,
@@ -2222,6 +2231,10 @@ unwind:
else
read_subvol = afr_first_up_child (frame, this);
}
+ if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->replies[read_subvol].poststat,