author    Ravishankar N <ravishankar@redhat.com>    2016-02-29 05:16:50 +0000
committer Pranith Kumar Karampuri <pkarampu@redhat.com>    2016-03-07 00:32:00 -0800
commit    10e091508ca6e5af815fce612be48287d354a01b (patch)
tree      6f1f69af59887c7d7f125f9e359d35c918094870
parent    6fbaa124e33e6604621826c54eac987dfe590d27 (diff)
afr: do not set arbiter as a readable subvol in inode context
Problem: If afr_lookup_done() or afr_read_subvol_select_by_policy() chooses the
arbiter brick to serve the stat() data, the file size is reported as zero from
the mount, despite other data bricks being available. This can break programs
like tar which use the stat info to decide how much to read.

Fix: In the inode context, mark the arbiter as a non-readable subvol for both
data and metadata.

It is to be noted that with this fix we are *not* going to serve metadata FOPs
from the arbiter brick anymore, despite the brick storing the metadata. It
makes sense to do this because the ever-increasing number of overloaded FOPs
(getxattr returning stat data etc.) and compound FOPs in gluster would
otherwise make it difficult to add checks in the code to handle corner cases.

>Change-Id: Ic60b25d77fd05e0897481b7fcb3716d4f2101001
>BUG: 1310171
>Signed-off-by: Ravishankar N <ravishankar@redhat.com>
>Reported-by: Mat Clayton <mat@mixcloud.com>
>Reviewed-on: http://review.gluster.org/13539
>Reviewed-by: Anuradha Talur <atalur@redhat.com>
>Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
>Smoke: Gluster Build System <jenkins@build.gluster.com>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
>Reviewed-by: Jeff Darcy <jdarcy@redhat.com>

BUG: 1313921
Change-Id: I07fc08d633ca2af48f7354454bc2ab75cedb850a
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/13609
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
-rw-r--r--  tests/basic/afr/arbiter-mount.t        44
-rw-r--r--  tests/basic/afr/arbiter.t              14
-rw-r--r--  xlators/cluster/afr/src/afr-common.c   13
3 files changed, 67 insertions(+), 4 deletions(-)
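For readers skimming the hunks below: the change keeps the arbiter brick out of
the readable arrays that AFR stores in the inode context, and fails lookup with
ENOTCONN when the arbiter would otherwise be the only read subvolume. What
follows is a minimal, self-contained sketch of that idea with simplified names
and layout, not the actual AFR code; the real change lives in
afr_replies_interpret() and the lookup unwind paths of afr-common.c.

/* sketch only: illustrates masking the arbiter out of the readable arrays */
#include <stdio.h>
#include <errno.h>

#define CHILD_COUNT         3
#define ARBITER_BRICK_INDEX (CHILD_COUNT - 1)   /* arbiter is the last brick */

int
main (void)
{
        int data_readable[CHILD_COUNT]     = {1, 1, 1};
        int metadata_readable[CHILD_COUNT] = {1, 1, 1};
        int i, read_subvol = -1;

        /* The fix: never mark the arbiter readable for data or metadata. */
        data_readable[ARBITER_BRICK_INDEX]     = 0;
        metadata_readable[ARBITER_BRICK_INDEX] = 0;

        printf ("arbiter readable? data=%d metadata=%d\n",
                data_readable[ARBITER_BRICK_INDEX],
                metadata_readable[ARBITER_BRICK_INDEX]);

        /* Read-subvol selection now skips the arbiter; if it is the only
         * brick that answered, lookup fails with ENOTCONN instead of
         * returning a zero-byte stat served from the arbiter. */
        for (i = 0; i < CHILD_COUNT; i++) {
                if (data_readable[i]) {
                        read_subvol = i;
                        break;
                }
        }

        if (read_subvol == -1)
                printf ("no readable subvol; fail with ENOTCONN (%d)\n",
                        ENOTCONN);
        else
                printf ("serving reads from subvol %d\n", read_subvol);

        return 0;
}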
diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t
new file mode 100644
index 00000000000..47c327633f3
--- /dev/null
+++ b/tests/basic/afr/arbiter-mount.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+. $(dirname $0)/../../nfs.rc
+cleanup;
+
+#Check that mounting fails when only arbiter brick is up.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# `mount -t glusterfs $H0:$V0 $M0` fails right away, but that check does not
+# work on NetBSD, so check that stat on the mount fails instead.
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -eq 0 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup
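The new test can be run on its own from a built source tree with the project's
prove-based harness, e.g. `prove -v tests/basic/afr/arbiter-mount.t` (assuming
glusterd and the NFS prerequisites pulled in by nfs.rc are available locally).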
diff --git a/tests/basic/afr/arbiter.t b/tests/basic/afr/arbiter.t
index be8f676d1ec..c91e2e90098 100644
--- a/tests/basic/afr/arbiter.t
+++ b/tests/basic/afr/arbiter.t
@@ -20,9 +20,13 @@ TEST $CLI volume delete $V0
# Create and mount a replica 3 arbiter volume.
TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
TEST $CLI volume start $V0
-TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
TEST stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
EXPECT "1" cat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
@@ -40,9 +44,11 @@ TEST kill_brick $V0 $H0 $B0/${V0}1
echo "B2 is down, B3 is the only source, writes will fail" >> $M0/file
EXPECT_NOT "0" echo $?
TEST ! cat $M0/file
-# Metadata I/O should still succeed.
-TEST getfattr -n user.name $M0/file
-TEST setfattr -n user.name -v value3 $M0/file
+# Though metadata IO could have been served from arbiter, we do not allow it
+# anymore as FOPS like getfattr could be overloaded to return iatt buffers for
+# use by other translators.
+TEST ! getfattr -n user.name $M0/file
+TEST ! setfattr -n user.name -v value3 $M0/file
#shd should not data self-heal from arbiter to the sinks.
TEST $CLI volume set $V0 cluster.self-heal-daemon on
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 36c22ad7dc0..bfff6048799 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -695,6 +695,10 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
data_readable[i] = 1;
metadata_readable[i] = 1;
}
+ if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid) {
@@ -1792,9 +1796,14 @@ unwind:
read_subvol = spb_choice;
else
read_subvol = afr_first_up_child (frame, this);
+
}
par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
readable);
+ if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->replies[read_subvol].poststat,
@@ -2241,6 +2250,10 @@ unwind:
else
read_subvol = afr_first_up_child (frame, this);
}
+ if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->replies[read_subvol].poststat,