nfs: do opendir for "naked" readdirp to force self-heal checks

Instead of an opendir, the first thing the Linux NFS client usually sends us is a readdirp at offset zero, effectively bypassing our self-heal checks. Detect this condition and issue our own opendir to compensate.

Change-Id: I69463370abd6235d705bf80b8c77fae4a61096ae
BUG: 830665
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/4067
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
author: Jeff Darcy <jdarcy@redhat.com> 2012-12-03 06:40:11 -0500
committer: Anand Avati <avati@redhat.com> 2012-12-03 21:55:50 -0800
commit: bb5382208696196aead94c011b3f9fa13a04da68 (patch)
tree: 80f62366c7a7e709c3aa01f7809c2e43b8886dfb
parent: 4675796361f19a9f1f897465587737391d94a235 (diff)
2 files changed, 138 insertions, 0 deletions
diff --git a/tests/bugs/bug-830665.t b/tests/bugs/bug-830665.t
new file mode 100755
index 00000000000..327037de487
--- /dev/null
+++ b/tests/bugs/bug-830665.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+H0=$(hostname -s)
+
+function recreate {	# remove directory $1 (if present), then create it empty
+	rm -rf "$1" && mkdir -p "$1"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+recreate ${B0}/${V0}-0
+recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+function volinfo_field()
+{   # print the value part of the "<field>: <value>" line of "volume info <vol>"
+    local vol=$1;      # volume to query
+    local field=$2;    # label to look for at the start of a line
+
+    $CLI volume info "$vol" | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure stat-prefetch doesn't prevent self-heal checks.
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+## Mount NFS
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0 $N0;
+
+## Create some files and directories
+echo "test_data" > $N0/a_file;
+mkdir $N0/a_dir;
+echo "more_test_data" > $N0/a_dir/another_file;
+
+## Unmount and stop the volume.
+TEST umount $N0;
+TEST $CLI volume stop $V0;
+
+# Recreate the brick. Note that because of http://review.gluster.org/#change,4202
+# we need to preserve and restore the volume ID or else the brick (and thus the
+# entire not-very-HA-any-more volume) won't start. When that bug is fixed, we can
+# remove the [gs]etxattr calls.
+volid=$(getfattr -e hex -n trusted.glusterfs.volume-id $B0/${V0}-0 2> /dev/null \
+	| grep = | cut -d= -f2)
+rm -rf $B0/${V0}-0;
+mkdir $B0/${V0}-0;
+setfattr -n trusted.glusterfs.volume-id -v $volid $B0/${V0}-0
+
+## Restart and remount. Note that we use actimeo=0 so that the stat calls
+## we need for self-heal don't get blocked by the NFS client.
+TEST $CLI volume start $V0;
+sleep 5
+TEST mount -t nfs -o vers=3,nolock,soft,intr,actimeo=0 $H0:/$V0 $N0;
+
+## The Linux NFS client has a really charming habit of caching stuff right
+## after mount, even though we set actimeo=0 above. Life would be much easier
+## if NFS developers cared as much about correctness as they do about shaving
+## a few seconds off of benchmarks.
+ls -l $N0 &> /dev/null;
+sleep 5;
+
+## Force entry self-heal.
+find $N0 | xargs stat > /dev/null;
+#ls -lR $N0 > /dev/null;
+
+## Do NOT check through the NFS mount here. That will force a new self-heal
+## check, but we want to test whether self-heal already happened.
+
+## Make sure everything's in order on the recreated brick.
+EXPECT 'test_data' cat $B0/${V0}-0/a_file;
+EXPECT 'more_test_data' cat $B0/${V0}-0/a_dir/another_file;
+
+if [ "$EXIT_EARLY" = "1" ]; then
+	exit 0;
+fi
+
+## Finish up
+TEST umount $N0;
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index 7c01d0301ba..3a4ad4c0a96 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -4270,12 +4270,30 @@ nfs3err:
 }
 
 
+int32_t
+nfs3svc_readdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, fd_t *fd,
+                             dict_t *xdata)
+{
+        /*
+         * The opendir was issued only for its side effects, so its result
+         * is not examined: just drop the fd reference if one came back.
+         */
+        if (fd != NULL) {
+                fd_unref (fd);
+        }
+
+        return 0;
+}
+
+
 int
 nfs3_readdir_open_resume (void *carg)
 {
         nfsstat3                stat = NFS3ERR_SERVERFAULT;
         int                     ret = -EFAULT;
         nfs3_call_state_t       *cs = NULL;
+        nfs_user_t               nfu = {0, };
 
         if (!carg)
                 return ret;
@@ -4288,6 +4306,22 @@ nfs3_readdir_open_resume (void *carg)
                 goto nfs3err;
         }
 
+        /*
+         * NFS client will usually send us a readdirp without an opendir,
+         * which would cause us to skip our usual self-heal checks which occur
+         * in opendir for native protocol. To make sure those checks do happen,
+         * our most reliable option is to do our own opendir for any readdirp
+         * at the beginning of the directory.
+         */
+        if (cs->cookie == 0) {
+                nfs_request_user_init (&nfu, cs->req);
+                ret = nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+                                   nfs3svc_readdir_opendir_cbk, cs);
+                if (ret < 0) {
+                        gf_log (GF_NFS3, GF_LOG_ERROR, "auto-opendir failed");
+                }
+        }
+
         ret = nfs3_readdir_read_resume (cs);
         if (ret < 0)
                 stat = nfs3_errno_to_nfsstat3 (-ret);
author	Jeff Darcy <jdarcy@redhat.com>	2012-12-03 06:40:11 -0500
committer	Anand Avati <avati@redhat.com>	2012-12-03 21:55:50 -0800
commit	bb5382208696196aead94c011b3f9fa13a04da68 (patch)
tree	80f62366c7a7e709c3aa01f7809c2e43b8886dfb
parent	4675796361f19a9f1f897465587737391d94a235 (diff)