From 924702de358160b2536138c073d293b76512838a Mon Sep 17 00:00:00 2001
From: Jeff Darcy
Date: Mon, 3 Dec 2012 12:16:28 -0500
Subject: glusterd: add "volume label" command

This command is necessary when the local disk/filesystem containing a
brick is unexpectedly lost and then recreated.  Since 961bc80c, trying
to start the brick will fail because the trusted.glusterfs.volume-id
xattr is missing, and if we can't start it then we can't replace-brick
or self-heal, so we're stuck in a permanently degraded state.  This
command provides a way to label the empty brick with the proper volume
ID so that further repair actions become possible.

Change-Id: I1c1e5273a018b7a6b8d0852daf111ddc3fddfdc2
BUG: 860297
Signed-off-by: Jeff Darcy
Reviewed-on: http://review.gluster.org/4259
Tested-by: Gluster Build System
Reviewed-by: Anand Avati
---
 tests/bugs/bug-860297.t | 88 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 81 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 tests/bugs/bug-860297.t
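Side note: "volume label" automates a repair that previously had to be done
by hand, namely stamping the recreated brick with the volume's UUID via the
trusted.glusterfs.volume-id xattr. A rough sketch of that manual fix,
assuming the usual /var/lib/glusterd layout and a hex-encoded xattr value
(both are assumptions from common usage, not taken from this patch):

    # Recover the volume's UUID from glusterd's saved volume definition.
    vol_id=$(grep '^volume-id=' /var/lib/glusterd/vols/$V0/info | cut -d= -f2)
    # The xattr holds the UUID as 16 raw bytes, so drop the dashes and
    # pass the value hex-encoded; glusterd will then let the brick start.
    setfattr -n trusted.glusterfs.volume-id -v 0x${vol_id//-/} $B0/${V0}-1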
diff --git a/tests/bugs/bug-860297.t b/tests/bugs/bug-860297.t
old mode 100644
new mode 100755
index 2a3ca7a7a..fa1b1ff28
--- a/tests/bugs/bug-860297.t
+++ b/tests/bugs/bug-860297.t
@@ -1,13 +1,87 @@
 #!/bin/bash
+
 . $(dirname $0)/../include.rc
-cleanup;
+cleanup
+
+function recreate {
+    # The rm is necessary so we don't get fooled by leftovers from old runs.
+    rm -rf $1 && mkdir -p $1
+}
+
+function count_bricks {
+    local count
+    local pid
+    count=0
+    for pid in /var/lib/glusterd/vols/${1}/run/*pid; do
+        if kill -0 $(cat $pid); then
+            count=$((count+1))
+        fi
+    done
+    echo $count
+}
 
 TEST glusterd
 TEST pidof glusterd
-TEST $CLI volume info
-TEST $CLI volume create $V0 $H0:$B0/brick1
-setfattr -x trusted.glusterfs.volume-id $B0/brick1
-## If Extended attribute trusted.glusterfs.volume-id is not present
-## then volume should not be able to start
-TEST ! $CLI volume start $V0;
+TEST $CLI volume info;
+
+## Start and create a volume
+TEST recreate ${B0}/${V0}-0
+TEST recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+function volinfo_field()
+{
+    local vol=$1;
+    local field=$2;
+
+    $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify that all bricks start.
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 2 count_bricks $V0
+TEST $CLI volume stop $V0
+
+# Nuke one of the bricks and make sure it *doesn't* start.
+TEST recreate ${B0}/${V0}-1
+# We can't do the usual TEST/startup thing here because of another bug. If
+# a server fails to start a brick, it won't start any others either. Since
+# all of our bricks in testing are on one server, that means no bricks start
+# and so the volume doesn't start either. Changing the order etc. doesn't
+# help, because the attempted startup order is non-deterministic. Instead,
+# we just don't rely on whether or not the volume starts; the brick count is
+# sufficient for our purposes.
+$CLI volume start $V0;
+EXPECT 1 count_bricks $V0
+# If we can't depend on the volume starting, we can't depend on it stopping
+# either.
+$CLI volume stop $V0
+
+# Label the recreated brick and make sure it starts now.
+TEST $CLI volume label $V0 ${H0}:${B0}/${V0}-1
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 2 count_bricks $V0
+
+# Make sure we can mount and use the volume.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST dd if=/dev/zero of=$M0/block bs=4k count=1
+
+if [ "$EXIT_EARLY" = "1" ]; then
+    exit 0;
+fi
+
+## Finish up
+TEST umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
 cleanup;
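For completeness, the recovery flow this test exercises would look roughly
like the following on a real deployment (the volume and brick names are
illustrative, and "volume heal ... full" assumes a replicated volume with
self-heal available):

    gluster volume label myvol server1:/bricks/myvol-1  # stamp the empty brick
    gluster volume start myvol                          # bricks can start again
    gluster volume heal myvol full                      # repopulate from the replica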