From a7ce0548b7969050644891cd90c0bf134fa1594c Mon Sep 17 00:00:00 2001
From: Jeff Darcy <jdarcy@redhat.com>
Date: Mon, 20 Mar 2017 12:32:33 -0400
Subject: glusterd: hold off volume deletes while still restarting bricks

We need to do this because modifying the volume/brick tree while
glusterd_restart_bricks is still walking it can lead to segfaults.
Without waiting we could accidentally "slip in" while attach_brick has
released big_lock between retries and make such a modification.

Change-Id: I30ccc4efa8d286aae847250f5d4fb28956a74b03
BUG: 1432542
Signed-off-by: Jeff Darcy <jeff@pl.atyp.us>
Reviewed-on: https://review.gluster.org/16927
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 .../bugs/core/bug-1421590-brick-mux-resuse-ports.t | 55 -------------
 .../bugs/core/bug-1421590-brick-mux-reuse-ports.t  | 60 ++++++++++++++
 tests/bugs/core/bug-1432542-mpx-restart-crash.t    | 91 ++++++++++++++++++++++
 3 files changed, 151 insertions(+), 55 deletions(-)
 delete mode 100644 tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t
 create mode 100644 tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
 create mode 100644 tests/bugs/core/bug-1432542-mpx-restart-crash.t

(limited to 'tests')
diff --git a/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t b/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t
deleted file mode 100644
index ed401f6e6ad..00000000000
--- a/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../traps.rc
-. $(dirname $0)/../../volume.rc
-
-function get_nth_brick_port_for_volume () {
-        local VOL=$1
-        local n=$2
-
-        $CLI volume status $VOL --xml | sed -ne 's/.*<port>\([-0-9]*\)<\/port>/\1/p' \
-                                      | head -n $n | tail -n 1
-}
-
-TEST glusterd
-
-TEST $CLI volume set all cluster.brick-multiplex on
-push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
-push_trapfunc "cleanup"
-
-TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
-TEST $CLI volume start $V0
-
-port_brick0=$(get_nth_brick_port_for_volume $V0 1)
-
-# restart the volume
-TEST $CLI volume stop $V0
-TEST $CLI volume start $V0
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
-
-TEST $CLI volume stop $V0
-TEST $CLI volume set all cluster.brick-multiplex off
-
-TEST $CLI volume start $V0
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
-
-port_brick1=$(get_nth_brick_port_for_volume $V0 2)
-
-# restart the volume
-TEST $CLI volume stop $V0
-TEST $CLI volume start $V0
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick1 get_nth_brick_port_for_volume $V0 2
-
-TEST $CLI volume stop $V0
-
-TEST $CLI volume set all cluster.brick-multiplex on
-
-TEST $CLI volume start $V0
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
-
diff --git a/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t b/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
new file mode 100644
index 00000000000..a227f8275ed
--- /dev/null
+++ b/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+function get_nth_brick_port_for_volume () {  # print the n-th <port> value from "volume status --xml"
+        local VOL=$1  # volume name passed to "$CLI volume status"
+        local n=$2  # 1-based index into the extracted <port> values
+
+        $CLI volume status $VOL --xml | sed -ne 's/.*<port>\([-0-9]*\)<\/port>/\1/p' \
+                                      | head -n $n | tail -n 1
+}
+
+TEST glusterd
+
+TEST $CLI volume set all cluster.brick-multiplex on
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
+push_trapfunc "cleanup"
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+
+# We can't expect a valid port number instantly.  We need to wait for the
+# bricks to finish coming up.  In every other case we use EXPECT_WITHIN, but
+# this first time we need to wait more explicitly.
+sleep $PROCESS_UP_TIMEOUT
+
+port_brick0=$(get_nth_brick_port_for_volume $V0 1)
+
+# restart the volume
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume set all cluster.brick-multiplex off
+
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
+
+port_brick1=$(get_nth_brick_port_for_volume $V0 2)
+
+# restart the volume
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick1 get_nth_brick_port_for_volume $V0 2
+
+TEST $CLI volume stop $V0
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT $port_brick0 get_nth_brick_port_for_volume $V0 1
+
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
new file mode 100644
index 00000000000..970a181c83d
--- /dev/null
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+NUM_VOLS=20
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried.  That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail.  Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate.  After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures.  Arguably,
+# those are spurious because we will eventually catch up.  We're just not
+# ready *yet*.  More to the point, even if the errors aren't spurious that's
+# not what we're testing right now.  Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {  # print "$B0/volNN", NN being $1 zero-padded to two digits
+	printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {  # print "$MOUNT_BASE/volNN", NN being $1 zero-padded to two digits
+	printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+create_volume () {  # create, start and mount the test volume numbered $1
+
+	local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+	local brick_base=$(get_brick_base $1)
+	local cmd="$CLI volume create $vol_name replica 2"  # brick arguments are appended below
+	local b
+	for b in $(seq 0 5); do  # six bricks per volume: brick0 .. brick5
+		local this_brick=${brick_base}/brick$b
+		mkdir -p $this_brick
+		cmd="$cmd $H0:$this_brick"
+	done
+	TEST $cmd  # run the assembled "volume create" command
+	TEST $CLI volume start $vol_name
+	EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $vol_name "Status"
+	local mount_point=$(get_mount_point $1)
+	mkdir -p $mount_point
+	TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point  # mount the volume at $mount_point
+}
+
+cleanup_func () {  # unmount, stop and delete all NUM_VOLS volumes; remove their dirs
+	local v
+	for v in $(seq 1 $NUM_VOLS); do
+		local mount_point=$(get_mount_point $v)
+		force_umount $mount_point
+		rm -rf $mount_point
+		local vol_name=$(printf "%s-vol%02d" $V0 $v)
+		$CLI volume stop $vol_name
+		$CLI volume delete $vol_name
+		rm -rf $(get_brick_base $v) &  # this volume's brick tree, removed in background
+	done &> /dev/null  # best-effort: all output produced inside the loop is discarded
+	wait  # block until the backgrounded removals have finished
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here.  The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=95
+for i in $(seq 1 $NUM_VOLS); do
+	create_volume $i
+	TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+done
+
+# Kill glusterd, and wait a bit for all traces to disappear.
+TEST killall -9 glusterd
+sleep 5
+TEST killall -9 glusterfsd
+sleep 5
+
+# Restart glusterd.  This is where the brick daemon supposedly dumps core,
+# though I (jdarcy) have yet to see that.  Note that no settle time is given
+# here: the cleanup below starts stopping and deleting volumes right away.
+TEST glusterd
+
+cleanup_func
+trap - EXIT
+cleanup
-- 
cgit