cluster/ec: avoid delays in self-heal

Self-heal creates a thread per brick to sweep the index looking for files that need to be healed. These threads are started before the volume comes online, so nothing is done but waiting for the next sweep. This happens once per minute. When a replace brick command is executed, the new graph is loaded and all index sweeper threads started. When all bricks have reported, a getxattr request is sent to the root directory of the volume. This causes a heal on it (because the new brick doesn't have good data), and marks its contents as pending to be healed. This is done by the index sweeper thread on the next round, one minute later. This patch solves this problem by waking all index sweeper threads after a successful check on the root directory. Additionally, the index sweep thread scans the index directory sequentially, but it might happen that after healing a directory entry more index entries are created but skipped by the current directory scan. This causes the remaining entries to be processed on the next round, one minute later. The same can happen in the next round, so the heal is running in bursts and taking a lot to finish, specially on volumes with many directory levels. This patch solves this problem by immediately restarting the index sweep if a directory has been healed. Backport of: > BUG: 1547662 Change-Id: I58d9ab6ef17b30f704dc322e1d3d53b904e5f30e BUG: 1555198 Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
author: Xavi Hernandez <jahernan@redhat.com> 2018-02-21 17:47:37 +0100
committer: Xavi Hernandez <xhernandez@redhat.com> 2018-03-15 08:20:10 +0100
commit: 8fb21afdd033c5d466854400c6a7604fcf5241c3 (patch)
tree: 0e1902bf50584f3f87c5865f043804c9c5979d8e /tests
parent: 2628a91eaaf6a8584492b2d622c27b9d9b8b2e20 (diff)
1 files changed, 41 insertions, 0 deletions
diff --git a/tests/bugs/ec/bug-1547662.t b/tests/bugs/ec/bug-1547662.t
new file mode 100644
index 00000000000..5748218587e
--- /dev/null
+++ b/tests/bugs/ec/bug-1547662.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Immediately after replace-brick, trusted.ec.version will be absent, so if it
+# is present we can assume that heal was started on root
+function root_heal_attempted {
+        if [ -z $(get_hex_xattr trusted.ec.version $1) ]; then
+                echo "N"
+        else
+                echo "Y"
+        fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} disperse 6 redundancy 2 ${H0}:${B0}/${V0}{0..5}
+TEST ${CLI} volume start ${V0}
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+
+TEST mkdir ${M0}/base
+TEST mkdir ${M0}/base/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+
+TEST ${CLI} volume replace-brick ${V0} ${H0}:${B0}/${V0}5 ${H0}:${B0}/${V0}6 commit force
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "Y" glustershd_up_status
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count_shd ${V0} 0
+EXPECT_WITHIN ${HEAL_TIMEOUT} "Y" root_heal_attempted ${B0}/${V0}6
+EXPECT_WITHIN ${HEAL_TIMEOUT} "^0$" get_pending_heal_count ${V0}
+EXPECT "^127$" echo $(find ${B0}/${V0}6/base -type d | wc -l)
+
+cleanup;
author	Xavi Hernandez <jahernan@redhat.com>	2018-02-21 17:47:37 +0100
committer	Xavi Hernandez <xhernandez@redhat.com>	2018-03-15 08:20:10 +0100
commit	8fb21afdd033c5d466854400c6a7604fcf5241c3 (patch)
tree	0e1902bf50584f3f87c5865f043804c9c5979d8e /tests
parent	2628a91eaaf6a8584492b2d622c27b9d9b8b2e20 (diff)