From 1a95fc3036db51b82b6a80952f0908bc2019d24a Mon Sep 17 00:00:00 2001
From: Jeff Darcy
Date: Thu, 8 Dec 2016 16:24:15 -0500
Subject: core: run many bricks within one glusterfsd process

This patch adds support for multiple brick translator stacks running
in a single brick server process. This reduces our per-brick memory
usage by approximately 3x, and our appetite for TCP ports even more.
It also creates potential to avoid process/thread thrashing, and to
improve QoS by scheduling more carefully across the bricks, but
realizing that potential will require further work.

Multiplexing is controlled by the "cluster.brick-multiplex" global
option. By default it's off, and bricks are started in separate
processes as before. If multiplexing is enabled, then *compatible*
bricks (mostly those with the same transport options) will be
started in the same process.

Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb
BUG: 1385758
Signed-off-by: Jeff Darcy
Reviewed-on: https://review.gluster.org/14763
Smoke: Gluster Build System
NetBSD-regression: NetBSD Build System
CentOS-regression: Gluster Build System
Reviewed-by: Vijay Bellur
---
 tests/volume.rc | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'tests/volume.rc')

diff --git a/tests/volume.rc b/tests/volume.rc
index bd51893392e..2062f42940e 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -246,19 +246,43 @@ function quotad_up_status {
         gluster volume status | grep "Quota Daemon" | awk '{print $7}'
 }
 
-function get_brick_pid {
+function get_brick_pidfile {
         local vol=$1
         local host=$2
         local brick=$3
         local brick_hiphenated=$(echo $brick | tr '/' '-')
-        echo `cat $GLUSTERD_WORKDIR/vols/$vol/run/${host}${brick_hiphenated}.pid`
+        echo $GLUSTERD_WORKDIR/vols/$vol/run/${host}${brick_hiphenated}.pid
+}
+
+function get_brick_pid {
+        cat $(get_brick_pidfile $*)
 }
 
 function kill_brick {
         local vol=$1
         local host=$2
         local brick=$3
-        kill -9 $(get_brick_pid $vol $host $brick)
+
+        local pidfile=$(get_brick_pidfile $vol $host $brick)
+        local cmdline="/proc/$(cat $pidfile)/cmdline"
+        local socket=$(cat $cmdline | tr '\0' '\n' | grep '\.socket$')
+
+        gf_attach -d $socket $brick
+        # Since we're not going through glusterd, we need to clean up the
+        # pidfile ourselves. However, other state in glusterd (e.g.
+        # started_here) won't be updated. A "stop-brick" CLI command would
+        # sure be useful.
+        rm -f $pidfile
+
+        # When the last brick in a process is terminated, the process has to
+        # sleep for a second to give the RPC response a chance to get back to
+        # GlusterD. Without that, we get random failures in tests that use
+        # "volume stop" whenever the process termination is observed before the
+        # RPC response. However, that same one-second sleep can cause other
+        # random failures in tests that assume a brick will already be gone
+        # before "gf_attach -d" returns. There are too many of those to fix,
+        # so we compensate by putting the same one-second sleep here.
+        sleep 1
 }
 
 function check_option_help_presence {
--
cgit