From 7287b46042f805d646d7e117c243a1a4fdc61788 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal
Date: Mon, 8 May 2017 19:29:22 +0530
Subject: glusterd: socketfile & pidfile related fixes for brick multiplexing
 feature

Problem: While brick multiplexing is on, after restarting glusterd the CLI
         does not show the pid of all brick processes for all volumes.

Solution: While brick-mux is on, all local brick processes communicate
          through one UNIX socket, but as per the current code
          (glusterd_brick_start) glusterd tries to communicate over a
          separate UNIX socket for each brick, whose path is populated from
          the brick name and volume name. Because of the multiplexing
          design only one UNIX socket is actually opened, so glusterd
          throws a poller error and is not able to fetch the correct
          status of the brick process through the CLI. To resolve the
          problem, introduce a new function
          glusterd_set_socket_filepath_for_mux that is called by
          glusterd_brick_start to validate the existence of the socket
          path. To avoid continuous EPOLLERR errors in the logs, update
          the socket_connect code.

Test: To reproduce the issue, follow these steps:
      1) Create two distributed volumes (dist1 and dist2)
      2) Set cluster.brick-multiplex to on
      3) Kill glusterd
      4) Run the command 'gluster v status'
      After applying the patch, the correct pid is shown for all volumes.

> BUG: 1444596
> Change-Id: I5d10af69dea0d0ca19511f43870f34295a54a4d2
> Signed-off-by: Mohit Agrawal
> Reviewed-on: https://review.gluster.org/17101
> Smoke: Gluster Build System
> Reviewed-by: Prashanth Pai
> NetBSD-regression: NetBSD Build System
> CentOS-regression: Gluster Build System
> Reviewed-by: Atin Mukherjee
> (cherry picked from commit 21c7f7baccfaf644805e63682e5a7d2a9864a1e6)

Change-Id: Ia95b9d36e50566b293a8d6350f8316dafc27033b
BUG: 1449004
Signed-off-by: Mohit Agrawal
Reviewed-on: https://review.gluster.org/17212
Smoke: Gluster Build System
NetBSD-regression: NetBSD Build System
Reviewed-by: Atin Mukherjee
Reviewed-by: Prashanth Pai
CentOS-regression: Gluster Build System
---
 .../bug-1444596_brick_mux_gd_status_restart.t      | 68 ++++++++++++++++++++++
 .../bug-1444596_brick_mux_posix_hlth_chk_status.t  | 44 ++++++++++++++
 tests/bugs/glusterd/bug-913555.t                   |  4 ++
 tests/cluster.rc                                   | 10 ----
 tests/volume.rc                                    | 30 +++++++++-
 5 files changed, 145 insertions(+), 11 deletions(-)
 create mode 100644 tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
 create mode 100644 tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
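For reference, the reproducer from the Test section above as a rough shell
transcript (a sketch only: volume names, host, and brick paths are
illustrative, and the volume-start steps are implied rather than spelled out
in the commit message):

    # Reproduce the missing-pid status issue (names and paths illustrative)
    gluster volume create dist1 myhost:/bricks/dist1_{0,1}
    gluster volume create dist2 myhost:/bricks/dist2_{0,1}
    gluster volume set all cluster.brick-multiplex on
    gluster volume start dist1
    gluster volume start dist2
    pkill glusterd      # kill glusterd; the multiplexed brick keeps running
    glusterd            # restart glusterd
    gluster v status    # without the fix, some brick pids show up as N/A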
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
new file mode 100644
index 00000000000..950cb5f8046
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+function count_up_bricks {
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+EXPECT 1 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restarting glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+
+TEST $CLI volume stop $V0
+TEST $CLI volume stop $V1
+
+cleanup
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume set $V0 performance.cache-size 32MB
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+#Check the number of brick processes after changing a volume option
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Brick status after restarting glusterd should not be NA
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+EXPECT 2 count_brick_processes
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
new file mode 100644
index 00000000000..39ab2dd723c
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+function count_up_bricks {
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+cleanup
+TEST glusterd -LDEBUG
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $H0:$B0/brick{0,1}
+
+#Check the number of brick processes after removing bricks from the back-end
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+EXPECT 1 count_brick_processes
+
+pkill glusterd
+TEST glusterd -LDEBUG
+sleep 5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+
+cleanup
+
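Both new tests count online bricks by grepping the CLI's XML output, in
which every online brick (and every running auxiliary daemon such as
Self-heal, Quota, Snapshot, or Tier, which the volume.rc helper below has to
subtract) contributes a <status>1</status> element. As a quick manual check
of that output, the same status values can be listed with the sed expression
already used by brick_up_status_1 in tests/cluster.rc:

    # Print one 0/1 per brick/daemon from the XML status output
    # (a sketch; mirrors the sed in brick_up_status_1, tests/cluster.rc)
    gluster --xml volume status | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'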
diff --git a/tests/bugs/glusterd/bug-913555.t b/tests/bugs/glusterd/bug-913555.t
index 5c845dc8545..9bc875340d1 100755
--- a/tests/bugs/glusterd/bug-913555.t
+++ b/tests/bugs/glusterd/bug-913555.t
@@ -16,6 +16,10 @@ function check_peers {
     $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
 }
 
+function online_brick_count {
+        $CLI_1 --xml volume status | grep '<status>1' | wc -l
+}
+
 cleanup;
 
 TEST launch_cluster 3; # start 3-node virtual cluster
diff --git a/tests/cluster.rc b/tests/cluster.rc
index 6dece8182e4..48071647260 100644
--- a/tests/cluster.rc
+++ b/tests/cluster.rc
@@ -179,13 +179,3 @@ function brick_up_status_1 {
        $CLI_1 volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
 }
 
-function online_brick_count {
-        local bricks
-        local total=0
-        local i
-        for i in $(seq 1 $CLUSTER_COUNT); do
-                bricks=$(find $B0/$i/glusterd/vols -name '*.pid' | wc -l)
-                total=$((total+bricks))
-        done
-        echo $total
-}
diff --git a/tests/volume.rc b/tests/volume.rc
index 5419e399d8f..4fe4060b799 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -21,11 +21,39 @@ function brick_count()
     $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
 }
 
+function check_brick_status() {
+        cmd="gluster --xml volume status"
+        local daemon=$1
+
+        if [[ -z $daemon ]]
+        then
+                echo `$cmd | grep '<status>1' | wc -l`
+        else
+                echo `$cmd | grep -A 5 ${daemon} | grep '<status>1' | wc -l`
+        fi
+}
+
 function online_brick_count ()
 {
-        find $GLUSTERD_WORKDIR/vols/ -name '*.pid' | wc -l
+        local v1=0
+        local v2=0
+        local v3=0
+        local v4=0
+        local v5=0
+        local tot=0
+
+        #First count the total number of <status>1 lines, then subtract the daemon entries
+        v1=`check_brick_status`
+        v2=`check_brick_status "Self-heal"`
+        v3=`check_brick_status "Quota"`
+        v4=`check_brick_status "Snapshot"`
+        v5=`check_brick_status "Tier"`
+        tot=$((v1-v2-v3-v4-v5))
+        echo $tot
+
 }
 
 function brick_up_status {
     local vol=$1
     local host=$2
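A note on the reworked helper: online_brick_count now derives its count from
the live 'volume status' output instead of pid files on disk, subtracting
the <status> entries contributed by the Self-heal, Quota, Snapshot, and Tier
daemons. Typical usage in a .t test might look like this (a sketch; the
expected value of 4 assumes two started 2-brick volumes, as in the tests
above):

    # Wait until all four bricks of the two volumes report online
    EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count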