From 9400b6f2c8aa219a493961e0ab9770b7f12e80d2 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal
Date: Thu, 12 Jul 2018 13:29:48 +0530
Subject: glusterd: Add multiple checks before attach/start a brick

Problem: In a brick-mux scenario glusterd is sometimes unable to
         start/attach a brick, yet gluster v status still shows the
         brick as already running.

Solution: 1) To make sure the brick is running, check for the brick
             path in /proc/<pid>/fd; if the brick root is held open by
             the brick process, the brick stack has come up, otherwise
             it has not.
          2) Before starting/attaching a brick, check whether the brick
             root is mounted.
          3) While printing volume status, check that the brick is
             consumed by a brick process.

Test: The following procedure was used to verify the fix
      1) Set up a brick-mux environment on a vm
      2) Put a breakpoint in gdb in posix_health_check_thread_proc, at
         the point where the GF_EVENT_CHILD_DOWN event is notified
      3) Forcefully unmount any one brick path
      4) Check gluster v status; it shows N/A for that brick
      5) Try to start the volume with the force option; glusterd throws
         the message "No device available for mount brick"
      6) Mount the brick_root path back
      7) Try to start the volume with the force option again
      8) The down brick starts successfully

Change-Id: I91898dad21d082ebddd12aa0d1f7f0ed012bdf69
fixes: bz#1595320
Signed-off-by: Mohit Agrawal
---
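Note (reviewer sketch, not part of the original commit): the two shell-level
checks the fix relies on can be illustrated as below. The helper names, the
pid/path arguments and the device-id comparison used for the mount test are
assumptions for illustration only; glusterd itself implements the equivalent
checks in C.

    # Does the brick process hold the brick root open? This mirrors the
    # /proc/<pid>/fd check used in the new test: count fd symlinks that
    # point under the brick root, ignoring the internal .glusterfs dir.
    brick_consumed_by_process () {
        local brick_pid=$1 brick_root=$2
        local n
        n=$(ls -l /proc/"$brick_pid"/fd 2>/dev/null \
                | grep -iw "$brick_root" | grep -v ".glusterfs" | wc -l)
        [ "$n" -ge 1 ]
    }

    # Is the brick root still mounted? One possible test (an assumption,
    # not necessarily how glusterd checks it): a mountpoint lives on a
    # different device than its parent directory.
    brick_root_mounted () {
        local brick_root=$1
        [ "$(stat -c %d "$brick_root" 2>/dev/null)" != \
          "$(stat -c %d "$brick_root/.." 2>/dev/null)" ]
    }

    # Example with a hypothetical pid and brick path:
    #   brick_consumed_by_process "$(pgrep glusterfsd)" /bricks/brick1 &&
    #       brick_root_mounted /bricks/brick1 && echo "brick looks healthy"
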
grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 0 ] + +# Mount the brick root +TEST mount -t xfs -o nouuid /dev/test_vg_3/brick_lvm $brick_root + +# Replace brick_pid file to test brick_attach code +TEST cp $b1_pid_file $b3_pid_file + +# Start the volume all brick should be up +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks +EXPECT 1 count_brick_processes + +# Make sure every brick root should be consumed by a brick process +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] +n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` +TEST [ $n -eq 1 ] + +cleanup diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t index 8caa9fa2110..33439562ec9 100644 --- a/tests/basic/posix/shared-statfs.t +++ b/tests/basic/posix/shared-statfs.t @@ -23,6 +23,7 @@ TEST MOUNT_LOOP $LO2 $B0/${V0}2 # Create a subdir in mountpoint and use that for volume. TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') # Keeping the size less than 200M mainly because XFS will use @@ -38,6 +39,7 @@ EXPECT 'Stopped' volinfo_field $V0 'Status'; TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3 TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] -- cgit