#!/bin/bash

#
# Test the scenario where the self-heal daemon (SHD) suffers a frame timeout
# during a crawl.  The expected behavior is that the in-progress crawl
# continues after the timeout and does not deadlock.
#

# Pull in the shared test helpers, located relative to this script.  The
# expansions are quoted so that a checkout path containing spaces or glob
# characters does not get word-split.
. "$(dirname "$0")/../include.rc"
. "$(dirname "$0")/../volume.rc"

# Run the cleanup routine before the test starts (presumably provided by the
# helpers sourced above) so the test does not inherit earlier state.
cleanup;

#
# Block until the self-heal daemon logs a "no active sinks" message, or until
# the timeout expires.
#
# If we see the "no active sinks" log message we know the heal is alive.  It
# cannot proceed as the "sink" is hung, but it's at least alive and trying.
#
# Only lines appended to the log after this function starts are considered.
#
# Arguments:
#   $1 - maximum time to wait, as a duration accepted by timeout(1)
#   $2 - optional log file to watch
#        (default: /var/log/glusterfs/glustershd.log)
# Returns:
#   0 if the message was seen; otherwise the non-zero status reported by
#   timeout/grep, or 1 if the temporary FIFO could not be created.
#
function wait_for_shd_no_sink() {
  local TIMEOUT=$1
  local logfile=${2:-/var/log/glusterfs/glustershd.log}
  local fifo_dir tail_pid rc

  fifo_dir=$(mktemp -d) || return 1
  if ! mkfifo "$fifo_dir/log"; then
    rm -rf -- "${fifo_dir:?}"
    return 1
  fi

  # Run tail as a tracked background job feeding a FIFO, rather than inside a
  # process substitution, so that its PID is known and it can be stopped
  # below.  Otherwise the follower would outlive this function, holding the
  # log file (and our stderr) open.
  tail -fn0 "$logfile" > "$fifo_dir/log" &
  tail_pid=$!

  timeout "$TIMEOUT" grep -q 'replicate-0: no active sinks for' \
    < "$fifo_dir/log"
  rc=$?

  # Stop and reap the follower; it may already have exited (e.g. the log file
  # does not exist), so failures here are expected and ignored.
  kill "$tail_pid" 2> /dev/null
  wait "$tail_pid" 2> /dev/null
  rm -rf -- "${fifo_dir:?}"
  return "$rc"
}

# Start the management daemon, confirm it is running, and check that the CLI
# can query volume information.
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info 2> /dev/null;

# Create a 3-way replicated volume.  network.frame-timeout is set to a low
# value (2) so that a hung brick produces frame timeouts quickly.  The entry,
# metadata and data self-heal options are switched off while the self-heal
# daemon is left on, presumably so that all healing is done by the SHD.
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
TEST $CLI volume set $V0 network.frame-timeout 2
TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.entry-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.self-heal-daemon on
TEST $CLI volume set $V0 cluster.heal-timeout 10
TEST $CLI volume start $V0
# NOTE(review): fixed delay, presumably to let the bricks and the SHD come up
# before the volume is mounted -- confirm.
sleep 5

# Mount the volume at $M0 with the attribute and entry timeouts set to 0.
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
  --attribute-timeout=0 --entry-timeout=0

# Kill brick 1, so that data written while it is down will need healing.
TEST kill_brick $V0 $H0 $B0/${V0}1
# NOTE(review): short fixed delay, presumably to let the brick's
# disconnection be noticed before writing -- confirm.
sleep 1

# Write some data into the mount which will require healing.
#
# The files are addressed by absolute path instead of cd'ing into the mount:
# this keeps the shell's working directory outside $M0 for the rest of the
# test (including the final cleanup), and means that an unusable mount point
# cannot silently redirect the 1000 files into whatever directory the test
# happened to be started from.
for i in {1..1000}; do
  dd if=/dev/urandom of="$M0/testdata_$i" bs=64k count=1 2>/dev/null
done

# Re-start the killed brick and wait (up to 20) for afr_child_up_status to
# report "1" for child 0 of the volume.
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0

sleep 1
# Hang brick 1 while it still needs healing.  hang_brick is defined outside
# this file; presumably it suspends the brick process without killing it.
TEST hang_brick $V0 $H0 $B0/${V0}1
# NOTE(review): presumably long enough for the configured frame timeout (2)
# to expire against the hung brick -- confirm.
sleep 4
# The SHD must still be making heal attempts after the timeout: wait up to 20
# for it to log that it has no active sinks.
TEST wait_for_shd_no_sink 20
cleanup

#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000