1 files changed, 147 insertions, 36 deletions
diff --git a/tests/include.rc b/tests/include.rc
index 5af4b241bf2..0dc7d830449 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -1,14 +1,19 @@
+
+checkpoint_time="$(date +%s%N)"
+
 M0=${M0:=/mnt/glusterfs/0};   # 0th mount point for FUSE
 M1=${M1:=/mnt/glusterfs/1};   # 1st mount point for FUSE
 M2=${M2:=/mnt/glusterfs/2};   # 2nd mount point for FUSE
+M3=${M3:=/mnt/glusterfs/3};   # 3rd mount point for FUSE
 N0=${N0:=/mnt/nfs/0};         # 0th mount point for NFS
 N1=${N1:=/mnt/nfs/1};         # 1st mount point for NFS
 V0=${V0:=patchy};             # volume name to use in tests
 V1=${V1:=patchy1};            # volume name to use in tests
 GMV0=${GMV0:=master};	      # master volume name to use in geo-rep tests
 GSV0=${GSV0:=slave};	      # slave volume name to use in geo-rep tests
+GSV1=${GSV1:=slave1};	      # slave volume name to use in geo-rep tests
 B0=${B0:=/d/backends};        # top level of brick directories
-WORKDIRS="$B0 $M0 $M1 $M2 $N0 $N1"
+WORKDIRS="$B0 $M0 $M1 $M2 $M3 $N0 $N1"
 
 ROOT_GFID="00000000-0000-0000-0000-000000000001"
 DOT_SHARD_GFID="be318638-e8a0-4c6d-977d-7a937aa84806"
@@ -30,6 +35,7 @@ while true; do
                 ENV_RC="/not/found"
                 break
         fi
+        old_dir=$env_dir
         env_dir=$new_dir
 done
 
@@ -69,12 +75,12 @@ esac
 DEBUG=${DEBUG:=0}             # turn on debugging?
 
 PROCESS_DOWN_TIMEOUT=5
-PROCESS_UP_TIMEOUT=30
+PROCESS_UP_TIMEOUT=45
 NFS_EXPORT_TIMEOUT=20
 CHILD_UP_TIMEOUT=20
 PROBE_TIMEOUT=60
 PEER_SYNC_TIMEOUT=20
-REBALANCE_TIMEOUT=360
+REBALANCE_TIMEOUT=600
 REOPEN_TIMEOUT=20
 HEAL_TIMEOUT=80
 IO_HEAL_TIMEOUT=120
@@ -87,6 +93,7 @@ GRAPH_SWITCH_TIMEOUT=10
 UNLINK_TIMEOUT=5
 MDC_TIMEOUT=5
 IO_WAIT_TIMEOUT=5
+DISK_FAIL_TIMEOUT=80
 
 LOGDIR=$(gluster --print-logdir)
 
@@ -99,6 +106,24 @@ CLI_NO_FORCE="gluster --mode=script";
 # root partition is ignored while running the command in a "no force" mode
 CLI_IGNORE_PARTITION="gluster --mode=script --wignore-partition"
 
+function wait_delay() {        # usage: wait_delay <delay-secs> <interval-secs> <command> [args...]
+        local delay="$1"        # overall timeout in whole seconds (nine zeros are appended below to get nanoseconds)
+        local interval="$2"     # pause between attempts, handed to sleep unchanged
+        shift 2
+        local deadline="$(($(date +%s%N) + ${delay}000000000))"
+
+        "$@"    # first attempt; "$@" keeps each argument as one word instead of re-splitting and globbing it
+        while [[ $? -ne 0 ]]; do
+                if [[ $(date +%s%N) -ge ${deadline} ]]; then
+                        return 1        # deadline reached and the command is still failing
+                fi
+                sleep "${interval}"
+                "$@"    # must stay the last command of the loop body: the while test reads its $?
+        done
+
+        return 0
+}
+
 _GFS () {
 	glusterfs "$@"
 	local mount_ret=$?
@@ -110,8 +135,8 @@ _GFS () {
 	while true; do
 		touch $mount_point/xy_zzy 2> /dev/null && break
 		i=$((i+1))
-		[ $i -lt 10 ] || break
-		sleep 1
+		[ $i -lt 100 ] || break
+		sleep 0.1
 	done
 	rm -f $mount_point/xy_zzy
 	return $mount_ret
@@ -187,6 +212,7 @@ function test_header()
         dbg "=========================";
         dbg "TEST $t (line $TESTLINE): $*";
         saved_cmd="$*"
+        start_time="$(date +%s%N)"
 }
 
 
@@ -195,15 +221,18 @@ function test_footer()
         RET=$?
         local lineno=$1
         local err=$2
-
+        local end_time
+        local elapsed1
+        local elapsed2
+
+        end_time="$(date +%s%N)"
+        elapsed1="$(((start_time - checkpoint_time) / 1000000))"
+        elapsed2="$(((end_time - start_time) / 1000000))"
+        checkpoint_time="$end_time"
         if [ $RET -eq 0 ]; then
-                echo "ok $t, LINENUM:$lineno";
+                printf "ok %3d [%7d/%7d] <%4d> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd";
         else
-                echo "not ok $t $err, LINENUM:$lineno";
-                # With DEBUG, this was already printed out, so skip it.
-                if [ x"$DEBUG" = x"0" ]; then
-                        echo "FAILED COMMAND: $saved_cmd"
-                fi
+                printf "not ok %3d [%7d/%7d] <%4d> '%s' -> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd" "$err"
                 if [ "$EXIT_EARLY" = "1" ]; then
 			cleanup
                         exit $RET
@@ -336,12 +365,12 @@ function _EXPECT_WITHIN()
         a="";
         shift;
 
-        local endtime=$(( ${timeout}+`date +%s` ))
+        local endtime="$(( ${timeout}000000000 + $(date +%s%N) ))"
 
         # We *want* this to be globally visible.
         EW_RETRIES=0
 
-        while [ `date +%s` -lt $endtime ]; do
+        while [[ "$(date +%s%N)" < "$endtime" ]]; do
                 a=$("$@" | tail -1 ; exit ${PIPESTATUS[0]})
                 ## Check command success
                 if [ $? -ne 0 ]; then
@@ -351,7 +380,7 @@ function _EXPECT_WITHIN()
                 if [[ "$a" =~ $e ]]; then
                         break;
                 fi
-                sleep 1;
+                sleep 0.25;
                 EW_RETRIES=$((EW_RETRIES+1))
         done
 
@@ -462,8 +491,103 @@ stat -c %s /dev/null > /dev/null 2>&1 || {
   }
 }
 
+function signal_pids() {       # usage: signal_pids <signal> [pid...] -- send <signal> to every listed pid; always returns 0
+        local sig="$1"         # signal name or number without the leading dash (callers in this file pass TERM and KILL)
+        shift
+        local pids=($*)        # unquoted expansion: the remaining arguments are word-split into one array element per pid
+
+        if [[ ${#pids[@]} -gt 0 ]]; then        # with an empty list kill is not invoked at all
+                kill -${sig} ${pids[@]} 2>/dev/null || true     # best effort: kill's error output and exit status are discarded
+        fi
+}
+
+function check_pids() {        # usage: check_pids [pid...] -- print the subset of pids for which kill -0 still succeeds
+        local pids=($*)
+        local tmp=()           # pids that passed the kill -0 probe
+        local pid
+
+        for pid in "${pids[@]}"; do
+                kill -0 "${pid}" 2>/dev/null && tmp+=(${pid})  # signal 0 only checks the pid; no signal is delivered
+        done
+
+        echo "${tmp[@]}"       # space-separated on one line; an empty line when no pid passed
+}
+
+function pids_alive() {        # NOTE: despite the name, returns 0 when NONE of the pids is alive and 1 when at least one still is
+        local pids=($*)
+
+        if [[ "$(check_pids ${pids[@]})" != "" ]]; then        # non-empty output: some pid still answers kill -0
+                return 1;
+        fi
+
+        return 0       # "all gone" is the success that terminate_pids polls for through wait_delay
+}
+
+function terminate_pids() {    # usage: terminate_pids [pid...] -- SIGTERM the pids, then SIGKILL whatever survives; returns 0, 1 or 2
+        local pids=($*)
+
+        signal_pids TERM ${pids[@]}
+        wait_delay ${PROCESS_DOWN_TIMEOUT} 0.1 pids_alive ${pids[@]}   # retry every 0.1s until all pids are gone or the timeout expires
+        if [[ $? -ne 0 ]]; then
+                pids=($(check_pids ${pids[@]}))        # narrow the list down to the survivors
+                signal_pids KILL ${pids[@]}
+                wait_delay 1 0.1 pids_alive ${pids[@]} # give SIGKILL up to one more second to take effect
+                if [[ $? -ne 0 ]]; then
+                        return 2       # something is still alive even after SIGKILL
+                fi
+
+                return 1       # the pids were gone only after the SIGKILL stage
+        fi
+
+        return 0       # everything was gone after SIGTERM alone
+}
+
+function process_pids() {      # usage: process_pids <name>... -- print, on one line, the pids pgrep reports for each name
+        local proc
+        local pids=()          # accumulated pgrep output across all names
+
+        for proc in $*; do
+                pids+=($(pgrep ${proc}))       # NOTE(review): without -x pgrep matches the pattern anywhere in the process name, so a pid can be listed under several names -- confirm this is intended
+        done
+
+        echo "${pids[@]}"
+}
+
+## Lock files should get automatically removed once the "useradd" or "groupadd"
+## command finishes. But sometimes we encounter situations (bugs) where
+## some of these files may not get properly unlocked after the execution of
+## the command. In that case, the next time we execute useradd, it may show
+## the error "cannot lock /etc/passwd" or "unable to lock group file".
+## So, to avoid any such errors, check for any lock files under /etc
+## and remove them.
+
+function remove_lock_files()   # delete stale passwd/group/shadow/gshadow lock files left behind in /etc
+{
+        if [ -f /etc/passwd.lock ];     # act only when the lock file actually exists
+        then
+                rm -f /etc/passwd.lock;
+        fi
+
+        if [ -f /etc/group.lock ];
+        then
+                rm -f /etc/group.lock;
+        fi
+
+        if [ -f /etc/shadow.lock ];
+        then
+                rm -f /etc/shadow.lock;
+        fi
+
+        if [ -f /etc/gshadow.lock ];
+        then
+                rm -f /etc/gshadow.lock;
+        fi
+}
+
+
 function cleanup()
 {
+        local end_time
 
         # Prepare flags for umount
         case `uname -s` in
@@ -481,6 +605,9 @@ function cleanup()
                 ;;
         esac
 
+        # Clean up lock files.
+        remove_lock_files
+
         # Clean up all client mounts
         for m in `mount | grep fuse.glusterfs | awk '{print $3}'`; do
                 umount $flag $m
@@ -499,8 +626,9 @@ function cleanup()
         umount $flag /tmp/mnt* 2>/dev/null
 
 
-        # Send SIGKILL to all gluster processes and rpc.statd that are still running
-        killall -9 glusterfs glusterfsd glusterd rpc.statd 2>/dev/null || true;
+        # Send SIGTERM to all gluster processes and rpc.statd that are still running
+        terminate_pids $(process_pids glusterfs glusterfsd glusterd rpc.statd)
+
         test x"$OSTYPE" = x"NetBSD" && pkill -9 perfused || true
 
         # unregister nfs and related services from portmapper/rpcbind
@@ -597,31 +725,14 @@ function cleanup()
                 return 1;
         fi >&2
 
-        # tar logs at the start and end of every test
-        if [ -n "$LOGDIR" -a -z "$STOP_WASTING_SPACE" ]
-        then
-                tarname=$(basename $0 .t)
-		# Can't use --exclude here because NetBSD doesn't have it.
-		# However, both it and Linux have -X to take patterns from
-		# a file, so use that.
-		(echo '*.tar'; echo .notar) > ${LOGDIR}/.notar \
-			&& \
-                tar -cf ${LOGDIR}/${tarname}.tar -X ${LOGDIR}/.notar \
-			${LOGDIR}/* 2> /dev/null \
-                        && \
-                find $LOGDIR/* -maxdepth 0 -name '*.tar' -prune \
-                                        -o -exec rm -rf '{}' ';'
-        else
-                echo "LOGDIR is not set"
-        fi
-
         mkdir -p $WORKDIRS
 	# This is usually the last thing a test script calls, so our return
 	# value becomes their exit value.  While it's not great for the mkdir
 	# above to fail, promoting that into a failure of the whole test (and
 	# thus of an entire regression-test run) seems a bit excessive.  Make
 	# sure we return good status anyway.
-	return 0
+
+        return 0
 }
 
 function force_terminate () {