summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dan Lambright <dlambrig@redhat.com> 2015-11-01 10:22:00 -0500
committer Dan Lambright <dlambrig@redhat.com> 2015-11-07 13:44:38 -0800
commit 5b989f034d522ffe8c311481b7ef2064358cb06f (patch)
tree 9fd1feba9c3a71b361c66240ff13c79f17f4de88
parent f3b555e029e6531d914fb67e098929ffe03d161b (diff)
cluster/tier correct promotion cycle calculation
This is a backport of 12480. The tier translator should only choose candidate files for promotion from the most recent cycle, not a multiple of the most recent cycles. Otherwise user-observed behavior can be inconsistent. Remove the related test in tier.t that is subject to a race condition. > Change-Id: I9ad1523cac00f904097ce468efa6ddd515857024 > BUG: 1275524 > Signed-off-by: root <root@rhs-cli-15.gdev.lab.eng.bos.redhat.com> > Signed-off-by: Dan Lambright <dlambrig@redhat.com> > Reviewed-on: http://review.gluster.org/12480 > Reviewed-by: Joseph Fernandes > Tested-by: Gluster Build System <jenkins@build.gluster.com> Signed-off-by: Dan Lambright <dlambrig@redhat.com> Signed-off-by: Dan Lambright <dlambrig@redhat.com> Conflicts: tests/basic/tier/tier.t xlators/cluster/dht/src/tier.c Change-Id: Ic4587bf1b5d26ba377a12a4ce8e329362988a33b BUG: 1275483 Reviewed-on: http://review.gluster.org/12536 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com>
-rw-r--r-- tests/basic/tier/legacy-many.t 19
-rwxr-xr-x tests/basic/tier/tier.t 38
-rw-r--r-- tests/tier.rc 13
-rw-r--r-- xlators/cluster/dht/src/tier.c 12
4 files changed, 53 insertions, 29 deletions
diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t
index 2c7ff3e5407..6e1147aec2d 100644
--- a/tests/basic/tier/legacy-many.t
+++ b/tests/basic/tier/legacy-many.t
@@ -9,13 +9,12 @@ LAST_BRICK=3
CACHE_BRICK_FIRST=4
CACHE_BRICK_LAST=5
DEMOTE_TIMEOUT=12
-PROMOTE_TIMEOUT=5
+PROMOTE_TIMEOUT=12
MIGRATION_TIMEOUT=10
DEMOTE_FREQ=60
-PROMOTE_FREQ=4
+PROMOTE_FREQ=10
TEST_DIR="test_files"
-NUM_FILES=20
-
+NUM_FILES=15
function read_all {
for file in *
@@ -49,19 +48,25 @@ wait
TEST $CLI volume attach-tier $V0 replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
TEST $CLI volume rebalance $V0 tier status
+
TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
TEST $CLI volume set $V0 cluster.read-freq-threshold 0
TEST $CLI volume set $V0 cluster.write-freq-threshold 0
+# wait a little for lookup heal to finish
+sleep 10
+
# Read "legacy" files
drop_cache $M0
-cd ${TEST_DIR}
+
+sleep_until_mid_cycle $DEMOTE_FREQ
+
TEST read_all
# Test to make sure files were promoted as expected
-sleep $DEMOTE_TIMEOUT
-EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters $NUM_FILES 0
+sleep $PROMOTE_TIMEOUT
+EXPECT_WITHIN $PROMOTE_TIMEOUT "0" check_counters $NUM_FILES 0
cd;
cleanup
diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t
index 5bb2dc95164..a197d6eb127 100755
--- a/tests/basic/tier/tier.t
+++ b/tests/basic/tier/tier.t
@@ -11,22 +11,14 @@ DEMOTE_TIMEOUT=12
PROMOTE_TIMEOUT=5
MIGRATION_TIMEOUT=10
DEMOTE_FREQ=4
-PROMOTE_FREQ=4
-
-
-# Timing adjustment to avoid spurious errors with first instances of file_on_fast_tier
-function sleep_first_cycle {
- startTime=$(date +%s)
- mod=$(( ( $startTime % $DEMOTE_FREQ ) + 1 ))
- sleep $mod
-}
+PROMOTE_FREQ=12
function file_on_slow_tier {
found=0
for i in `seq 0 $LAST_BRICK`; do
- test -e $B0/${V0}${i}/$1 && found=1 && break;
+ test -e "$B0/${V0}${i}/$1" && found=1 && break;
done
if [ "$found" == "1" ]
@@ -56,7 +48,7 @@ function file_on_fast_tier {
found=0
for j in `seq $CACHE_BRICK_FIRST $CACHE_BRICK_LAST`; do
- test -e $B0/${V0}${j}/$1 && found=1 && break;
+ test -e "$B0/${V0}${j}/$1" && found=1 && break;
done
@@ -120,10 +112,14 @@ TEST ! $CLI volume set $V0 cluster.tier-max-files -3
TEST ! $CLI volume set $V0 cluster.watermark-low 90
# stop the volume and restart it. The rebalance daemon should restart.
+cd /tmp
+umount $M0
TEST $CLI volume stop $V0
TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+cd $M0
-sleep_first_cycle
+sleep_first_cycle $DEMOTE_FREQ
$CLI volume tier $V0 status
#Tier options expect non-negative value
@@ -158,9 +154,12 @@ uuidgen > /tmp/d1/data2.txt
md5data2=$(fingerprint /tmp/d1/data2.txt)
cp /tmp/d1/data2.txt ./d1/data2.txt
-uuidgen > /tmp/d1/data3.txt
-md5data3=$(fingerprint /tmp/d1/data3.txt)
-mv /tmp/d1/data3.txt ./d1/data3.txt
+#File with spaces and special characters.
+SPACE_FILE="file with spaces & $peci@l ch@r@cter$ @!@$%^$#@^^*&%$#$%.txt"
+
+uuidgen > "/tmp/d1/$SPACE_FILE"
+md5space=$(fingerprint "/tmp/d1/$SPACE_FILE")
+mv "/tmp/d1/$SPACE_FILE" "./d1/$SPACE_FILE"
# Check auto-demotion on write new.
sleep $DEMOTE_TIMEOUT
@@ -169,11 +168,12 @@ sleep $DEMOTE_TIMEOUT
UUID=$(uuidgen)
echo $UUID >> /tmp/d1/data2.txt
md5data2=$(fingerprint /tmp/d1/data2.txt)
-echo $UUID >> ./d1/data2.txt
-# Check promotion on read to slow tier
+sleep_until_mid_cycle $DEMOTE_FREQ
drop_cache $M0
-cat d1/data3.txt
+
+echo $UUID >> ./d1/data2.txt
+cat "./d1/$SPACE_FILE"
sleep $PROMOTE_TIMEOUT
sleep $DEMOTE_FREQ
@@ -185,7 +185,7 @@ TEST glusterd
EXPECT "0" file_on_slow_tier d1/data.txt $md5data
EXPECT "0" file_on_slow_tier d1/data2.txt $md5data2
-EXPECT "0" file_on_slow_tier d1/data3.txt $md5data3
+EXPECT "0" file_on_slow_tier "./d1/$SPACE_FILE" $md5space
TEST $CLI volume tier $V0 detach start
diff --git a/tests/tier.rc b/tests/tier.rc
index 3fa6af6a40f..4fd24de0659 100644
--- a/tests/tier.rc
+++ b/tests/tier.rc
@@ -99,3 +99,16 @@ function confirm_vol_stopped {
fi
}
+
+function sleep_first_cycle {
+ startTime=$(date +%s)
+ mod=$(( ( $startTime % $1 ) + 1 ))
+ sleep $mod
+}
+
+function sleep_until_mid_cycle {
+ startTime=$(date +%s)
+ mod=$(( ( $startTime % $1 ) + 1 ))
+ mod=$(( $mod + $1 / 2 ))
+ sleep $mod
+}
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 31d5e8b627e..618b82d18ee 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -31,7 +31,6 @@ static void *libhandle;
static gfdb_methods_t gfdb_methods;
#define DB_QUERY_RECORD_SIZE 4096
-#define PROMOTION_CYCLE_CNT 4
static int
@@ -1067,7 +1066,14 @@ tier_build_migration_qfile (demotion_args_t *args,
goto out;
}
time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
- time_in_past.tv_usec = current_time.tv_usec - time_in_past.tv_usec;
+
+ /* The migration daemon may run a varying number of usec after the sleep */
+ /* call triggers. A file may be registered in CTR some number of usec X */
+ /* after the daemon started and missed in the subsequent cycle if the */
+ /* daemon starts Y usec after the period in seconds where Y>X. Normalize */
+ /* away this problem by always setting usec to 0. */
+ time_in_past.tv_usec = 0;
+
gfdb_brick_dict_info.time_stamp = &time_in_past;
gfdb_brick_dict_info._gfdb_promote = is_promotion;
gfdb_brick_dict_info._query_cbk_args = query_cbk_args;
@@ -1482,7 +1488,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
promotion_args.this = this;
promotion_args.brick_list = &bricklist_cold;
promotion_args.defrag = defrag;
- promotion_args.freq_time = freq_promote * PROMOTION_CYCLE_CNT;
+ promotion_args.freq_time = freq_promote;
ret_promotion = pthread_create (&promote_thread,
NULL, &tier_promote,
&promotion_args);