From 1d0402b25aa24e2076fb79cbeb4ba3ed3a9f3ede Mon Sep 17 00:00:00 2001
From: Joseph Fernandes
Date: Wed, 14 Oct 2015 00:00:41 +0530
Subject: tier/libgfdb: Replacing ASCII query file with binary

Earlier, when the database was queried we used to save all the queried
records in an ASCII format in the query file. This caused issues like
filenames containing the ASCII delimiter, and it used to take a lot of
space. The tier.c file also had a lot of parsing code.

Here we changed the format of the query file to binary. All the logic of
serialization and formatting of the query record is done by libgfdb.
Libgfdb provides the APIs gfdb_write_query_record() and
gfdb_read_query_record(), which the user, i.e. the tier migrator and the
CTR xlator, can use to write to and read from the query file.

With this binary format we save on disk space, i.e. reduce to 50% at least,
as we are saving GFIDs in binary format (16 bytes) and not the string
format, which takes 36 bytes + we are not saving the path of the file + we
are also saving on ASCII delimiters.

The on disk format of query record is as follows,

+---------------------------------------------------------------------------+
| Length of serialized query record |       Serialized Query Record         |
+---------------------------------------------------------------------------+
             4 bytes           Length of serialized query record
                                                      |
                                                      |
     -------------------------------------------------|
     |
     |
     V
   Serialized Query Record Format:
   +---------------------------------------------------------------------------+
   | GFID |  Link count   |  <LINK INFO>  |.....                      | FOOTER |
   +---------------------------------------------------------------------------+
     16 B        4 B         Link Length                                  4 B
                                |                                          |
                                |                                          |
   -----------------------------|                                          |
   |                                                                       |
   |                                                                       |
   V                                                                       |
   Each <Link Info> will be serialized as                                  |
   +-----------------------------------------------+                       |
   | PGID | BASE_NAME_LENGTH |      BASE_NAME      |                       |
   +-----------------------------------------------+                       |
     16 B       4 B             BASE_NAME_LENGTH                           |
                                                                           |
                                                                           |
   ------------------------------------------------------------------------|
   |
   |
   V
   FOOTER is a magic number 0xBAADF00D indicating the end of the record.
   This also serves as a serialized schema validator.

Change-Id: I9db7416fd421e118dd44eafab8b535caafe50d5a
BUG: 1272207
Signed-off-by: Joseph Fernandes
Reviewed-on: http://review.gluster.org/12354
Reviewed-by: N Balachandran
Tested-by: Gluster Build System
Reviewed-by: Dan Lambright
Tested-by: Dan Lambright
---
 tests/basic/tier/tier.t | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'tests/basic')

diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t index 0b88ac3bb74..f38aa898c11 100755 --- a/tests/basic/tier/tier.t +++ b/tests/basic/tier/tier.t @@ -26,7 +26,7 @@ function file_on_slow_tier { found=0 for i in `seq 0 $LAST_BRICK`; do - test -e $B0/${V0}${i}/$1 && found=1 && break; + test -e "$B0/${V0}${i}/$1" && found=1 && break; done if [ "$found" == "1" ] @@ -56,7 +56,7 @@ function file_on_fast_tier { found=0 for j in `seq $CACHE_BRICK_FIRST $CACHE_BRICK_LAST`; do - test -e $B0/${V0}${j}/$1 && found=1 && break; + test -e "$B0/${V0}${j}/$1" && found=1 && break; done @@ -162,9 +162,12 @@ uuidgen > /tmp/d1/data2.txt md5data2=$(fingerprint /tmp/d1/data2.txt) cp /tmp/d1/data2.txt ./d1/data2.txt -uuidgen > /tmp/d1/data3.txt -md5data3=$(fingerprint /tmp/d1/data3.txt) -mv /tmp/d1/data3.txt ./d1/data3.txt +#File with spaces and special characters. 
+SPACE_FILE="file with spaces & $peci@l ch@r@cter$ @!@$%^$#@^^*&%$#$%.txt" + +uuidgen > "/tmp/d1/$SPACE_FILE" +md5space=$(fingerprint "/tmp/d1/$SPACE_FILE") +mv "/tmp/d1/$SPACE_FILE" "./d1/$SPACE_FILE" # Check auto-demotion on write new. sleep $DEMOTE_TIMEOUT @@ -177,7 +180,7 @@ echo $UUID >> ./d1/data2.txt # Check promotion on read to slow tier drop_cache $M0 -cat d1/data3.txt +cat "./d1/$SPACE_FILE" sleep $PROMOTE_TIMEOUT sleep $DEMOTE_FREQ @@ -189,7 +192,7 @@ TEST glusterd EXPECT "0" file_on_slow_tier d1/data.txt $md5data EXPECT "0" file_on_slow_tier d1/data2.txt $md5data2 -EXPECT "0" file_on_slow_tier d1/data3.txt $md5data3 +EXPECT "0" file_on_slow_tier "./d1/$SPACE_FILE" $md5space TEST $CLI volume tier $V0 detach start -- cgit