summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@redhat.com> 2014-06-17 13:42:45 +0000
committer Vijay Bellur <vbellur@redhat.com> 2014-07-12 09:20:52 -0700
commit 99685f18f190a73f2a46478cac0b09f4c59834b1 (patch)
tree f5e787ace3038b97876425c5397e91c0a37df04d /tests
parent d5ec66032ff96d7d417b5838a6bd1a047d52204c (diff)
dht: support heterogeneous brick sizes
Calculation of layouts now considers the size of each brick, so that smaller bricks don't get an "unfair" share of allocations and start returning ENOSPC while the larger bricks still have plenty of space. The observation has been made that some clients might get ENOTCONN when trying to fetch disk-size information, and end up calculating layouts differently. The following meta-observations can be made. (1) This scenario is extremely unlikely in configurations with AFR. (2) The most likely consequence of this scenario is that some files will be placed sub-optimally by the client with the obsolete (non-weighted) layout. They'll still be found anyway, so this isn't a show stopper. (3) Without this patch it's *guaranteed* that some files will be placed sub-optimally, because any layout that fails to account for brick sizes is sub-optimal. (4) We shouldn't be doing fix-layout from two nodes simultaneously anyway. That's inefficient at best. Any instances of such behavior are separate bugs, which should be fixed separately. (5) In the most extreme edge case, two nodes doing weighted and non-weighted layout fixes could race and end up creating an internally inconsistent layout. This condition is still transient; it will be detected and repaired automatically the next time anyone fetches the layout. (If it's not, that's also a preexisting bug that can show up in other contexts.) In conclusion, it's not the purpose of this patch to fix bugs elsewhere in DHT. Its purpose is to make life incrementally better for users who add new hardware with larger disks, etc., than the older equipment. It's only one part of an ongoing process to improve layout management and repair, all the way up to support for multiple hash rings or tiering.
Change-Id: I05eb6f9eface9cdaf8622e0260c8c7f29020447f
BUG: 1114680
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/8093
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'tests')
-rwxr-xr-x tests/bugs/bug-902610.t 44
-rwxr-xr-x tests/features/weighted-rebalance.t 91
2 files changed, 116 insertions, 19 deletions
diff --git a/tests/bugs/bug-902610.t b/tests/bugs/bug-902610.t
index 00ba03adfce..3f26fdde970 100755
--- a/tests/bugs/bug-902610.t
+++ b/tests/bugs/bug-902610.t
@@ -8,27 +8,33 @@ cleanup;
+# get_layout DIR1 DIR2
+#
+# Reads the hex-encoded trusted.glusterfs.dht xattr of both directories and
+# compares the two hash ranges stored in them. The range start and end are
+# taken from characters 19-26 and 27-34 of the "0x..." xattr value.
+#
+# Returns 0 when the second range (after swapping, so that a range starting
+# at 00000000 is treated as the first one) begins exactly one past the end
+# of the first range and ends at ffffffff. Returns 1 otherwise.
function get_layout()
{
layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout1_s=$(echo $layout1 | cut -c 19-26)
+ layout1_e=$(echo $layout1 | cut -c 27-34)
+ #echo "layout1 from $layout1_s to $layout1_e" > /dev/tty
layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+ layout2_s=$(echo $layout2 | cut -c 19-26)
+ layout2_e=$(echo $layout2 | cut -c 27-34)
+ #echo "layout2 from $layout2_s to $layout2_e" > /dev/tty
+
+ if [ x"$layout2_s" = x"00000000" ]; then
+ # Reverse so we only have the real logic in one place.
+ tmp_s=$layout1_s
+ tmp_e=$layout1_e
+ layout1_s=$layout2_s
+ layout1_e=$layout2_e
+ layout2_s=$tmp_s
+ layout2_e=$tmp_e
+ fi
+
+ # An empty or non-hex end offset (e.g. getfattr failed and an error line
+ # was captured instead of a value) cannot describe a valid layout.
+ case "$layout1_e" in
+ ''|*[!0-9a-fA-F]*) return 1 ;;
+ esac
+
+ # Figure out where the join point is. Shell arithmetic is used instead of
+ # an external interpreter so the result does not depend on which Python
+ # version (if any) is installed.
+ target=$(printf '%08x' $((0x$layout1_e + 1)))
+ #echo "target for layout2 = $target" > /dev/tty
+
+ # The second layout should cover everything that the first doesn't.
+ if [ x"$layout2_s" = x"$target" ] && [ x"$layout2_e" = x"ffffffff" ]; then
+ return 0
+ fi
- if [ $layout1 == "0x0000000100000000000000007ffffffe" ]
- then
- if [ $layout2 == "0x00000001000000007fffffffffffffff" ]
- then
- return 0
- else
- return 1
- fi
- fi
-
- if [ $layout2 == "0x0000000100000000000000007ffffffe" ]
- then
- if [ $layout1 == "0x00000001000000007fffffffffffffff" ]
- then
- return 0
- else
- return 1
- fi
- fi
return 1
}
diff --git a/tests/features/weighted-rebalance.t b/tests/features/weighted-rebalance.t
new file mode 100755
index 00000000000..a5e746970ae
--- /dev/null
+++ b/tests/features/weighted-rebalance.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Size-weighted DHT layout test: builds a two-brick volume from a 40MB and
+# an 80MB loop-mounted filesystem, creates files through a client mount and
+# checks how many of them land on the larger brick, first with weighted
+# layouts and then after cluster.weighted-rebalance has been turned off.
+
+# Helper libraries, sourced relative to this script's directory.
+# NOTE(review): presumably these provide TEST/EXPECT/cleanup and the
+# $CLI/$V0/$H0/$B0/$M0 variables used below - confirm against the rc files.
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+# Number of files that touch_files creates and count_files looks for.
+NFILES=1000
+
+# touch_files
+#
+# Creates $NFILES empty files under $M0/dir, named file01, file02, ...
+# (the index is zero-padded to at least two digits).
+# Error output from touch is discarded; the return status is that of the
+# last touch.
+touch_files () {
+    local i
+    for i in $(seq 1 "$NFILES"); do
+        # $M0 is passed as an argument, not spliced into the printf format,
+        # so a '%' or '\' in the mount path cannot be read as a directive.
+        touch "$(printf '%s/dir/file%02d' "$M0" "$i")" 2> /dev/null
+    done
+}
+
+# count_files DIR
+#
+# Prints how many of the names DIR/dir/file01 .. DIR/dir/file$NFILES (same
+# zero-padded pattern as touch_files) exist as regular files.
+count_files () {
+    local found=0
+    local i
+    for i in $(seq 1 "$NFILES"); do
+        # DIR is passed as a printf argument, not as part of the format,
+        # so special characters in the path are taken literally.
+        if [ -f "$(printf '%s/dir/file%02d' "$1" "$i")" ]; then
+            found=$((found+1))
+        fi
+    done
+    echo "$found"
+}
+
+# wait_for_rebalance [MAX_POLLS]
+#
+# Calls rebalance_completed once per second for as long as it returns 1.
+# Returns 0 as soon as rebalance_completed returns any other status.
+# Returns 1 if MAX_POLLS polls (default 300) pass first, so a rebalance
+# that never finishes fails the test instead of hanging the run.
+wait_for_rebalance () {
+    local remaining=${1:-300}
+    while [ "$remaining" -gt 0 ]; do
+        rebalance_completed
+        if [ $? -ne 1 ]; then
+            return 0
+        fi
+        sleep 1
+        remaining=$((remaining-1))
+    done
+    return 1
+}
+
+# get_xattr PATH
+#
+# Prints the raw value of PATH's trusted.glusterfs.dht xattr as hex bytes
+# (od -tx1 output with the address column suppressed and spaces removed).
+get_xattr () {
+    # getfattr is invoked directly, not through a command string, so the
+    # path stays a single argument even if it contains whitespace.
+    getfattr --absolute-names --only-values -n trusted.glusterfs.dht "$1" \
+        | od -tx1 -An | tr -d ' '
+}
+
+# Start from a clean slate.
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# Brick directories; each one becomes the mount point of its own
+# loop-mounted XFS image so the two bricks can have different sizes.
+TEST mkdir ${B0}/${V0}{1,2}
+
+# Brick 1: 40MB image.
+TEST truncate --size $((40*1024*1024)) ${B0}/disk1
+TEST mkfs.xfs -f -i size=512 ${B0}/disk1
+TEST mount -o loop ${B0}/disk1 ${B0}/${V0}1
+
+# Brick 2: 80MB image, twice the size of brick 1.
+TEST truncate --size $((80*1024*1024)) ${B0}/disk2
+TEST mkfs.xfs -f -i size=512 ${B0}/disk2
+TEST mount -o loop ${B0}/disk2 ${B0}/${V0}2
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Create some files for later tests.
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch_files
+TEST umount $M0
+
+# Check that the larger brick got more of the files.
+nfiles=$(count_files ${B0}/${V0}2)
+# Diagnostics go to stderr: /dev/tty cannot be opened when the test runs
+# without a controlling terminal (e.g. under a CI job).
+echo $nfiles $(get_xattr ${B0}/${V0}1) $(get_xattr ${B0}/${V0}2) >&2
+TEST [ $nfiles -ge 580 ]
+
+# Turn off the size-weighted rebalance.
+TEST $CLI volume set $V0 cluster.weighted-rebalance off
+
+# Rebalance again and check that the distribution is even again.
+TEST $CLI volume rebalance $V0 start force
+TEST wait_for_rebalance
+nfiles=$(count_files ${B0}/${V0}2)
+echo $nfiles $(get_xattr ${B0}/${V0}1) $(get_xattr ${B0}/${V0}2) >&2
+TEST [ $nfiles -le 580 ]
+
+# NOTE(review): this unconditional exit makes everything below unreachable,
+# so the volume is never stopped here and the loop-mounted bricks and disk
+# images are left behind. Confirm whether the exit is intentional or
+# leftover debugging before removing it.
+exit 0
+
+$CLI volume stop $V0
+umount ${B0}/${V0}{1,2}
+rm -f ${B0}/disk{1,2}
+
+cleanup