author     Raghavendra Talur <rtalur@redhat.com>    2013-01-24 11:26:37 +0530
committer  Anand Avati <avati@redhat.com>           2013-02-04 08:43:50 -0800
commit     2a46c8769bc2b6ad491a305ea1d38023d0e22617 (patch)
tree       1c4f5ef50b148c13b5f00bbcca2195578ef8c08a
parent     50f0882051dff81882115bf72abb67577099944f (diff)
cluster/dht: Correct min_free_disk behaviour
Problem:
Files were being created on a subvol that had less than min-free-disk space
available, even when other subvols with more space were available.

Solution:
Changed the logic to look for a subvol that has more space available. In
cases where all the subvols have less than min-free-disk available, the one
with the maximum space and at least one free inode is chosen.

Known Issue:
Cannot ensure that the first file created right after the min-free-disk
value is crossed on a brick will be created on another brick, because the
disk usage stats take some time to refresh in the gluster process. Will fix
that as part of another bug.

Change-Id: If3ae0bf5a44f8739ce35b3ee3f191009ddd44455
BUG: 858488
Signed-off-by: Raghavendra Talur <rtalur@redhat.com>
Reviewed-on: http://review.gluster.org/4420
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r--  tests/bugs/bug-858488-min-free-disk.t    | 114
-rw-r--r--  tests/include.rc                         |  15
-rw-r--r--  xlators/cluster/dht/src/dht-common.h     |   4
-rw-r--r--  xlators/cluster/dht/src/dht-diskusage.c  | 112
4 files changed, 218 insertions(+), 27 deletions(-)
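
For readers skimming the diff below, here is a minimal sketch of the two-pass
selection described in the commit message (hypothetical standalone C for
illustration only; the struct and function names are invented and this is not
the actual dht xlator code, which also distinguishes percentage vs. absolute
units):

/* Sketch only: pick a subvolume the way the commit message describes.
 * Pass 1: prefer a subvolume above both the min-free-disk and
 *         min-free-inodes thresholds, taking the one with the most space.
 * Pass 2: if none qualifies, fall back to the subvolume with the most
 *         free space that still has at least one free inode. */
struct du_stat {
        double avail_space;   /* free space (same unit as min_free_disk) */
        double avail_inodes;  /* free inodes                             */
};

static int
pick_subvol (const struct du_stat *st, int cnt,
             double min_free_disk, double min_free_inodes)
{
        int    i, best = -1;
        double max = 0;

        for (i = 0; i < cnt; i++) {
                if (st[i].avail_space  > min_free_disk   &&
                    st[i].avail_inodes > min_free_inodes &&
                    st[i].avail_space  > max) {
                        max  = st[i].avail_space;
                        best = i;
                }
        }

        if (best != -1)
                return best;

        for (i = 0; i < cnt; i++) {
                if (st[i].avail_inodes > 0 && st[i].avail_space > max) {
                        max  = st[i].avail_space;
                        best = i;
                }
        }

        return best;   /* -1 means no subvolume had a free inode */
}

The actual change implements the same idea as dht_subvol_with_free_space_inodes()
and dht_subvol_maxspace_nonzeroinode() in dht-diskusage.c.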
diff --git a/tests/bugs/bug-858488-min-free-disk.t b/tests/bugs/bug-858488-min-free-disk.t
new file mode 100644
index 00000000000..43ef1496ba1
--- /dev/null
+++ b/tests/bugs/bug-858488-min-free-disk.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function pidgrep()
+{
+ ps ax | grep "$1" | awk '{print $1}' | head -1
+}
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Let's create partitions for bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST LO1=`losetup --find --show $B0/brick1`
+TEST mkfs.xfs $LO1
+TEST LO2=`losetup --find --show $B0/brick2`
+TEST mkfs.xfs $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST mount -t xfs $LO1 $B0/${V0}1
+TEST mount -t xfs $LO2 $B0/${V0}2
+
+
+## Let's create the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+MOUNT_PID=`ps ax |grep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0" | awk '{print $1}' | head -1`
+## Real test starts here
+## ----------------------------------------------------------------------------
+
+MINFREEDISKVALUE=90
+
+## Set min free disk to MINFREEDISKVALUE percent
+TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE
+
+## We need to have file name to brick map based on hash.
+## We will use this info in test case 0.
+i=1
+CONTINUE=2
+BRICK1FILE=0
+BRICK2FILE=0
+while [[ $CONTINUE -ne 0 ]]
+do
+ dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
+
+ if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
+ then
+ BRICK1FILE=file$i.data
+ CONTINUE=$CONTINUE-1
+ fi
+
+ if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
+ then
+ BRICK2FILE=file$i.data
+ CONTINUE=$CONTINUE-1
+ fi
+
+ rm $M0/file$i.data
+ let i++
+done
+
+
+## Bring free space on one of the bricks to less than minfree value by
+## creating one big file.
+dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1
+
+# Let's find out where it was created
+if [ -f $B0/${V0}1/fillonebrick.data ]
+then
+ FILETOCREATE=$BRICK1FILE
+ OTHERBRICK=$B0/${V0}2
+else
+ FILETOCREATE=$BRICK2FILE
+ OTHERBRICK=$B0/${V0}1
+fi
+
+##--------------------------------TEST CASE 0-----------------------------------
+## If we try to create a file which should go into full brick as per hash, it
+## should go into the other brick instead.
+
+## Before that let us create files just to make gluster refresh the stat
+## Using touch so it should not change the disk usage stats
+for k in {1..20};
+do
+ touch $M0/dummyfile$k
+done
+
+dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
+TEST [ -e $OTHERBRICK/$FILETOCREATE ]
+
+## Done testing, let's clean up
+EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID
+TEST rm -rf $M0/*
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+$CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/include.rc b/tests/include.rc
index 1d1386f9826..5d9c96cea0e 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -168,6 +168,21 @@ function cleanup()
{
killall -15 glusterfs glusterfsd glusterd 2>/dev/null || true;
killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true;
+
+ MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
+ for m in $MOUNTPOINTS;
+ do
+ umount $m
+ done
+
+
+ LOOPDEVICES=`losetup -a | grep "$B0/" | awk '{print $1}' | tr -d :`
+ for l in $LOOPDEVICES;
+ do
+ losetup -d $l
+ done
+
+
rm -rf /var/lib/glusterd/* $B0/* /etc/glusterd/*;
umount -l $M0 2>/dev/null || true;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 764b37ac4a0..0dd654650c0 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -724,4 +724,8 @@ int
dht_dir_has_layout (dict_t *xattr);
gf_boolean_t
dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 52ea3a32aca..0c87f4a647c 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -248,12 +248,11 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
return is_subvol_filled;
}
+
+/*Get the best subvolume to create the file in*/
xlator_t *
dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- double max = 0;
- double max_inodes = 0;
xlator_t *avail_subvol = NULL;
dht_conf_t *conf = NULL;
@@ -261,37 +260,96 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
LOCK (&conf->subvolume_lock);
{
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if ((conf->du_stats[i].avail_percent > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_percent;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if ((conf->du_stats[i].avail_space > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_space;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol);
+ }
- }
- }
}
UNLOCK (&conf->subvolume_lock);
if (!avail_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space and inodes to create");
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "no subvolume has enough free space and/or inodes\
+ to create");
+ avail_subvol = subvol;
}
- if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
- avail_subvol = subvol;
-
- if (!avail_subvol)
- avail_subvol = subvol;
return avail_subvol;
}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol)
+{
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
+}
+
+
+/* Get subvol which has at least one inode and maximum space */
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol)
+{
+ int i = 0;
+ double max = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
+}