summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrutika Dhananjay <kdhananj@redhat.com>2014-05-21 17:47:03 +0530
committerVijay Bellur <vbellur@redhat.com>2014-06-02 03:56:46 -0700
commitdb022ef7ecca77cbecbcc4c046b6d3aafd2cb86f (patch)
tree0abb9a8bfb99ef814d1f994a60826d2a1c63b64c
parent0d26de1b0a4a0fbf7e33dab8bc46c38c179e8645 (diff)
cluster/dht: Fix min-free-disk calculations when quota-deem-statfs is on
PROBLEM: As part of file creation, DHT sends a statfs call to all of its sub-volumes and expects in return the local space consumption and availability on each one of them. This information is used by DHT to ensure that atleast min-free-disk amount of space is left on each sub-volume in the event that there ARE other sub-volumes with more space available. But when quota-deem-statfs is enabled, quota xlator on every brick unwinds the statfs call with volume-wide consumption of disk space. This leads to miscalculation in min-free-disk algo, thereby misleading DHT at some point, into thinking all sub-volumes have equal available space, in which case DHT keeps sending new file creates to subvol-0, causing it to become 100% full at some point although there ARE other subvols with ample space available. FIX: The fix is to make quota_statfs() behave as if quota xlator weren't enabled, thereby making every brick return only its local consumption and disk space availability. Change-Id: I211371a1eddb220037bd36a128973938ea8124c2 BUG: 1099890 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: http://review.gluster.org/7845 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--libglusterfs/src/glusterfs.h1
-rw-r--r--tests/bugs/bug-1099890.t120
-rw-r--r--tests/dht.rc19
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c16
-rw-r--r--xlators/features/quota/src/quota.c16
5 files changed, 167 insertions, 5 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 93d7260..aad2315 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -89,6 +89,7 @@
#define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo"
#define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set"
#define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
+#define GF_INTERNAL_IGNORE_DEEM_STATFS "ignore-deem-statfs"
#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
diff --git a/tests/bugs/bug-1099890.t b/tests/bugs/bug-1099890.t
new file mode 100644
index 0000000..1c07b62
--- /dev/null
+++ b/tests/bugs/bug-1099890.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+## TO-DO: Fix the following once the dht du refresh interval issue is fixed:
+## 1. Do away with sleep(1).
+## 2. Do away with creation of empty files.
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+# Create 2 loop devices, one per brick.
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+
+TEST L1=`losetup --find --show $B0/brick1`
+TEST mkfs.xfs $L1
+
+TEST L2=`losetup --find --show $B0/brick2`
+TEST mkfs.xfs $L2
+
+TEST mkdir -p $B0/${V0}{1,2}
+
+TEST mount -t xfs $L1 $B0/${V0}1
+TEST mount -t xfs $L2 $B0/${V0}2
+
+# Create a plain distribute volume with 2 subvols.
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+EXPECT "Started" volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable;
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+
+TEST $CLI volume quota $V0 limit-usage / 150MB;
+
+TEST $CLI volume set $V0 cluster.min-free-disk 50%
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+# Make sure quota-deem-statfs is working as expected
+EXPECT "150M" echo `df -h $M0 -P | tail -1 | awk {'print $2'}`
+
+# Create a new file 'foo' under the root of the volume, which hashes to subvol-0
+# of DHT, that consumes 40M
+TEST dd if=/dev/zero of=$M0/foo bs=5M count=8
+
+TEST stat $B0/${V0}1/foo
+TEST ! stat $B0/${V0}2/foo
+
+# Create a new file 'bar' under the root of the volume, which hashes to subvol-1
+# of DHT, that consumes 40M
+TEST dd if=/dev/zero of=$M0/bar bs=5M count=8
+
+TEST ! stat $B0/${V0}1/bar
+TEST stat $B0/${V0}2/bar
+
+# Touch a zero-byte file on the root of the volume to make sure the statfs data
+# on DHT is refreshed
+sleep 1;
+TEST touch $M0/empty1;
+
+# At this point, the available space on each subvol {60M,60M} is greater than
+# their min-free-disk {50M,50M}, but if this bug still exists, then
+# the total available space on the volume as perceived by DHT should be less
+# than min-free-disk, i.e.,
+#
+# consumed space returned per subvol by quota = (40M + 40M) = 80M
+#
+# Therefore, consumed space per subvol computed by DHT WITHOUT the fix would be:
+# (80M/150M)*100 = 53%
+#
+# Available space per subvol as perceived by DHT with the bug = 47%
+# which is less than min-free-disk
+
+# Now I create a file that hashes to subvol-1 (counting from 0) of DHT.
+# If this bug still exists,then DHT should be routing this creation to subvol-0.
+# If this bug is fixed, then DHT should be routing the creation to subvol-1 only
+# as it has more than min-free-disk space available.
+
+TEST dd if=/dev/zero of=$M0/file bs=1K count=1
+sleep 1;
+TEST ! stat $B0/${V0}1/file
+TEST stat $B0/${V0}2/file
+
+# Touch another zero-byte file on the root of the volume to refresh statfs
+# values stored by DHT.
+
+TEST touch $M0/empty2;
+
+# Now I create a new file that hashes to subvol-0, at the end of which, there
+# will be less than min-free-disk space available on it.
+TEST dd if=/dev/zero of=$M0/fil bs=5M count=4
+sleep 1;
+TEST stat $B0/${V0}1/fil
+TEST ! stat $B0/${V0}2/fil
+
+# Touch to refresh statfs info cached by DHT
+
+TEST touch $M0/empty3;
+
+# Now I create a file that hashes to subvol-0 but since it has less than
+# min-free-disk space available, its data will be cached on subvol-1.
+
+TEST dd if=/dev/zero of=$M0/zz bs=5M count=1
+
+TEST stat $B0/${V0}1/zz
+TEST stat $B0/${V0}2/zz
+
+EXPECT "$V0-client-1" dht_get_linkto_target "$B0/${V0}1/zz"
+
+EXPECT "1" is_dht_linkfile "$B0/${V0}1/zz"
+
+cleanup;
diff --git a/tests/dht.rc b/tests/dht.rc
index a11bbfd..50c4532 100644
--- a/tests/dht.rc
+++ b/tests/dht.rc
@@ -91,3 +91,22 @@ function remove_brick_completed()
echo $val
return $val
}
+
+function dht_get_linkto_target()
+{
+ local path=$1;
+ echo `getfattr -d -m . -e text --only-values --absolute-names --name=trusted.glusterfs.dht.linkto $path`
+}
+
+function is_dht_linkfile()
+{
+ local path=$1
+ retval=0
+ local output=`stat --format=%a $path`
+ if [ $output -eq 1000 ]; then
+ retval=1
+ fi
+
+ echo $retval
+ return $retval
+}
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index fe3955e..d035aca 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -135,6 +135,7 @@ int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
int i = 0;
+ int ret = -1;
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
@@ -164,12 +165,25 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
+ statfs_local->params = dict_new ();
+ if (!statfs_local->params)
+ goto err;
+
+ ret = dict_set_int8 (statfs_local->params,
+ GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set "
+ GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
+ goto err;
+ }
+
statfs_local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (statfs_frame, dht_du_info_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->statfs,
- &tmp_loc, NULL);
+ &tmp_loc, statfs_local->params);
}
conf->last_stat_fetch.tv_sec = tv.tv_sec;
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index c3e730b..06ef961 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -3808,16 +3808,24 @@ out:
int32_t
quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int op_errno = 0;
- int ret = -1;
- quota_priv_t *priv = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ int8_t ignore_deem_statfs = 0;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
priv = this->private;
GF_ASSERT (loc);
WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+ ret = dict_get_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS,
+ &ignore_deem_statfs);
+ ret = 0;
+
+ if (ignore_deem_statfs)
+ goto off;
+
if (priv->consider_statfs && loc->inode) {
local = quota_local_new ();
if (!local) {