summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-x tests/basic/quota-anon-fd-nfs.t 84
-rw-r--r-- tests/basic/quota-nfs.c (renamed from tests/basic/quota-anon-fd-nfs.c) 0
-rwxr-xr-x tests/basic/quota-nfs.t 52
-rwxr-xr-x tests/bugs/bug-1161156.t 10
-rw-r--r-- xlators/features/quota/src/quota.c 325
-rw-r--r-- xlators/features/quota/src/quota.h 20
6 files changed, 410 insertions, 81 deletions
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t
index 0f7a9aac52e..5841580f9a8 100755
--- a/tests/basic/quota-anon-fd-nfs.t
+++ b/tests/basic/quota-anon-fd-nfs.t
@@ -1,17 +1,11 @@
#!/bin/bash
. $(dirname $0)/../include.rc
-. $(dirname $0)/../nfs.rc
-
-function usage()
-{
- local QUOTA_PATH=$1;
- $CLI volume quota $V0 list $QUOTA_PATH | \
- grep "$QUOTA_PATH" | awk '{print $4}'
-}
+. $(dirname $0)/../fileio.rc
cleanup;
+TESTS_EXPECTED_IN_LOOP=16
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
@@ -19,34 +13,74 @@ TEST $CLI volume info;
TEST $CLI volume create $V0 $H0:$B0/brick1;
EXPECT 'Created' volinfo_field $V0 'Status';
+
+# The test makes use of inode-lru-limit to hit a scenario, where we
+# find an inode whose ancestry is not there. Following is the
+# hypothesis (which is confirmed by seeing logs indicating that
+# codepath has been executed, but not through a good understanding of
+# NFS internals).
+
+# At the end of an fop, the reference count of an inode would be
+# zero. The inode (and its ancestry) persists in memory only
+# because of non-zero lookup count. These looked up inodes are put
+# in an lru queue of size 1 (here). So, there can be at most one
+# such inode in memory.
+
+# NFS Server makes use of anonymous fds. So, if it cannot find
+# valid fd, it does a nameless lookup. This gives us an inode
+# whose ancestry is NULL. When a write happens on this inode,
+# quota-enforcer/marker finds a NULL ancestry and asks
+# storage/posix to build it.
+
TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
-TEST mount_nfs $H0:/$V0 $N0
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST mount -t nfs -o noac,soft,nolock,vers=3 $H0:/$V0 $N0
deep=/0/1/2/3/4/5/6/7/8/9
TEST mkdir -p $N0/$deep
-TEST dd if=/dev/zero of=$N0/$deep/file bs=1k count=10240
+TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4
-TEST $CLI volume quota $V0 enable
-TEST $CLI volume quota $V0 limit-usage / 20MB
-TEST $CLI volume quota $V0 soft-timeout 0
-TEST $CLI volume quota $V0 hard-timeout 0
+TEST fd_open 3 'w' "$N0/$deep/file1"
+TEST fd_open 4 'w' "$N0/$deep/file2"
+TEST fd_open 5 'w' "$N0/$deep/file3"
+TEST fd_open 6 'w' "$N0/$deep/file4"
+
+# consume all quota
+TEST ! dd if=/dev/zero of="$N0/$deep/file" bs=1MB count=1
+
+# At the end of each fop in the server, the reference count of the
+# inode associated with each of the files above drops to zero and hence
+# the inode is put into the lru queue. Since lru-limit is set to 1, an
+# fop on the next file will displace the current inode from the itable.
+# This ensures that when a write happens on the same fd, fd resolution
+# results in a nameless lookup from the server and quota_writev
+# encounters an fd associated with an inode whose parent is not present
+# in the itable.
+
+for j in $(seq 1 2); do
+ for i in $(seq 3 6); do
+ # failing writes indicate that we are enforcing quota set on /
+ # even with anonymous fds.
+ TEST_IN_LOOP ! fd_write $i "content"
+ TEST_IN_LOOP sync
+ done
+done
-TEST dd if=/dev/zero of=$N0/$deep/newfile_1 bs=512 count=10240
-# wait for write behind to complete.
-EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/"
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
-# compile the test write program and run it
-TEST $CC $(dirname $0)/quota-anon-fd-nfs.c -o $(dirname $0)/quota-anon-fd-nfs;
-# Try to create a 100Mb file which should fail
-TEST ! $(dirname $0)/quota-anon-fd-nfs $N0/$deep/newfile_2 "104857600"
-TEST rm -f $N0/$deep/newfile_2
+$CLI volume statedump $V0 all
-## Before killing daemon to avoid deadlocks
-umount_nfs $N0
+TEST umount -l $N0
cleanup;
diff --git a/tests/basic/quota-anon-fd-nfs.c b/tests/basic/quota-nfs.c
index 4cc0322e132..4cc0322e132 100644
--- a/tests/basic/quota-anon-fd-nfs.c
+++ b/tests/basic/quota-nfs.c
diff --git a/tests/basic/quota-nfs.t b/tests/basic/quota-nfs.t
new file mode 100755
index 00000000000..501d8ab6381
--- /dev/null
+++ b/tests/basic/quota-nfs.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+# Print the fourth field of the `volume quota ... list` output line that
+# matches the given path ($1). The caller compares the printed value with
+# the expected usage string.
+function usage()
+{
+ local QUOTA_PATH=$1;
+ $CLI volume quota $V0 list $QUOTA_PATH | \
+ grep "$QUOTA_PATH" | awk '{print $4}'
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+# Write 10MB (10240 blocks of 1k) before quota is enabled.
+TEST dd if=/dev/zero of=$N0/$deep/file bs=1k count=10240
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 20MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+# Write another 5MB (10240 blocks of 512 bytes); together with the first
+# file this gives the 15.0MB usage expected below, still under the 20MB limit.
+TEST dd if=/dev/zero of=$N0/$deep/newfile_1 bs=512 count=10240
+# wait for write behind to complete.
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/"
+
+# compile the test write program and run it
+TEST $CC $(dirname $0)/quota-nfs.c -o $(dirname $0)/quota-nfs;
+# Try to create a 100Mb file which should fail
+# (104857600 bytes is passed as the second argument to the test program).
+TEST ! $(dirname $0)/quota-nfs $N0/$deep/newfile_2 "104857600"
+TEST rm -f $N0/$deep/newfile_2
+
+## Before killing daemon to avoid deadlocks
+umount_nfs $N0
+
+cleanup;
diff --git a/tests/bugs/bug-1161156.t b/tests/bugs/bug-1161156.t
index 12ebc45bdd3..9f33391d744 100755
--- a/tests/bugs/bug-1161156.t
+++ b/tests/bugs/bug-1161156.t
@@ -27,11 +27,13 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
TEST mount_nfs $H0:/$V0 $N0
mydir="dir"
TEST mkdir -p $N0/$mydir
+TEST mkdir -p $N0/newdir
TEST dd if=/dev/zero of=$N0/$mydir/file bs=1k count=10240
TEST $CLI volume quota $V0 enable
TEST $CLI volume quota $V0 limit-usage / 20MB
+TEST $CLI volume quota $V0 limit-usage /newdir 5MB
TEST $CLI volume quota $V0 soft-timeout 0
TEST $CLI volume quota $V0 hard-timeout 0
@@ -40,8 +42,12 @@ TEST dd if=/dev/zero of=$N0/$mydir/newfile_1 bs=512 count=10240
EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" usage "/"
TEST ! dd if=/dev/zero of=$N0/$mydir/newfile_2 bs=1k count=10240
-# Test the rename, it should fail due to space restrictions
-TEST ! mv $N0/dir/file $N0/dir/newfile_3
+# Test rename within a directory. It should pass even when the
+# corresponding directory quota is filled.
+TEST mv $N0/dir/file $N0/dir/newfile_3
+
+# rename should fail here with disk quota exceeded
+TEST ! mv $N0/dir/newfile_3 $N0/newdir/
# cleanup
umount_nfs $N0
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 3c1b8e09c5c..f903b4e57b7 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -14,17 +14,6 @@
#include "defaults.h"
#include "statedump.h"
-void
-quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this);
-
-int32_t
-quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
- char *name, uuid_t par);
-
-int
-quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict,
- loc_t *loc, struct iatt *buf, int32_t *op_errno);
-
struct volume_options options[];
static int32_t
@@ -251,6 +240,164 @@ out:
return;
}
+/*
+ * Look up the parent of @inode through inode_parent() and then release the
+ * reference the caller held on @inode.
+ *
+ * The caller hands its reference on @inode over to this function and must
+ * not dereference @inode afterwards unless it holds another reference.
+ *
+ * Returns whatever inode_parent() returned; callers treat NULL as "no
+ * parent found".
+ */
+static inline inode_t*
+__quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
+{
+        inode_t *up = inode_parent (inode, pargfid, name);
+
+        inode_unref (inode);
+
+        return up;
+}
+
+/*
+ * Same as __quota_inode_parent(), but logs an error (attributed to the
+ * calling function) when no parent could be found.
+ *
+ * The caller's reference on @inode is consumed by this call. Returns the
+ * parent inode, or NULL if the lookup failed.
+ */
+static inline inode_t*
+quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
+{
+        inode_t *parent = NULL;
+        uuid_t   gfid   = {0};
+
+        /* __quota_inode_parent() drops our reference on inode, so inode
+         * must not be dereferenced once it returns. Take a copy of the
+         * gfid now so the error message below never touches an inode we
+         * no longer hold a reference on. */
+        uuid_copy (gfid, inode->gfid);
+
+        parent = __quota_inode_parent (inode, pargfid, name);
+        if (!parent)
+                gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to find "
+                                  "ancestor for inode (%s)",
+                                  uuid_utoa (gfid));
+
+        return parent;
+}
+
+/*
+ * Count how many parent hops separate @inode from the root inode.
+ *
+ * Returns 0 when @inode is the root (or when no reference could be
+ * obtained on it), the number of hops otherwise, and -1 when a parent is
+ * missing somewhere on the way up.
+ */
+int32_t
+quota_inode_depth (inode_t *inode)
+{
+        int      hops   = 0;
+        inode_t *walker = inode_ref (inode);
+
+        while (walker) {
+                if (__is_root_gfid (walker->gfid)) {
+                        /* Reached the root: drop the reference held for
+                         * the walk and report the distance. */
+                        inode_unref (walker);
+                        return hops;
+                }
+
+                hops++;
+
+                /* quota_inode_parent() consumes the reference on walker
+                 * and hands back the parent, or NULL on failure. */
+                walker = quota_inode_parent (walker, 0 , NULL);
+                if (!walker)
+                        return -1;
+        }
+
+        return hops;
+}
+
+/*
+ * Find the closest common ancestor of inode1 and inode2 and copy its gfid
+ * into *common_ancestor.
+ *
+ * Returns 0 on success. Returns -1, leaving *common_ancestor untouched,
+ * when the depth of either inode cannot be determined or when a parent
+ * lookup fails while walking towards the root.
+ */
+int32_t quota_find_common_ancestor (inode_t *inode1, inode_t *inode2,
+ uuid_t *common_ancestor)
+{
+ int32_t depth1 = 0;
+ int32_t depth2 = 0;
+ int32_t ret = -1;
+ inode_t *cur_inode1 = NULL;
+ inode_t *cur_inode2 = NULL;
+
+ /* A negative depth means the chain of parents up to root is broken. */
+ depth1 = quota_inode_depth (inode1);
+ if (depth1 < 0)
+ goto out;
+
+ depth2 = quota_inode_depth (inode2);
+ if (depth2 < 0)
+ goto out;
+
+ /* Each cursor owns one reference on the inode it currently points at.
+  * quota_inode_parent() releases the reference on the inode passed in
+  * and returns the parent, which the out: path unrefs, or NULL. */
+ cur_inode1 = inode_ref (inode1);
+ cur_inode2 = inode_ref (inode2);
+
+ /* Lift the deeper of the two inodes until both are at the same depth. */
+ while (cur_inode1 && depth1 > depth2) {
+ cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL);
+ depth1--;
+ }
+
+ while (cur_inode2 && depth2 > depth1) {
+ cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL);
+ depth2--;
+ }
+
+ /* Walk both cursors up in lock-step until they meet, depth reaches
+  * zero, or a parent lookup fails. */
+ while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) {
+ cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL);
+ cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL);
+ depth1--;
+ }
+
+ /* Both cursors still valid: cur_inode1 is reported as the ancestor. */
+ if (cur_inode1 && cur_inode2) {
+ uuid_copy (*common_ancestor, cur_inode1->gfid);
+ ret = 0;
+ }
+out:
+ /* Release whatever references the cursors still hold. */
+ if (cur_inode1)
+ inode_unref (cur_inode1);
+
+ if (cur_inode2)
+ inode_unref (cur_inode2);
+
+ return ret;
+ }
+
+/*
+ * Completion handler for a single ancestry check.
+ *
+ * @parents:  list of parents that was built, or NULL when nothing had to
+ *            be built.
+ * @inode:    inode the ancestry was built for (only used for logging).
+ * @op_ret:   negative on failure.
+ * @op_errno: error code accompanying a failure.
+ * @data:     the call frame of the fop waiting on the check.
+ *
+ * Records a failure in frame->local, decrements local->link_count under
+ * local->lock and, when the count reaches zero, resumes the fop through
+ * local->fop_continue_cbk.
+ */
+void
+check_ancestory_continue (struct list_head *parents, inode_t *inode,
+                          int32_t op_ret, int32_t op_errno, void *data)
+{
+        call_frame_t  *frame     = data;
+        quota_local_t *local     = frame->local;
+        uint32_t       remaining = 0;
+
+        /* A build that reported success but produced an empty parent list
+         * is turned into an EIO failure. */
+        if (op_ret >= 0 && parents && list_empty (parents)) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "Couldn't build ancestry for inode (gfid:%s). "
+                        "Without knowing ancestors till root, quota "
+                        "cannot be enforced. "
+                        "Hence, failing fop with EIO",
+                        uuid_utoa (inode->gfid));
+                op_errno = EIO;
+                op_ret = -1;
+        }
+
+        LOCK (&local->lock);
+        {
+                if (op_ret < 0) {
+                        local->op_ret = op_ret;
+                        local->op_errno = op_errno;
+                }
+                remaining = --local->link_count;
+        }
+        UNLOCK (&local->lock);
+
+        /* Other ancestry checks are still outstanding. */
+        if (remaining != 0)
+                return;
+
+        local->fop_continue_cbk (frame);
+}
+
+/*
+ * Verify that every ancestor of @inode up to the root can be reached.
+ *
+ * If a parent is missing somewhere on the way up, quota_build_ancestry() is
+ * asked to build the ancestry of the inode at which the walk stopped, with
+ * check_ancestory_continue() as its completion callback. Otherwise
+ * check_ancestory_continue() is called directly: with success when the root
+ * was reached, or with ESTALE when no reference could be obtained on @inode.
+ *
+ * @frame: call frame of the fop; passed through as callback data.
+ * @inode: inode whose ancestry is checked. The caller keeps its reference.
+ */
+void
+check_ancestory (call_frame_t *frame, inode_t *inode)
+{
+        inode_t *cur_inode = NULL;
+        inode_t *parent    = NULL;
+
+        cur_inode = inode_ref (inode);
+        while (cur_inode && !__is_root_gfid (cur_inode->gfid)) {
+                parent = inode_parent (cur_inode, 0, NULL);
+                if (!parent) {
+                        quota_build_ancestry (cur_inode,
+                                              check_ancestory_continue, frame);
+                        /* Release the reference held for the walk; without
+                         * this it leaks every time the ancestry has to be
+                         * built.
+                         * NOTE(review): assumes quota_build_ancestry()
+                         * takes its own reference if it uses the inode
+                         * after returning -- confirm. */
+                        inode_unref (cur_inode);
+                        return;
+                }
+                inode_unref (cur_inode);
+                cur_inode = parent;
+        }
+
+        if (cur_inode) {
+                /* Reached the root with a complete chain of parents. */
+                inode_unref (cur_inode);
+                check_ancestory_continue (NULL, NULL, 0, 0, frame);
+        } else {
+                check_ancestory_continue (NULL, NULL, -1, ESTALE, frame);
+        }
+}
+
static inline void
quota_link_count_decrement (quota_local_t *local)
{
@@ -827,6 +974,14 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
}
do {
+ /* In a rename operation, enforce should be stopped at common
+ ancestor */
+ if (!uuid_is_null (local->common_ancestor) &&
+ !uuid_compare (_inode->gfid, local->common_ancestor)) {
+ quota_link_count_decrement (local);
+ break;
+ }
+
if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) {
wouldbe_size = ctx->size + delta;
@@ -2046,63 +2201,51 @@ out:
return 0;
}
-int32_t
-quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+void
+quota_rename_continue (call_frame_t *frame)
{
- quota_priv_t *priv = NULL;
- int32_t ret = -1, op_errno = ENOMEM;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- call_stub_t *stub = NULL;
-
- priv = this->private;
-
- WIND_IF_QUOTAOFF (priv->is_quota_on, off);
-
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
-
- frame->local = local;
+ int32_t ret = -1;
+ int32_t op_errno = EIO;
+ quota_local_t *local = NULL;
+ uuid_t common_ancestor = {0};
+ xlator_t *this = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- ret = loc_copy (&local->oldloc, oldloc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
+ local = frame->local;
+ this = THIS;
- ret = loc_copy (&local->newloc, newloc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ if (local->op_ret < 0) {
+ op_errno = local->op_errno;
goto err;
}
- stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
- xdata);
- if (stub == NULL) {
+ ret = quota_find_common_ancestor (local->oldloc.parent,
+ local->newloc.parent,
+ &common_ancestor);
+ if (ret < 0 || uuid_is_null(common_ancestor)) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get "
+ "common_ancestor for %s and %s",
+ local->oldloc.path, local->newloc.path);
+ op_errno = ESTALE;
goto err;
}
LOCK (&local->lock);
{
local->link_count = 1;
- local->stub = stub;
+ uuid_copy (local->common_ancestor, common_ancestor);
}
UNLOCK (&local->lock);
- if (QUOTA_REG_OR_LNK_FILE (oldloc->inode->ia_type)) {
- ret = quota_inode_ctx_get (oldloc->inode, this, &ctx, 0);
+ if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) {
+ ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
"quota context not set in inode (gfid:%s), "
"considering file size as zero while enforcing "
"quota on new ancestry",
- oldloc->inode ? uuid_utoa (oldloc->inode->gfid)
- : "0");
+ uuid_utoa (local->oldloc.inode->gfid));
local->delta = 0;
-
} else {
/* FIXME: We need to account for the size occupied by this
@@ -2112,25 +2255,99 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
* directory inode*/
/* FIXME: The following code assumes that regular files and
- *linkfiles are present, in their entirety, in a single
- brick. This *assumption is invalid in the case of
- stripe.*/
+ * linkfiles are present, in their entirety, in a single
+ * brick. This *assumption is invalid in the case of
+ * stripe.*/
local->delta = ctx->buf.ia_blocks * 512;
}
- } else if (IA_ISDIR (oldloc->inode->ia_type)) {
- ret = quota_validate (frame, oldloc->inode, this,
+ } else if (IA_ISDIR (local->oldloc.inode->ia_type)) {
+ ret = quota_validate (frame, local->oldloc.inode, this,
quota_rename_get_size_cbk);
if (ret){
op_errno = -ret;
goto err;
}
- return 0;
+ return;
}
- quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+ quota_check_limit (frame, local->newloc.parent, this, NULL, NULL);
+ return;
+
+err:
+ if (local && local->stub)
+ call_stub_destroy (local->stub);
+
+ QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ return;
+
+}
+
+int32_t
+quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ uuid_t common_ancestor = {0};
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ /* No need to check quota limit if src and dst parents are same */
+ if (oldloc->parent && newloc->parent &&
+ !uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) {
+ gf_log (this->name, GF_LOG_DEBUG, "rename %s -> %s are "
+ "in the same directory, so skip check limit",
+ oldloc->path, newloc->path);
+ goto off;
+ }
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->oldloc, oldloc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy (&local->newloc, newloc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
+ xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK (&local->lock);
+ {
+ /* link_count here tell how many check_ancestory should be done
+ * before continuing the FOP
+ */
+ local->link_count = 2;
+ local->stub = stub;
+ local->fop_continue_cbk = quota_rename_continue;
+ }
+ UNLOCK (&local->lock);
+
+ check_ancestory (frame, newloc->parent);
+ check_ancestory (frame, oldloc->parent);
return 0;
err:
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 5a4bcb2b1e0..3d6c65f8fb6 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -181,6 +181,9 @@ typedef void
(*quota_ancestry_built_t) (struct list_head *parents, inode_t *inode,
int32_t op_ret, int32_t op_errno, void *data);
+/* Continuation called with the fop's call frame once all of the ancestry
+ * checks the fop is waiting on have completed. */
+typedef void
+(*quota_fop_continue_t) (call_frame_t *frame);
+
struct quota_local {
gf_lock_t lock;
uint32_t validate_count;
@@ -196,7 +199,9 @@ struct quota_local {
gf_boolean_t skip_check;
char just_validated;
fop_lookup_cbk_t validate_cbk;
+ quota_fop_continue_t fop_continue_cbk;
inode_t *inode;
+ uuid_t common_ancestor; /* Used by quota_rename */
call_stub_t *stub;
struct iobref *iobref;
quota_limit_t limit;
@@ -235,4 +240,19 @@ void
quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
int64_t delta);
+int
+quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk,
+ void *data);
+
+void
+quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this);
+
+int32_t
+quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ char *name, uuid_t par);
+
+int
+quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict,
+ loc_t *loc, struct iatt *buf, int32_t *op_errno);
+
#endif