summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- libglusterfs/src/compat.h 8
-rw-r--r-- tests/basic/ec/ec-fallocate.t 72
-rw-r--r-- tests/basic/ec/ec-rebalance.t 60
-rw-r--r-- xlators/cluster/ec/src/ec-fops.h 4
-rw-r--r-- xlators/cluster/ec/src/ec-inode-write.c 203
-rw-r--r-- xlators/cluster/ec/src/ec.c 5
-rw-r--r-- xlators/storage/posix/src/posix.c 10
7 files changed, 354 insertions, 8 deletions
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h
index fbaac76b9ee..f4da4b2a0de 100644
--- a/libglusterfs/src/compat.h
+++ b/libglusterfs/src/compat.h
@@ -59,6 +59,12 @@
#ifndef FALLOC_FL_ZERO_RANGE
#define FALLOC_FL_ZERO_RANGE 0x10 /* zeroes out range */
#endif
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* removes a range, reducing the file size */
+#endif
+#ifndef FALLOC_FL_INSERT_RANGE
+#define FALLOC_FL_INSERT_RANGE 0x20 /* inserts a range, expanding the file size */
+#endif
#ifndef HAVE_LLISTXATTR
@@ -177,6 +183,8 @@ enum {
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_ZERO_RANGE 0x10 /* zeroes out range */
+#define FALLOC_FL_INSERT_RANGE 0x20 /* inserts a range, expanding the file size */
+#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* removes a range, reducing the file size */
#ifndef _PATH_UMOUNT
#define _PATH_UMOUNT "/sbin/umount"
diff --git a/tests/basic/ec/ec-fallocate.t b/tests/basic/ec/ec-fallocate.t
new file mode 100644
index 00000000000..1b827eed7df
--- /dev/null
+++ b/tests/basic/ec/ec-fallocate.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Run several commands to verify basic fallocate functionality. We verify that
+# fallocate creates and allocates blocks to a file. We also verify that the keep
+# size option does not modify the file size.
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# check for fallocate support before continuing the test
+require_fallocate -l 1m -n $M0/file && rm -f $M0/file
+
+# fallocate a file and verify blocks are allocated
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+TEST [ $(($blksz * $nblks)) -eq 1048576 ]
+
+TEST unlink $M0/file
+
+# truncate a file to a fixed size, fallocate and verify that the size does not
+# change
+TEST truncate -s 1M $M0/file
+TEST fallocate -l 2m -n $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+sz=`stat -c %s $M0/file`
+TEST [ $sz -eq 1048576 ]
+# Note that gluster currently incorporates a hack to limit the number of blocks
+# reported as allocated to the file by the file size. We have allocated beyond the
+# file size here. Just check for non-zero allocation to avoid setting a land mine
+# for if/when that behavior might change.
+TEST [ ! $(($blksz * $nblks)) -eq 0 ]
+TEST unlink $M0/file
+
+# write some data, fallocate within and outside the range
+# and check for data corruption.
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -o 512k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post
+TEST cmp $M0/file.copy.pre $M0/file.copy.post
+TEST fallocate -o 1000k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post2
+TEST ! cmp $M0/file.copy.pre $M0/file.copy.post2
+TEST truncate -s 1M $M0/file.copy.post2
+TEST cmp $M0/file.copy.pre $M0/file.copy.post2
+TEST unlink $M0/file
+
+#Make sure offset/size are modified so that 3 blocks are allocated
+TEST touch $M0/f1
+TEST fallocate -o 1280 -l 1024 $M0/f1
+EXPECT "^2304$" stat -c "%s" $M0/f1
+EXPECT "^1536$" stat -c "%s" $B0/${V0}0/f1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/ec/ec-rebalance.t b/tests/basic/ec/ec-rebalance.t
new file mode 100644
index 00000000000..b5c30727a15
--- /dev/null
+++ b/tests/basic/ec/ec-rebalance.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Regression test for the rebalance failure reported in 1447559
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc # NOTE(review): nothing below appears to call a fallocate helper - confirm this is needed
+
+cleanup
+
+# Create and start the volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+# Mount the volume and wait until all 3 bricks are seen as up
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Create ten 1MiB files of random data
+for i in {1..10}
+do
+ dd if=/dev/urandom of=$M0/file$i bs=1024k count=1
+done
+
+md5_1=$(md5sum $M0/file1 | awk '{print $1}') # checksums taken before the rebalance, compared again after the remount
+md5_2=$(md5sum $M0/file2 | awk '{print $1}')
+md5_3=$(md5sum $M0/file3 | awk '{print $1}')
+md5_4=$(md5sum $M0/file4 | awk '{print $1}')
+md5_5=$(md5sum $M0/file5 | awk '{print $1}')
+md5_6=$(md5sum $M0/file6 | awk '{print $1}')
+md5_7=$(md5sum $M0/file7 | awk '{print $1}')
+md5_8=$(md5sum $M0/file8 | awk '{print $1}')
+md5_9=$(md5sum $M0/file9 | awk '{print $1}')
+md5_10=$(md5sum $M0/file10 | awk '{print $1}')
+# Add bricks 3..5 to the volume
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{3..5}
+
+# Trigger a forced rebalance and wait for it to complete
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+# Remount to avoid any caches, then verify every checksum is unchanged
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT "$md5_1" echo $(md5sum $M0/file1 | awk '{print $1}')
+EXPECT "$md5_2" echo $(md5sum $M0/file2 | awk '{print $1}')
+EXPECT "$md5_3" echo $(md5sum $M0/file3 | awk '{print $1}')
+EXPECT "$md5_4" echo $(md5sum $M0/file4 | awk '{print $1}')
+EXPECT "$md5_5" echo $(md5sum $M0/file5 | awk '{print $1}')
+EXPECT "$md5_6" echo $(md5sum $M0/file6 | awk '{print $1}')
+EXPECT "$md5_7" echo $(md5sum $M0/file7 | awk '{print $1}')
+EXPECT "$md5_8" echo $(md5sum $M0/file8 | awk '{print $1}')
+EXPECT "$md5_9" echo $(md5sum $M0/file9 | awk '{print $1}')
+EXPECT "$md5_10" echo $(md5sum $M0/file10 | awk '{print $1}')
+
+cleanup;
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index 4e17ec509fd..fab22d8240d 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -168,6 +168,10 @@ void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,
const char * linkname, loc_t * loc, mode_t umask,
dict_t * xdata);
+void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+
void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_truncate_cbk_t func, void *data,
loc_t * loc, off_t offset, dict_t * xdata);
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index 744797bfcfe..5405d69fe35 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -828,7 +828,208 @@ out:
}
}
-/* FOP: truncate */
+/*********************************************************************
+ *
+ * File Operation : fallocate
+ *
+ *********************************************************************/
+
+int32_t ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{ /* Reply callback used by ec_wind_fallocate: forwards every argument to ec_inode_write_cbk and returns its result. */
+ return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, /* note: 'this' is passed before 'cookie' here */
+ prebuf, postbuf, xdata);
+}
+
+void ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{ /* Winds the fallocate described by fop to the child xlator ec->xl_list[idx]; replies arrive in ec_fallocate_cbk. */
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fallocate_cbk, (void *)(uintptr_t)idx, /* idx is carried as the cookie */
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fallocate,
+ fop->fd, fop->int32, fop->offset, /* fop->int32 holds the fallocate mode stored by ec_fallocate */
+ fop->size, fop->xdata);
+}
+
+int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
+{ /* Fallocate state handler: performs the step for 'state' and returns the next state. */
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (fop->size == 0) { /* a zero-length request is rejected with EINVAL */
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ if (fop->int32 & (FALLOC_FL_COLLAPSE_RANGE
+ |FALLOC_FL_INSERT_RANGE
+ |FALLOC_FL_ZERO_RANGE
+ |FALLOC_FL_PUNCH_HOLE)) { /* any of these four mode bits is refused with ENOTSUP */
+ ec_fop_set_error(fop, ENOTSUP);
+ return EC_STATE_REPORT;
+ }
+ fop->user_size = fop->offset + fop->size; /* end of the range exactly as the caller requested it */
+ fop->head = ec_adjust_offset (fop->xl->private, &fop->offset, 1); /* NOTE(review): presumably aligns fop->offset and returns the bytes trimmed - confirm */
+ fop->size = ec_adjust_size (fop->xl->private, fop->head + fop->size, 1); /* head is added back so the adjusted range still covers the requested one */
+
+ /* Fall through: a validated request goes straight to locking */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META |
+ EC_QUERY_INFO);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) { /* iatt[0] and iatt[1] are later handed to the callback as prebuf and postbuf */
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+ cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, /* NOTE(review): the call lives inside GF_ASSERT - confirm the macro always evaluates its argument */
+ &cbk->iatt[0].ia_size));
+
+ /* With FALLOC_FL_KEEP_SIZE the reported post size equals the pre size. */
+ if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ } else if (fop->user_size > cbk->iatt[0].ia_size) { /* request ends past the current size: the file grows */
+ cbk->iatt[1].ia_size = fop->user_size;
+
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(ec_set_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ cbk->iatt[1].ia_size));
+ } else { /* range ends within the current size: size is unchanged */
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.fallocate != NULL) { /* the callback is optional */
+ fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT: /* negated states: failure is reported with -1 and fop->error */
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.fallocate != NULL) {
+ fop->cbks.fallocate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE: /* lock reuse and unlock run the same way on success and on failure */
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default: /* unknown state: log it and end the operation */
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{ /* Entry point: builds a GF_FOP_FALLOCATE fop from the arguments and starts it with ec_manager; func receives the result. */
+ ec_cbk_t callback = { .fallocate = func };
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM; /* reported for every 'goto out' taken before 'error = 0' below */
+
+ gf_msg_trace ("ec", 0, "EC(FALLOCATE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
+ minimum, ec_wind_fallocate, ec_manager_fallocate,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+ fop->int32 = mode; /* the generic int32 slot carries the fallocate mode flags */
+ fop->offset = offset;
+ fop->size = len;
+
+ if (fd != NULL) { /* the fop holds its own reference to the fd */
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+ goto out;
+ }
+ }
+
+ if (xdata != NULL) { /* likewise for the optional xdata dictionary */
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) { /* a fop exists: ec_manager is started with 'error' (0 on success) */
+ ec_manager(fop, error);
+ } else { /* no fop could be created: fail the request directly through func */
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/*********************************************************************
+ *
+ * File Operation : truncate
+ *
+ *********************************************************************/
int32_t ec_truncate_write(ec_fop_data_t * fop, uintptr_t mask)
{
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 2009faccbaf..3d40b111819 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -769,10 +769,11 @@ int32_t ec_gf_fentrylk(call_frame_t * frame, xlator_t * this,
}
int32_t ec_gf_fallocate(call_frame_t * frame, xlator_t * this, fd_t * fd,
- int32_t keep_size, off_t offset, size_t len,
+ int32_t mode, off_t offset, size_t len,
dict_t * xdata)
{
- default_fallocate_failure_cbk(frame, ENOTSUP);
+ ec_fallocate(frame, this, -1, EC_MINIMUM_MIN, default_fallocate_cbk,
+ NULL, fd, mode, offset, len, xdata);
return 0;
}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 999e8f60534..c78decd8482 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -754,11 +754,11 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto out;
}
- ret = sys_fallocate (pfd->fd, flags, offset, len);
- if (ret == -1) {
- ret = -errno;
- goto out;
- }
+ ret = sys_fallocate (pfd->fd, flags, offset, len);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
ret = posix_fdstat (this, pfd->fd, statpost);
if (ret == -1) {