summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2014-11-08 21:07:09 +0100
committerRaghavendra Bhat <raghavendra@redhat.com>2014-12-18 06:36:43 -0800
commit2372a9ed54f6e30e3ac744e1daf8ec379dd236ae (patch)
treeb93c44f0a169c32f0de0a7b6af33e01ffc7319fa
parentad16db31df793d90a40fbfced82cae1c86d94658 (diff)
ec: Remove O_APPEND from flags on create and open.
Allowing O_APPEND flag to pass through to the brick files corrupts fragment contents because writes are not stored on the desired place. Write fop has been modified so that it uses current file size as its write offset. This guarantees that all writes, even those comming from different file descriptors and clients, will write to the end of the file. This is backport of http://review.gluster.org/9079/ Change-Id: I9f721f12217a98231fe52e344166d1c94172c272 BUG: 1161885 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/9080 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com> Tested-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r--tests/bugs/bug-1161621.t43
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c4
-rw-r--r--xlators/cluster/ec/src/ec-heal.c2
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c3
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c104
5 files changed, 103 insertions, 53 deletions
diff --git a/tests/bugs/bug-1161621.t b/tests/bugs/bug-1161621.t
new file mode 100644
index 00000000000..918b67679a3
--- /dev/null
+++ b/tests/bugs/bug-1161621.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 5 redundancy 2 $H0:$B0/${V0}{0..4}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-id=/$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+trap "rm -rf $tmpdir" EXIT
+
+TEST dd if=/dev/urandom of=$tmpdir/file bs=1234 count=20
+cs=$(sha1sum $tmpdir/file | awk '{ print $1 }')
+# Test O_APPEND on create
+TEST dd if=$tmpdir/file of=$M0/file bs=1234 count=10 oflag=append
+# Test O_APPEND on open
+TEST dd if=$tmpdir/file of=$M0/file bs=1234 skip=10 oflag=append conv=notrunc
+EXPECT "$cs" echo $(sha1sum $M0/file | awk '{ print $1 }')
+
+# Fill a file with ff (I don't use 0's because empty holes created by an
+# incorrect offset will be returned as 0's and won't be detected)
+dd if=/dev/zero bs=24680 count=1000 | tr '\0' '\377' >$tmpdir/shared
+cs=$(sha1sum $tmpdir/shared | awk '{ print $1 }')
+# Test concurrent writes to the same file using O_APPEND
+dd if=$tmpdir/shared of=$M0/shared bs=123400 count=100 oflag=append conv=notrunc &
+dd if=$tmpdir/shared of=$M1/shared bs=123400 count=100 oflag=append conv=notrunc &
+wait
+
+EXPECT "24680000" stat -c "%s" $M0/shared
+EXPECT "$cs" echo $(sha1sum $M0/shared | awk '{ print $1 }')
+
+TEST rm -rf $tmpdir
+
+cleanup
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 6aa9badb76b..e1bbf7bae15 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -194,7 +194,9 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
return EC_STATE_REPORT;
}
- fop->int32 &= ~O_ACCMODE;
+ /* We need to write to specific offsets on the bricks, so we
+ * need to remove O_APPEND from flags (if present) */
+ fop->int32 &= ~(O_ACCMODE | O_APPEND);
fop->int32 |= O_RDWR;
/* Fall through */
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 73df4eab8f6..956d70a93fd 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -989,7 +989,7 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
}
else
{
- flags = ctx_fd->flags & ~O_TRUNC;
+ flags = ctx_fd->flags & ~(O_TRUNC | O_APPEND);
if ((flags & O_ACCMODE) == O_WRONLY)
{
flags &= ~O_ACCMODE;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 3f7920239ec..8f89224987d 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -674,6 +674,9 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state)
fop->int32 &= ~O_ACCMODE;
fop->int32 |= O_RDWR;
}
+ /* We need to write to specific offsets on the bricks, so we
+ * need to remove O_APPEND from flags (if present) */
+ fop->int32 &= ~O_APPEND;
/* Fall through */
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index c120f067b6a..a0571d5a6f7 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -1710,64 +1710,16 @@ out:
int32_t ec_writev_init(ec_fop_data_t * fop)
{
- ec_t * ec = fop->xl->private;
- struct iobref * iobref = NULL;
- struct iobuf * iobuf = NULL;
- void * ptr = NULL;
ec_fd_t * ctx;
ctx = ec_fd_get(fop->fd, fop->xl);
- if (ctx != NULL)
- {
- if ((ctx->flags & O_ACCMODE) == O_RDONLY)
- {
+ if (ctx != NULL) {
+ if ((ctx->flags & O_ACCMODE) == O_RDONLY) {
return EBADF;
}
}
- fop->user_size = iov_length(fop->vector, fop->int32);
- fop->head = ec_adjust_offset(ec, &fop->offset, 0);
- fop->size = ec_adjust_size(ec, fop->user_size + fop->head, 0);
-
- iobref = iobref_new();
- if (iobref == NULL)
- {
- goto out;
- }
- iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, fop->size);
- if (iobuf == NULL)
- {
- goto out;
- }
- if (iobref_add(iobref, iobuf) != 0)
- {
- goto out;
- }
-
- ptr = iobuf->ptr + fop->head;
- ec_iov_copy_to(ptr, fop->vector, fop->int32, 0, fop->user_size);
-
- fop->vector[0].iov_base = iobuf->ptr;
- fop->vector[0].iov_len = fop->size;
-
- iobuf_unref(iobuf);
-
- iobref_unref(fop->buffers);
- fop->buffers = iobref;
-
return 0;
-
-out:
- if (iobuf != NULL)
- {
- iobuf_unref(iobuf);
- }
- if (iobref != NULL)
- {
- iobref_unref(iobref);
- }
-
- return EIO;
}
int32_t ec_writev_merge_tail(call_frame_t * frame, void * cookie,
@@ -1844,9 +1796,47 @@ int32_t ec_writev_merge_head(call_frame_t * frame, void * cookie,
void ec_writev_start(ec_fop_data_t * fop)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ void *ptr = NULL;
+ ec_fd_t *ctx;
size_t tail;
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) != 0) {
+ fop->offset = fop->pre_size;
+ }
+ }
+
+ fop->user_size = iov_length(fop->vector, fop->int32);
+ fop->head = ec_adjust_offset(ec, &fop->offset, 0);
+ fop->size = ec_adjust_size(ec, fop->user_size + fop->head, 0);
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ iobuf = iobuf_get2(fop->xl->ctx->iobuf_pool, fop->size);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ if (iobref_add(iobref, iobuf) != 0) {
+ goto out;
+ }
+
+ ptr = iobuf->ptr + fop->head;
+ ec_iov_copy_to(ptr, fop->vector, fop->int32, 0, fop->user_size);
+
+ fop->vector[0].iov_base = iobuf->ptr;
+ fop->vector[0].iov_len = fop->size;
+
+ iobuf_unref(iobuf);
+
+ iobref_unref(fop->buffers);
+ fop->buffers = iobref;
+
if (fop->head > 0)
{
ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, ec_writev_merge_head,
@@ -1866,6 +1856,18 @@ void ec_writev_start(ec_fop_data_t * fop)
memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
}
}
+
+ return;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ ec_fop_set_error(fop, EIO);
}
int32_t ec_combine_writev(ec_fop_data_t * fop, ec_cbk_data_t * dst,