summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaghavendra G <raghavendra@gluster.com>2010-05-26 06:08:09 +0000
committerAnand V. Avati <avati@dev.gluster.com>2010-05-26 04:49:00 -0700
commit6d9b11dba63d86c48450aa956281114962289ef5 (patch)
tree62ef8d84b7b9cc61a8c686e2dd1feef60539c302
parent1d363d4b7dd5b4dc25892053259ff43f1b4c52c7 (diff)
performance/write-behind: explicitly enforce ordering of overlapping writes.v3.0.5rc2
- If there are non-contiguous offsets (offsets which do not start where previous write ended), wait for completion of previous writes to server, before sending new ones. - Send flush call to server only when all writes are completed. - If a file is opened with O_APPEND, at any point of time a maximum only one write call to server should be in transit. This is to avoid reordering of writes in the presence of afr which can result in data corruption. See bug #934 for more details. Signed-off-by: Raghavendra G <raghavendra@gluster.com> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 934 (md5sum mismatch when files are transferred using vsftpd) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=934
-rw-r--r--xlators/performance/write-behind/src/write-behind.c247
1 files changed, 125 insertions, 122 deletions
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 883d25e..064e0b7 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -55,6 +55,7 @@ typedef struct wb_file {
int32_t refcount;
int32_t op_ret;
int32_t op_errno;
+ int32_t flags;
list_head_t request;
list_head_t passive_requests;
fd_t *fd;
@@ -265,7 +266,7 @@ out:
wb_file_t *
-wb_file_create (xlator_t *this, fd_t *fd)
+wb_file_create (xlator_t *this, fd_t *fd, int32_t flags)
{
wb_file_t *file = NULL;
wb_conf_t *conf = this->private;
@@ -287,6 +288,7 @@ wb_file_create (xlator_t *this, fd_t *fd)
file->this = this;
file->refcount = 1;
file->window_conf = conf->window_size;
+ file->flags = flags;
fd_ctx_set (fd, this, (uint64_t)(long)file);
@@ -1228,7 +1230,7 @@ wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
wbflags = local->wbflags;
if (op_ret != -1) {
- file = wb_file_create (this, fd);
+ file = wb_file_create (this, fd, flags);
if (file == NULL) {
op_ret = -1;
op_errno = ENOMEM;
@@ -1307,7 +1309,7 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
wb_conf_t *conf = this->private;
if (op_ret != -1) {
- file = wb_file_create (this, fd);
+ file = wb_file_create (this, fd, flags);
if (file == NULL) {
op_ret = -1;
op_errno = ENOMEM;
@@ -1338,8 +1340,8 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
out:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent,
- postparent);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent);
return 0;
}
@@ -1358,14 +1360,22 @@ wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
return 0;
}
-
+/* Mark all the contiguous write requests for winding starting from head of
+ * request list. Stops marking at the first non-write request found. If
+ * file is opened with O_APPEND, make sure all the writes marked for winding
+ * will fit into a single write call to server.
+ */
size_t
__wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
{
- wb_request_t *request = NULL;
- size_t size = 0;
- char first_request = 1;
+ wb_request_t *request = NULL;
+ size_t size = 0;
+ char first_request = 1;
off_t offset_expected = 0;
+ wb_conf_t *conf = NULL;
+ int count = 0;
+
+ conf = file->this->private;
list_for_each_entry (request, list, list)
{
@@ -1384,9 +1394,18 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
break;
}
+ if ((file->flags & O_APPEND)
+ && (((size + request->write_size)
+ > conf->aggregate_size)
+ || ((count + request->stub->args.writev.count)
+ > conf->aggregate_size))) {
+ break;
+ }
+
size += request->write_size;
offset_expected += request->write_size;
file->aggregate_current -= request->write_size;
+ count += request->stub->args.writev.count;
request->flags.write_request.stack_wound = 1;
list_add_tail (&request->winds, winds);
@@ -1461,15 +1480,14 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
request = list_entry (list->next, typeof (*request), list);
file = request->file;
- if (!wind_all && (file->aggregate_current < aggregate_conf)) {
- __wb_can_wind (list, &other_fop_in_queue,
- &non_contiguous_writes, &incomplete_writes);
- }
+ __wb_can_wind (list, &other_fop_in_queue,
+ &non_contiguous_writes, &incomplete_writes);
- if ((enable_trickling_writes && !incomplete_writes)
- || (wind_all) || (non_contiguous_writes)
- || (other_fop_in_queue)
- || (file->aggregate_current >= aggregate_conf)) {
+ if (!incomplete_writes && ((enable_trickling_writes)
+ || (wind_all) || (non_contiguous_writes)
+ || (other_fop_in_queue)
+ || (file->aggregate_current
+ >= aggregate_conf))) {
size = __wb_mark_wind_all (file, list, winds);
}
@@ -2087,64 +2105,66 @@ wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno)
{
wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
- char unwind = 0;
- int32_t ret = -1;
- int disabled = 0;
- int64_t disable_till = 0;
+ wb_file_t *file = NULL;
+ wb_conf_t *conf = NULL;
+ int32_t ret = -1;
conf = this->private;
local = frame->local;
+ file = local->file;
- if ((local != NULL) && (local->file != NULL)) {
- file = local->file;
-
+ if (file != NULL) {
LOCK (&file->lock);
{
- disabled = file->disabled;
- disable_till = file->disable_till;
+ if (file->op_ret == -1) {
+ op_ret = file->op_ret;
+ op_errno = file->op_errno;
+
+ file->op_ret = 0;
+ }
}
UNLOCK (&file->lock);
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- unwind = 1;
- } else {
- local->reply_count++;
- /*
- * without flush-behind, unwind should wait for replies
- * of writes queued before and the flush
- */
- if (local->reply_count == 2) {
- unwind = 1;
- }
+ ret = wb_process_queue (frame, file, 0);
+ if ((ret == -1) && (errno == ENOMEM)) {
+ op_ret = -1;
+ op_errno = ENOMEM;
}
- } else {
- unwind = 1;
}
+
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- if (unwind) {
- if (file != NULL) {
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
+ return 0;
+}
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+int32_t
+wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ wb_conf_t *conf = NULL;
+ wb_local_t *local = NULL;
+
+ conf = this->private;
+
+ local = frame->local;
+
+ if (local && local->request) {
+ local->request->stub = NULL;
+ wb_request_unref (local->request);
+ }
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ if (conf->flush_behind) {
+ STACK_WIND (frame,
+ wb_ffr_bg_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd);
+ } else {
+ STACK_WIND (frame,
+ wb_ffr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd);
}
return 0;
@@ -2159,12 +2179,9 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
wb_local_t *local = NULL;
uint64_t tmp_file = 0;
call_stub_t *stub = NULL;
- call_frame_t *process_frame = NULL;
- wb_local_t *tmp_local = NULL;
+ call_frame_t *flush_frame = NULL;
wb_request_t *request = NULL;
int32_t ret = 0;
- int disabled = 0;
- int64_t disable_till = 0;
conf = this->private;
@@ -2183,93 +2200,79 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (file != NULL) {
local = CALLOC (1, sizeof (*local));
if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
return 0;
}
local->file = file;
- frame->local = local;
- stub = fop_flush_cbk_stub (frame, wb_ffr_cbk, 0, 0);
- if (stub == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- return 0;
+ if (conf->flush_behind) {
+ flush_frame = copy_frame (frame);
+ if (flush_frame == NULL) {
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ return 0;
+ }
+ } else {
+ flush_frame = frame;
}
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
- call_stub_destroy (stub);
- return 0;
- }
+ flush_frame->local = local;
- LOCK (&file->lock);
- {
- disabled = file->disabled;
- disable_till = file->disable_till;
- }
- UNLOCK (&file->lock);
-
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- tmp_local = CALLOC (1, sizeof (*local));
- if (tmp_local == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
-
- STACK_DESTROY (process_frame->root);
- call_stub_destroy (stub);
- return 0;
+ stub = fop_flush_stub (flush_frame, wb_flush_helper, fd);
+ if (stub == NULL) {
+ if (flush_frame != frame) {
+ STACK_DESTROY (flush_frame->root);
}
- tmp_local->file = file;
- process_frame->local = tmp_local;
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ return 0;
}
- fd_ref (fd);
-
request = wb_enqueue (file, stub);
if (request == NULL) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ if (flush_frame != frame) {
+ STACK_DESTROY (flush_frame->root);
+ }
- fd_unref (fd);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
call_stub_destroy (stub);
- STACK_DESTROY (process_frame->root);
return 0;
}
- ret = wb_process_queue (process_frame, file, 1);
+ ret = wb_process_queue (flush_frame, file, 1);
if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ if (flush_frame != frame) {
+ STACK_DESTROY (flush_frame->root);
+ }
- fd_unref (fd);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
call_stub_destroy (stub);
- STACK_DESTROY (process_frame->root);
return 0;
}
- }
-
- if ((file != NULL) && conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- STACK_WIND (process_frame,
- wb_ffr_bg_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
} else {
- STACK_WIND (frame,
- wb_ffr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
+ if (conf->flush_behind) {
+ flush_frame = copy_frame (frame);
+ if (flush_frame == NULL) {
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ return 0;
+ }
- if (process_frame != NULL) {
- STACK_DESTROY (process_frame->root);
+ STACK_WIND (flush_frame,
+ wb_ffr_bg_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd);
+ } else {
+ STACK_WIND (frame,
+ wb_ffr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd);
}
}
-
- if (file != NULL) {
- fd_unref (fd);
+ if (conf->flush_behind) {
+ STACK_UNWIND_STRICT (flush, frame, 0, 0);
}
return 0;