diff options
| author | Raghavendra G <raghavendra@gluster.com> | 2010-05-26 06:08:09 +0000 | 
|---|---|---|
| committer | Anand V. Avati <avati@dev.gluster.com> | 2010-05-26 04:49:00 -0700 | 
| commit | 6d9b11dba63d86c48450aa956281114962289ef5 (patch) | |
| tree | 62ef8d84b7b9cc61a8c686e2dd1feef60539c302 /xlators/performance/write-behind/src/write-behind.c | |
| parent | 1d363d4b7dd5b4dc25892053259ff43f1b4c52c7 (diff) | |
performance/write-behind: explicitly enforce ordering of overlapping writes.v3.0.5rc2
- If there are non-contiguous offsets (offsets which do not start where
    previous write ended), wait for completion of previous writes to server,
    before sending new ones.
  - Send flush call to server only when all writes are completed.
  - If a file is opened with O_APPEND, at any point of time a maximum only one
    write call to server should be in transit. This is to avoid reordering of
    writes in the presence of afr which can result in data corruption.
    See bug #934 for more details.
Signed-off-by: Raghavendra G <raghavendra@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 934 (md5sum mismatch when files are transferred using vsftpd)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=934
Diffstat (limited to 'xlators/performance/write-behind/src/write-behind.c')
| -rw-r--r-- | xlators/performance/write-behind/src/write-behind.c | 247 | 
1 files changed, 125 insertions, 122 deletions
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 883d25e9666..064e0b717c7 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -55,6 +55,7 @@ typedef struct wb_file {          int32_t      refcount;          int32_t      op_ret;          int32_t      op_errno; +        int32_t      flags;          list_head_t  request;          list_head_t  passive_requests;          fd_t        *fd; @@ -265,7 +266,7 @@ out:  wb_file_t * -wb_file_create (xlator_t *this, fd_t *fd) +wb_file_create (xlator_t *this, fd_t *fd, int32_t flags)  {          wb_file_t *file = NULL;          wb_conf_t *conf = this->private;  @@ -287,6 +288,7 @@ wb_file_create (xlator_t *this, fd_t *fd)          file->this = this;          file->refcount = 1;          file->window_conf = conf->window_size; +        file->flags = flags;          fd_ctx_set (fd, this, (uint64_t)(long)file); @@ -1228,7 +1230,7 @@ wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,          wbflags = local->wbflags;          if (op_ret != -1) { -                file = wb_file_create (this, fd); +                file = wb_file_create (this, fd, flags);                  if (file == NULL) {                          op_ret = -1;                          op_errno = ENOMEM; @@ -1307,7 +1309,7 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          wb_conf_t *conf = this->private;          if (op_ret != -1) { -                file = wb_file_create (this, fd); +                file = wb_file_create (this, fd, flags);                  if (file == NULL) {                          op_ret = -1;                          op_errno = ENOMEM; @@ -1338,8 +1340,8 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          frame->local = NULL;  out:         -        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent, -                      postparent); +        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, +                             preparent, postparent);          return 0;  } @@ -1358,14 +1360,22 @@ wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          return 0;  } - +/* Mark all the contiguous write requests for winding starting from head of + * request list. Stops marking at the first non-write request found. If + * file is opened with O_APPEND, make sure all the writes marked for winding + * will fit into a single write call to server. + */  size_t  __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)  { -        wb_request_t *request = NULL; -        size_t        size = 0; -        char          first_request = 1; +        wb_request_t *request         = NULL; +        size_t        size            = 0; +        char          first_request   = 1;          off_t         offset_expected = 0; +        wb_conf_t    *conf            = NULL; +        int           count           = 0; + +        conf = file->this->private;          list_for_each_entry (request, list, list)          { @@ -1384,9 +1394,18 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)                                  break;                          } +                        if ((file->flags & O_APPEND) +                            && (((size + request->write_size) +                                 > conf->aggregate_size) +                                || ((count + request->stub->args.writev.count) +                                    > conf->aggregate_size))) { +                                break; +                        } +                          size += request->write_size;                          offset_expected += request->write_size;                          file->aggregate_current -= request->write_size; +                        count += request->stub->args.writev.count;                          request->flags.write_request.stack_wound = 1;                          list_add_tail (&request->winds, winds); @@ -1461,15 +1480,14 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,          request = list_entry (list->next, typeof (*request), list);          file = request->file; -        if (!wind_all && (file->aggregate_current < aggregate_conf)) { -                __wb_can_wind (list, &other_fop_in_queue, -                               &non_contiguous_writes, &incomplete_writes); -        } +        __wb_can_wind (list, &other_fop_in_queue, +                       &non_contiguous_writes, &incomplete_writes); -        if ((enable_trickling_writes && !incomplete_writes) -            || (wind_all) || (non_contiguous_writes) -            || (other_fop_in_queue) -            || (file->aggregate_current >= aggregate_conf)) { +        if (!incomplete_writes && ((enable_trickling_writes) +                                   || (wind_all) || (non_contiguous_writes) +                                   || (other_fop_in_queue) +                                   || (file->aggregate_current +                                       >= aggregate_conf))) {                  size = __wb_mark_wind_all (file, list, winds);          }  @@ -2087,64 +2105,66 @@ wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,              int32_t op_errno)  {          wb_local_t *local = NULL; -        wb_file_t  *file = NULL; -        wb_conf_t  *conf = NULL; -        char        unwind = 0; -        int32_t     ret = -1; -        int         disabled = 0; -        int64_t     disable_till = 0; +        wb_file_t  *file  = NULL; +        wb_conf_t  *conf  = NULL; +        int32_t     ret   = -1;          conf = this->private;          local = frame->local; +        file = local->file; -        if ((local != NULL) && (local->file != NULL)) { -                file = local->file; - +        if (file != NULL) {                  LOCK (&file->lock);                  { -                        disabled = file->disabled; -                        disable_till = file->disable_till; +                        if (file->op_ret == -1) { +                                op_ret = file->op_ret; +                                op_errno = file->op_errno; + +                                file->op_ret = 0; +                        }                  }                  UNLOCK (&file->lock); -                if (conf->flush_behind -                    && (!disabled) && (disable_till == 0)) { -                        unwind = 1; -                } else { -                        local->reply_count++; -                        /*  -                         * without flush-behind, unwind should wait for replies -                         * of writes queued before and the flush  -                         */ -                        if (local->reply_count == 2) { -                                unwind = 1; -                        } +                ret = wb_process_queue (frame, file, 0); +                if ((ret == -1) && (errno == ENOMEM)) { +                        op_ret = -1; +                        op_errno = ENOMEM;                  } -        } else { -                unwind = 1;          } +                 +        STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); -        if (unwind) { -                if (file != NULL) { -                        LOCK (&file->lock); -                        { -                                if (file->op_ret == -1) { -                                        op_ret = file->op_ret; -                                        op_errno = file->op_errno; +        return 0; +} -                                        file->op_ret = 0; -                                } -                        } -                        UNLOCK (&file->lock); -                        ret = wb_process_queue (frame, file, 0); -                        if ((ret == -1) && (errno == ENOMEM)) { -                                op_ret = -1; -                                op_errno = ENOMEM; -                        } -                } +int32_t +wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ +        wb_conf_t  *conf   = NULL; +        wb_local_t *local  = NULL; + +        conf = this->private; + +        local = frame->local; + +        if (local && local->request) { +                local->request->stub = NULL; +                wb_request_unref (local->request); +        } -                STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); +        if (conf->flush_behind) { +                STACK_WIND (frame, +                            wb_ffr_bg_cbk, +                            FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->flush, +                            fd); +        } else { +                STACK_WIND (frame, +                            wb_ffr_cbk, +                            FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->flush, +                            fd);          }          return 0; @@ -2159,12 +2179,9 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)          wb_local_t   *local = NULL;  	uint64_t      tmp_file = 0;          call_stub_t  *stub = NULL; -        call_frame_t *process_frame = NULL; -        wb_local_t   *tmp_local = NULL; +        call_frame_t *flush_frame = NULL;          wb_request_t *request = NULL;          int32_t       ret = 0; -        int           disabled = 0; -        int64_t       disable_till = 0;          conf = this->private; @@ -2183,93 +2200,79 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)          if (file != NULL) {                  local = CALLOC (1, sizeof (*local));                  if (local == NULL) { -                        STACK_UNWIND (frame, -1, ENOMEM, NULL); +                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);                          return 0;                  }                  local->file = file; -                frame->local = local; -                stub = fop_flush_cbk_stub (frame, wb_ffr_cbk, 0, 0); -                if (stub == NULL) { -                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); -                        return 0; +                if (conf->flush_behind) { +                        flush_frame = copy_frame (frame); +                        if (flush_frame == NULL) { +                                STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); +                                return 0; +                        } +                } else { +                        flush_frame = frame;                  } -                process_frame = copy_frame (frame); -                if (process_frame == NULL) { -                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); -                        call_stub_destroy (stub); -                        return 0; -                } +                flush_frame->local = local; -                LOCK (&file->lock); -                { -                        disabled = file->disabled; -                        disable_till = file->disable_till; -                } -                UNLOCK (&file->lock); -                 -                if (conf->flush_behind -                    && (!disabled) && (disable_till == 0)) { -                        tmp_local = CALLOC (1, sizeof (*local)); -                        if (tmp_local == NULL) { -                                STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); -                                  -                                STACK_DESTROY (process_frame->root); -                                call_stub_destroy (stub); -                                return 0; +                stub = fop_flush_stub (flush_frame, wb_flush_helper, fd); +                if (stub == NULL) { +                        if (flush_frame != frame) { +                                STACK_DESTROY (flush_frame->root);                          } -                        tmp_local->file = file; -                        process_frame->local = tmp_local; +                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); +                        return 0;                  } -                fd_ref (fd); -                  request = wb_enqueue (file, stub);                  if (request == NULL) { -                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); +                        if (flush_frame != frame) { +                                STACK_DESTROY (flush_frame->root); +                        } -                        fd_unref (fd); +                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);                          call_stub_destroy (stub); -                        STACK_DESTROY (process_frame->root);                          return 0;                  } -                ret = wb_process_queue (process_frame, file, 1);  +                ret = wb_process_queue (flush_frame, file, 1);                   if ((ret == -1) && (errno == ENOMEM)) { -                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); +                        if (flush_frame != frame) { +                                STACK_DESTROY (flush_frame->root); +                        } -                        fd_unref (fd); +                        STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);                          call_stub_destroy (stub); -                        STACK_DESTROY (process_frame->root);                          return 0;                  } -        } -                 -        if ((file != NULL) && conf->flush_behind -            && (!disabled) && (disable_till == 0)) { -                STACK_WIND (process_frame, -                            wb_ffr_bg_cbk, -                            FIRST_CHILD(this), -                            FIRST_CHILD(this)->fops->flush, -                            fd);          } else { -                STACK_WIND (frame, -                            wb_ffr_cbk, -                            FIRST_CHILD(this), -                            FIRST_CHILD(this)->fops->flush, -                            fd); +                if (conf->flush_behind) { +                        flush_frame = copy_frame (frame); +                        if (flush_frame == NULL) { +                                STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); +                                return 0; +                        } -                if (process_frame != NULL) { -                        STACK_DESTROY (process_frame->root); +                        STACK_WIND (flush_frame, +                                    wb_ffr_bg_cbk, +                                    FIRST_CHILD(this), +                                    FIRST_CHILD(this)->fops->flush, +                                    fd); +                } else { +                        STACK_WIND (frame, +                                    wb_ffr_cbk, +                                    FIRST_CHILD(this), +                                    FIRST_CHILD(this)->fops->flush, +                                    fd);                  }          } -          -        if (file != NULL) { -                fd_unref (fd); +        if (conf->flush_behind) { +                STACK_UNWIND_STRICT (flush, frame, 0, 0);          }          return 0;  | 
