From bf5c0efdec755297a976a6253665431d700d0737 Mon Sep 17 00:00:00 2001 From: Raghavendra G Date: Fri, 21 Jan 2011 07:47:08 +0000 Subject: performance/write-behind: backport write-behind from 3.1 Signed-off-by: Raghavendra G Signed-off-by: Anand V. Avati BUG: 934 (md5sum mismatch when files are transferred using vsftpd) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=934 --- .../performance/write-behind/src/write-behind.c | 851 +++++++++++++++------ 1 file changed, 620 insertions(+), 231 deletions(-) diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 91342700e..79fe056d3 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -1,18 +1,18 @@ /* - Copyright (c) 2006-2009 Gluster, Inc. + Copyright (c) 2006-2010 Gluster, Inc. This file is part of GlusterFS. GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published + it under the terms of the GNU Affero General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GlusterFS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + Affero General Public License for more details. - You should have received a copy of the GNU General Public License + You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ @@ -35,6 +35,7 @@ #include "common-utils.h" #include "call-stub.h" #include "statedump.h" +#include #define MAX_VECTOR_COUNT 8 #define WB_AGGREGATE_SIZE 131072 /* 128 KB */ @@ -51,6 +52,7 @@ typedef struct wb_file { uint64_t disable_till; size_t window_conf; size_t window_current; + int32_t flags; size_t aggregate_current; int32_t refcount; int32_t op_ret; @@ -64,20 +66,29 @@ typedef struct wb_file { typedef struct wb_request { - list_head_t list; - list_head_t winds; - list_head_t unwinds; - list_head_t other_requests; - call_stub_t *stub; - size_t write_size; - int32_t refcount; - wb_file_t *file; + list_head_t list; + list_head_t winds; + list_head_t unwinds; + list_head_t other_requests; + call_stub_t *stub; + size_t write_size; + int32_t refcount; + wb_file_t *file; + glusterfs_fop_t fop; union { struct { char write_behind; char stack_wound; char got_reply; char virgin; + char flush_all; /* while trying to sync to back-end, + * don't wait till a data of size + * equal to configured aggregate-size + * is accumulated, instead sync + * whatever data currently present in + * request queue. + */ + }write_request; struct { @@ -106,7 +117,6 @@ typedef struct wb_local { int op_ret; int op_errno; call_frame_t *frame; - fd_t *fd; int32_t reply_count; } wb_local_t; @@ -116,26 +126,28 @@ typedef struct wb_page wb_page_t; int32_t -wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all); +wb_process_queue (call_frame_t *frame, wb_file_t *file); ssize_t wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds); ssize_t __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size, - char wind_all, char enable_trickling_writes); + char enable_trickling_writes); -static void +static int __wb_request_unref (wb_request_t *this) { + int ret = -1; + if (this->refcount <= 0) { gf_log ("wb-request", GF_LOG_DEBUG, "refcount(%d) is <= 0", this->refcount); - return; + goto out; } - this->refcount--; + ret = --this->refcount; if (this->refcount == 0) { list_del_init (&this->list); if (this->stub && this->stub->fop == GF_FOP_WRITE) { @@ -144,25 +156,33 @@ __wb_request_unref (wb_request_t *this) FREE (this); } + +out: + return ret; } -static void +static int wb_request_unref (wb_request_t *this) { wb_file_t *file = NULL; + int ret = 0; + if (this == NULL) { gf_log ("wb-request", GF_LOG_DEBUG, "request is NULL"); - return; + goto out; } file = this->file; LOCK (&file->lock); { - __wb_request_unref (this); + ret = __wb_request_unref (this); } UNLOCK (&file->lock); + +out: + return ret; } @@ -204,11 +224,11 @@ wb_request_ref (wb_request_t *this) wb_request_t * wb_enqueue (wb_file_t *file, call_stub_t *stub) { - wb_request_t *request = NULL; - call_frame_t *frame = NULL; - wb_local_t *local = NULL; - struct iovec *vector = NULL; - int32_t count = 0; + wb_request_t *request = NULL, *tmp = NULL; + call_frame_t *frame = NULL; + wb_local_t *local = NULL; + struct iovec *vector = NULL; + int32_t count = 0; request = CALLOC (1, sizeof (*request)); if (request == NULL) { @@ -222,6 +242,7 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) request->stub = stub; request->file = file; + request->fop = stub->fop; frame = stub->frame; local = frame->local; @@ -233,11 +254,11 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) vector = stub->args.writev.vector; count = stub->args.writev.count; - frame = stub->frame; - local = frame->local; request->write_size = iov_length (vector, count); - local->op_ret = request->write_size; - local->op_errno = 0; + if (local) { + local->op_ret = request->write_size; + local->op_errno = 0; + } request->flags.write_request.virgin = 1; } @@ -254,6 +275,13 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) file->aggregate_current += request->write_size; } else { + list_for_each_entry (tmp, &file->request, list) { + if (tmp->stub && tmp->stub->fop + == GF_FOP_WRITE) { + tmp->flags.write_request.flush_all = 1; + } + } + /*reference for resuming */ __wb_request_ref (request); } @@ -266,7 +294,7 @@ out: wb_file_t * -wb_file_create (xlator_t *this, fd_t *fd) +wb_file_create (xlator_t *this, fd_t *fd, int32_t flags) { wb_file_t *file = NULL; wb_conf_t *conf = this->private; @@ -288,6 +316,7 @@ wb_file_create (xlator_t *this, fd_t *fd) file->this = this; file->refcount = 1; file->window_conf = conf->window_size; + file->flags = flags; fd_ctx_set (fd, this, (uint64_t)(long)file); @@ -359,7 +388,7 @@ wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } UNLOCK (&file->lock); - ret = wb_process_queue (frame, file, 0); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { LOCK (&file->lock); { @@ -393,6 +422,12 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) size_t bytecount = 0; wb_conf_t *conf = NULL; fd_t *fd = NULL; + int32_t op_errno = -1; + + if (frame == NULL) { + op_errno = EINVAL; + goto out; + } conf = file->this->private; list_for_each_entry (request, winds, winds) { @@ -403,6 +438,8 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) } if (total_count == 0) { + gf_log (file->this->name, GF_LOG_TRACE, "no vectors are to be" + "synced"); goto out; } @@ -411,18 +448,27 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) vector = MALLOC (VECTORSIZE (MAX_VECTOR_COUNT)); if (vector == NULL) { bytes = -1; + op_errno = ENOMEM; + gf_log (file->this->name, GF_LOG_ERROR, + "out of memory"); goto out; } iobref = iobref_new (); if (iobref == NULL) { bytes = -1; + op_errno = ENOMEM; + gf_log (file->this->name, GF_LOG_ERROR, + "out of memory"); goto out; } local = CALLOC (1, sizeof (*local)); if (local == NULL) { bytes = -1; + op_errno = ENOMEM; + gf_log (file->this->name, GF_LOG_ERROR, + "out of memory"); goto out; } @@ -464,6 +510,9 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) sync_frame = copy_frame (frame); if (sync_frame == NULL) { bytes = -1; + op_errno = ENOMEM; + gf_log (file->this->name, GF_LOG_ERROR, + "out of memory"); goto out; } @@ -506,7 +555,16 @@ out: } if (local != NULL) { + /* had we winded these requests, we would have unrefed + * in wb_sync_cbk. + */ + list_for_each_entry_safe (request, dummy, &local->winds, + winds) { + wb_request_unref (request); + } + FREE (local); + local = NULL; } if (iobref != NULL) { @@ -517,6 +575,28 @@ out: FREE (vector); } + if (bytes == -1) { + /* + * had we winded these requests, we would have unrefed + * in wb_sync_cbk. + */ + if (local) { + list_for_each_entry_safe (request, dummy, &local->winds, + winds) { + wb_request_unref (request); + } + } + + if (file != NULL) { + LOCK (&file->lock); + { + file->op_ret = -1; + file->op_errno = op_errno; + } + UNLOCK (&file->lock); + } + } + return bytes; } @@ -551,7 +631,7 @@ wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file, 0); + ret = wb_process_queue (process_frame, file); if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) { LOCK (&file->lock); { @@ -636,7 +716,7 @@ wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) goto unwind; } - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_errno = ENOMEM; goto unwind; @@ -680,7 +760,7 @@ wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, request = local->request; if ((file != NULL) && (request != NULL)) { wb_request_unref (request); - ret = wb_process_queue (frame, file, 0); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_ret = -1; op_errno = ENOMEM; @@ -718,14 +798,15 @@ wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) if ((!S_ISDIR (fd->inode->st_mode)) && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", + gf_log (this->name, GF_LOG_DEBUG, + "write behind file pointer is not stored in " + "context of fd(%p), returning EBADFD", fd); STACK_UNWIND_STRICT (fstat, frame, -1, EBADFD, NULL); return 0; } - + file = (wb_file_t *)(long)tmp_file; local = CALLOC (1, sizeof (*local)); if (local == NULL) { @@ -753,7 +834,7 @@ wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) /* FIXME:should the request queue be emptied in case of error? */ - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_errno = ENOMEM; goto unwind; @@ -802,14 +883,15 @@ wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } } - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, + postbuf); if (request) { wb_request_unref (request); } if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file, 0); + ret = wb_process_queue (process_frame, file); if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) { LOCK (&file->lock); { @@ -901,7 +983,7 @@ wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) goto unwind; } - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_errno = ENOMEM; goto unwind; @@ -944,14 +1026,15 @@ wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if ((request != NULL) && (file != NULL)) { wb_request_unref (request); - ret = wb_process_queue (frame, file, 0); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_ret = -1; op_errno = ENOMEM; } } - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, + postbuf); return 0; } @@ -1020,7 +1103,7 @@ wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) goto unwind; } - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_errno = ENOMEM; goto unwind; @@ -1049,7 +1132,8 @@ unwind: int32_t wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *statpre, struct stat *statpost) + int32_t op_ret, int32_t op_errno, struct stat *statpre, + struct stat *statpost) { wb_local_t *local = NULL; wb_request_t *request = NULL; @@ -1070,14 +1154,15 @@ wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } } - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost); + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, + statpost); if (request) { wb_request_unref (request); } if (request && (process_frame != NULL)) { - ret = wb_process_queue (process_frame, file, 0); + ret = wb_process_queue (process_frame, file); if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) { LOCK (&file->lock); { @@ -1169,7 +1254,8 @@ wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, local->file = file; if (file) { - stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, valid); + stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, + valid); if (stub == NULL) { op_errno = ENOMEM; goto unwind; @@ -1181,7 +1267,7 @@ wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, goto unwind; } - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_errno = ENOMEM; goto unwind; @@ -1229,22 +1315,13 @@ wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, wbflags = local->wbflags; if (op_ret != -1) { - file = wb_file_create (this, fd); + file = wb_file_create (this, fd, flags); if (file == NULL) { op_ret = -1; op_errno = ENOMEM; goto out; } - /* - If mandatory locking has been enabled on this file, - we disable caching on it - */ - - if ((fd->inode->st_mode & S_ISGID) - && !(fd->inode->st_mode & S_IXGRP)) - file->disabled = 1; - /* If O_DIRECT then, we disable chaching */ if (((flags & O_DIRECT) == O_DIRECT) || ((flags & O_ACCMODE) == O_RDONLY) @@ -1308,23 +1385,19 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, wb_conf_t *conf = this->private; if (op_ret != -1) { - file = wb_file_create (this, fd); + if (frame->local) { + flags = (long) frame->local; + } + + file = wb_file_create (this, fd, flags); if (file == NULL) { op_ret = -1; op_errno = ENOMEM; goto out; } - /* - * If mandatory locking has been enabled on this file, - * we disable caching on it - */ - if ((fd->inode->st_mode & S_ISGID) - && !(fd->inode->st_mode & S_IXGRP)) - file->disabled = 1; /* If O_DIRECT then, we disable chaching */ if (frame->local) { - flags = (long)frame->local; if (((flags & O_DIRECT) == O_DIRECT) || ((flags & O_ACCMODE) == O_RDONLY) || (((flags & O_SYNC) == O_SYNC) @@ -1339,8 +1412,8 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, frame->local = NULL; out: - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent, - postparent); + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent); return 0; } @@ -1351,22 +1424,29 @@ wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, { frame->local = (void *)(long)flags; - STACK_WIND (frame, - wb_create_cbk, + STACK_WIND (frame, wb_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, fd); return 0; } - +/* Mark all the contiguous write requests for winding starting from head of + * request list. Stops marking at the first non-write request found. If + * file is opened with O_APPEND, make sure all the writes marked for winding + * will fit into a single write call to server. + */ size_t __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) { - wb_request_t *request = NULL; - size_t size = 0; - char first_request = 1; + wb_request_t *request = NULL; + size_t size = 0; + char first_request = 1; off_t offset_expected = 0; + wb_conf_t *conf = NULL; + int count = 0; + + conf = file->this->private; list_for_each_entry (request, list, list) { @@ -1385,9 +1465,18 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) break; } + if ((file->flags & O_APPEND) + && (((size + request->write_size) + > conf->aggregate_size) + || ((count + request->stub->args.writev.count) + > MAX_VECTOR_COUNT))) { + break; + } + size += request->write_size; offset_expected += request->write_size; file->aggregate_current -= request->write_size; + count += request->stub->args.writev.count; request->flags.write_request.stack_wound = 1; list_add_tail (&request->winds, winds); @@ -1400,7 +1489,8 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) void __wb_can_wind (list_head_t *list, char *other_fop_in_queue, - char *non_contiguous_writes, char *incomplete_writes) + char *non_contiguous_writes, char *incomplete_writes, + char *wind_all) { wb_request_t *request = NULL; char first_request = 1; @@ -1426,7 +1516,11 @@ __wb_can_wind (list_head_t *list, char *other_fop_in_queue, if (!request->flags.write_request.stack_wound) { if (first_request) { first_request = 0; - offset_expected = request->stub->args.writev.off; + offset_expected + = request->stub->args.writev.off; + if (wind_all != NULL) { + *wind_all = request->flags.write_request.flush_all; + } } if (offset_expected != request->stub->args.writev.off) { @@ -1446,7 +1540,7 @@ __wb_can_wind (list_head_t *list, char *other_fop_in_queue, ssize_t __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, - char wind_all, char enable_trickling_writes) + char enable_trickling_writes) { size_t size = 0; char other_fop_in_queue = 0; @@ -1454,6 +1548,7 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, char non_contiguous_writes = 0; wb_request_t *request = NULL; wb_file_t *file = NULL; + char wind_all = 0; if (list_empty (list)) { goto out; @@ -1462,17 +1557,16 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, request = list_entry (list->next, typeof (*request), list); file = request->file; - if (!wind_all && (file->aggregate_current < aggregate_conf)) { - __wb_can_wind (list, &other_fop_in_queue, - &non_contiguous_writes, &incomplete_writes); - } + __wb_can_wind (list, &other_fop_in_queue, + &non_contiguous_writes, &incomplete_writes, &wind_all); - if ((enable_trickling_writes && !incomplete_writes) - || (wind_all) || (non_contiguous_writes) - || (other_fop_in_queue) - || (file->aggregate_current >= aggregate_conf)) { + if (!incomplete_writes && ((enable_trickling_writes) + || (wind_all) || (non_contiguous_writes) + || (other_fop_in_queue) + || (file->aggregate_current + >= aggregate_conf))) { size = __wb_mark_wind_all (file, list, winds); - } + } out: return size; @@ -1535,7 +1629,8 @@ __wb_mark_unwinds (list_head_t *list, list_head_t *unwinds) if (file->window_current <= file->window_conf) { __wb_mark_unwind_till (list, unwinds, - file->window_conf - file->window_current); + file->window_conf + - file->window_current); } out: @@ -1573,6 +1668,7 @@ wb_stack_unwind (list_head_t *unwinds) wb_request_t *request = NULL, *dummy = NULL; call_frame_t *frame = NULL; wb_local_t *local = NULL; + int ret = 0, write_requests_removed = 0; list_for_each_entry_safe (request, dummy, unwinds, unwinds) { @@ -1582,10 +1678,13 @@ wb_stack_unwind (list_head_t *unwinds) STACK_UNWIND (frame, local->op_ret, local->op_errno, &buf, &buf); - wb_request_unref (request); + ret = wb_request_unref (request); + if (ret == 0) { + write_requests_removed++; + } } - return 0; + return write_requests_removed; } @@ -1623,7 +1722,7 @@ wb_resume_other_requests (call_frame_t *frame, wb_file_t *file, } if (fops_removed > 0) { - ret = wb_process_queue (frame, file, 0); + ret = wb_process_queue (frame, file); } out: @@ -1635,19 +1734,27 @@ int32_t wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds, list_head_t *unwinds, list_head_t *other_requests) { - int32_t ret = -1; + int32_t ret = -1, write_requests_removed = 0; ret = wb_stack_unwind (unwinds); - if (ret == -1) { - goto out; - } + + write_requests_removed = ret; ret = wb_sync (frame, file, winds); if (ret == -1) { goto out; } - ret = wb_resume_other_requests (frame, file, other_requests); + wb_resume_other_requests (frame, file, other_requests); + + /* wb_stack_unwind does wb_request_unref after unwinding a write + * request. Hence if a write-request was just freed in wb_stack_unwind, + * we have to process request queue once again to unblock requests + * blocked on the writes just unwound. + */ + if (write_requests_removed > 0) { + ret = wb_process_queue (frame, file); + } out: return ret; @@ -1771,7 +1878,7 @@ __wb_collapse_write_bufs (list_head_t *requests, size_t page_size) int32_t -wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all) +wb_process_queue (call_frame_t *frame, wb_file_t *file) { list_head_t winds, unwinds, other_requests; size_t size = 0; @@ -1808,7 +1915,6 @@ wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all) if (count == 0) { __wb_mark_winds (&file->request, &winds, size, - flush_all, conf->enable_trickling_writes); } @@ -1934,7 +2040,7 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, goto unwind; } - ret = wb_process_queue (process_frame, file, 0); + ret = wb_process_queue (process_frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_errno = ENOMEM; goto unwind; @@ -1976,14 +2082,15 @@ wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if ((request != NULL) && (file != NULL)) { wb_request_unref (request); - ret = wb_process_queue (frame, file, 0); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_ret = -1; op_errno = ENOMEM; } } - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref); + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, + stbuf, iobref); return 0; } @@ -2054,7 +2161,7 @@ wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; } - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL); @@ -2074,123 +2181,118 @@ wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } -int -wb_flush_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +int32_t +wb_ffr_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; - wb_request_t *request = NULL; - fd_t *fd = NULL; - - local = frame->local; - if (local) { - file = local->file; - request = local->request; - } - - if (request) - wb_request_unref (request); - - if (file) { - wb_process_queue (frame, file, 0); - fd = file->fd; - } - - if (fd) - fd_unref (fd); - STACK_DESTROY (frame->root); return 0; } -int -wb_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int32_t +wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - int fop_ret = 0; - int fop_errno = 0; + wb_local_t *local = NULL; + wb_file_t *file = NULL; local = frame->local; - if (local) { - file = local->file; - request = local->request; - } - - fop_ret = op_ret; - fop_errno = op_errno; - - if (request) - wb_request_unref (request); + file = local->file; - if (!file) - goto unwind; + if (file != NULL) { + LOCK (&file->lock); + { + if (file->op_ret == -1) { + op_ret = file->op_ret; + op_errno = file->op_errno; - if (file->op_ret < 0) { - fop_ret = file->op_ret; - fop_errno = file->op_errno; + file->op_ret = 0; + } + } + UNLOCK (&file->lock); } - - wb_process_queue (frame, file, 0); - -unwind: - STACK_UNWIND_STRICT (flush, frame, fop_ret, fop_errno); + + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); return 0; } -int +int32_t wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) { - wb_conf_t *conf = NULL; - call_frame_t *flush_frame = NULL; - wb_file_t *file = NULL; - wb_local_t *local = NULL; - int op_ret = 0; - int op_errno = 0; + wb_conf_t *conf = NULL; + wb_local_t *local = NULL; + wb_file_t *file = NULL; + call_frame_t *flush_frame = NULL, *process_frame = NULL; + int32_t op_ret = -1, op_errno = -1, ret = -1; conf = this->private; + local = frame->local; - if (local) - file = local->file; + file = local->file; - if (conf->flush_behind) - flush_frame = copy_frame (frame); + LOCK (&file->lock); + { + op_ret = file->op_ret; + op_errno = file->op_errno; + } + UNLOCK (&file->lock); - if (flush_frame) { - flush_frame->local = frame->local; - frame->local = NULL; + if (local && local->request) { + process_frame = copy_frame (frame); + if (process_frame == NULL) { + gf_log (this->name, GF_LOG_ERROR, "out of memory"); + goto unwind; + } - file->fd = fd_ref (fd); + wb_request_unref (local->request); + } + + if (conf->flush_behind) { + flush_frame = copy_frame (frame); + if (flush_frame == NULL) { + gf_log (this->name, GF_LOG_ERROR, "out of memory"); + goto unwind; + } - STACK_WIND (flush_frame, wb_flush_bg_cbk, + STACK_WIND (flush_frame, + wb_ffr_bg_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush, fd); - if (file) { - op_ret = file->op_ret; - op_errno = file->op_errno; - } - - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); - } else { - STACK_WIND (frame, wb_flush_cbk, + STACK_WIND (frame, + wb_ffr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush, fd); } + if (process_frame != NULL) { + ret = wb_process_queue (process_frame, file); + if ((ret == -1) && (errno == ENOMEM)) { + STACK_DESTROY (process_frame->root); + goto unwind; + } + + STACK_DESTROY (process_frame->root); + } + + if (conf->flush_behind) { + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + } + + return 0; + +unwind: + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); return 0; } -int +int32_t wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) { wb_conf_t *conf = NULL; @@ -2198,7 +2300,7 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) wb_local_t *local = NULL; uint64_t tmp_file = 0; call_stub_t *stub = NULL; - call_frame_t *process_frame = NULL; + call_frame_t *flush_frame = NULL; wb_request_t *request = NULL; int32_t ret = 0; @@ -2216,59 +2318,60 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) file = (wb_file_t *)(long)tmp_file; - if (!file) - goto nofile; + if (file != NULL) { + local = CALLOC (1, sizeof (*local)); + if (local == NULL) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + return 0; + } - if (file->disabled) - goto nofile; + local->file = file; - local = CALLOC (1, sizeof (*local)); - if (local == NULL) { - STACK_UNWIND (frame, -1, ENOMEM, NULL); - return 0; - } + frame->local = local; - local->file = file; + stub = fop_flush_stub (frame, wb_flush_helper, fd); + if (stub == NULL) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + return 0; + } - frame->local = local; - stub = fop_flush_stub (frame, wb_flush_helper, fd); - if (stub == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - return 0; - } + request = wb_enqueue (file, stub); + if (request == NULL) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + call_stub_destroy (stub); + return 0; + } - process_frame = copy_frame (frame); - if (process_frame == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - call_stub_destroy (stub); - return 0; - } + ret = wb_process_queue (frame, file); + if ((ret == -1) && (errno == ENOMEM)) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + call_stub_destroy (stub); + return 0; + } + } else { + if (conf->flush_behind) { + flush_frame = copy_frame (frame); + if (flush_frame == NULL) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + return 0; + } - request = wb_enqueue (file, stub); - if (request == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - call_stub_destroy (stub); - STACK_DESTROY (process_frame->root); - return 0; - } + STACK_UNWIND_STRICT (flush, frame, 0, 0); - ret = wb_process_queue (process_frame, file, 1); - if (ret == -1) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - call_stub_destroy (stub); - STACK_DESTROY (process_frame->root); - return 0; + STACK_WIND (flush_frame, + wb_ffr_bg_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + } else { + STACK_WIND (frame, + wb_ffr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + } } - STACK_DESTROY (process_frame->root); - - return 0; - -nofile: - STACK_WIND (frame, wb_flush_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, - fd); return 0; } @@ -2300,7 +2403,7 @@ wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (request) { wb_request_unref (request); - ret = wb_process_queue (frame, file, 0); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { op_ret = -1; op_errno = ENOMEM; @@ -2376,7 +2479,7 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync) return 0; } - ret = wb_process_queue (frame, file, 1); + ret = wb_process_queue (frame, file); if ((ret == -1) && (errno == ENOMEM)) { STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL); @@ -2457,6 +2560,288 @@ wb_priv_dump (xlator_t *this) return 0; } + +void +__wb_dump_requests (struct list_head *head, char *prefix, char passive) +{ + char key[GF_DUMP_MAX_BUF_LEN]; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + wb_request_t *request = NULL; + + list_for_each_entry (request, head, list) { + gf_proc_dump_build_key (key, prefix, + passive ? "passive-request" + : "active-request"); + gf_proc_dump_build_key (key_prefix, key, + gf_fop_list[request->fop]); + + gf_proc_dump_add_section(key_prefix); + + gf_proc_dump_build_key (key, key_prefix, "request-ptr"); + gf_proc_dump_write (key, "%p", request); + + gf_proc_dump_build_key (key, key_prefix, "refcount"); + gf_proc_dump_write (key, "%d", request->refcount); + + if (request->fop == GF_FOP_WRITE) { + gf_proc_dump_build_key (key, key_prefix, "stack_wound"); + gf_proc_dump_write (key, "%d", + request->flags.write_request.stack_wound); + + gf_proc_dump_build_key (key, key_prefix, "size"); + gf_proc_dump_write (key, "%"GF_PRI_SIZET, + request->write_size); + + gf_proc_dump_build_key (key, key_prefix, "offset"); + gf_proc_dump_write (key, "%"PRId64, + request->stub->args.writev.off); + + gf_proc_dump_build_key (key, key_prefix, + "write_behind"); + gf_proc_dump_write (key, "%d", + request->flags.write_request.write_behind); + + gf_proc_dump_build_key (key, key_prefix, "got_reply"); + gf_proc_dump_write (key, "%d", + request->flags.write_request.got_reply); + + gf_proc_dump_build_key (key, key_prefix, "virgin"); + gf_proc_dump_write (key, "%d", + request->flags.write_request.virgin); + + gf_proc_dump_build_key (key, key_prefix, "flush_all"); + gf_proc_dump_write (key, "%d", + request->flags.write_request.flush_all); + } else { + gf_proc_dump_build_key (key, key_prefix, + "marked_for_resume"); + gf_proc_dump_write (key, "%d", + request->flags.other_requests.marked_for_resume); + } + } +} + + +int +wb_file_dump (xlator_t *this, fd_t *fd) +{ + wb_file_t *file = NULL; + uint64_t tmp_file = 0; + int32_t ret = -1; + char key[GF_DUMP_MAX_BUF_LEN]; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + if ((fd == NULL) || (this == NULL)) { + ret = 0; + goto out; + } + + ret = fd_ctx_get (fd, this, &tmp_file); + if (ret == -1) { + ret = 0; + goto out; + } + + file = (wb_file_t *)(long)tmp_file; + if (file == NULL) { + ret = 0; + goto out; + } + + gf_proc_dump_build_key (key_prefix, + "xlator.performance.write-behind", + "file"); + + gf_proc_dump_add_section (key_prefix); + + gf_proc_dump_build_key (key, key_prefix, "fd"); + gf_proc_dump_write (key, "%p", fd); + + gf_proc_dump_build_key (key, key_prefix, "disabled"); + gf_proc_dump_write (key, "%d", file->disabled); + + gf_proc_dump_build_key (key, key_prefix, "disable_till"); + gf_proc_dump_write (key, "%lu", file->disable_till); + + gf_proc_dump_build_key (key, key_prefix, "window_conf"); + gf_proc_dump_write (key, "%"GF_PRI_SIZET, file->window_conf); + + gf_proc_dump_build_key (key, key_prefix, "window_current"); + gf_proc_dump_write (key, "%"GF_PRI_SIZET, file->window_current); + + gf_proc_dump_build_key (key, key_prefix, "flags"); + gf_proc_dump_write (key, "%s", (file->flags & O_APPEND) ? "O_APPEND" + : "!O_APPEND"); + + gf_proc_dump_build_key (key, key_prefix, "aggregate_current"); + gf_proc_dump_write (key, "%"GF_PRI_SIZET, file->aggregate_current); + + gf_proc_dump_build_key (key, key_prefix, "refcount"); + gf_proc_dump_write (key, "%d", file->refcount); + + gf_proc_dump_build_key (key, key_prefix, "op_ret"); + gf_proc_dump_write (key, "%d", file->op_ret); + + gf_proc_dump_build_key (key, key_prefix, "op_errno"); + gf_proc_dump_write (key, "%d", file->op_errno); + + LOCK (&file->lock); + { + if (!list_empty (&file->request)) { + __wb_dump_requests (&file->request, key_prefix, 0); + } + + if (!list_empty (&file->passive_requests)) { + __wb_dump_requests (&file->passive_requests, key_prefix, + 1); + } + } + UNLOCK (&file->lock); + +out: + return ret; +} + + +int +validate_options (xlator_t *this, dict_t *options, char **op_errstr) +{ + char *str=NULL; + uint64_t window_size; + gf_boolean_t flush_behind; + + int ret = 0; + + + + ret = dict_get_str (options, "cache-size", + &str); + if (ret == 0) { + ret = gf_string2bytesize (str, &window_size); + if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, "Validation" + "'option cache-size %s failed , Invalid" + " number format, ", str); + *op_errstr = strdup ("Error, Invalid num format"); + ret = -1; + goto out; + } + + if (window_size < (512 * GF_UNIT_KB)) { + gf_log(this->name, GF_LOG_WARNING, "Validation" + "'option cache-size %s' failed , Min value" + "should be 512KiB ", str); + *op_errstr = strdup ("Error, Should be min 512KB"); + ret = -1; + goto out; + } + + if (window_size > (1 * GF_UNIT_GB)) { + gf_log(this->name, GF_LOG_WARNING, "Reconfiguration" + "'option cache-size %s' failed , Max value" + "can be 1 GiB", str); + *op_errstr = strdup ("Error, Max Value is 1GB"); + ret = -1; + goto out; + } + + + gf_log(this->name, GF_LOG_DEBUG, "Validated " + "'option cache-size %s '", str); + } + ret = dict_get_str (options, "flush-behind", + &str); + if (ret == 0) { + ret = gf_string2boolean (str, + &flush_behind); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "'flush-behind' takes only boolean arguments"); + *op_errstr = strdup ("Error, should be boolean"); + ret = -1; + goto out; + } + } + ret =0; +out: + return ret; +} + +int +reconfigure (xlator_t *this, dict_t *options) +{ + char *str=NULL; + uint64_t window_size; + wb_conf_t *conf = NULL; + int ret = 0; + + conf = this->private; + + ret = dict_get_str (options, "cache-size", + &str); + if (ret == 0) { + ret = gf_string2bytesize (str, &window_size); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Reconfiguration" + "'option cache-size %s failed , Invalid" + " number format, Defaulting to old value " + "(%"PRIu64")", str, conf->window_size); + ret = -1; + goto out; + } + + if (window_size < (512 * GF_UNIT_KB)) { + gf_log(this->name, GF_LOG_ERROR, "Reconfiguration" + "'option cache-size %s' failed , Max value" + "can be 512KiB, Defaulting to old value " + "(%"PRIu64")", str, conf->window_size); + ret = -1; + goto out; + } + + if (window_size > (2 * GF_UNIT_GB)) { + gf_log(this->name, GF_LOG_ERROR, "Reconfiguration" + "'option cache-size %s' failed , Max value" + "can be 1 GiB, Defaulting to old value " + "(%"PRIu64")", str, conf->window_size); + ret = -1; + goto out; + } + + conf->window_size = window_size; + gf_log(this->name, GF_LOG_DEBUG, "Reconfiguring " + "'option cache-size %s ' to %"PRIu64, str, + conf->window_size); + } + else + conf->window_size = WB_WINDOW_SIZE; + + ret = dict_get_str (options, "flush-behind", + &str); + if (ret == 0) { + ret = gf_string2boolean (str, + &conf->flush_behind); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'flush-behind' takes only boolean arguments"); + conf->flush_behind = 1; + return -1; + } + if (conf->flush_behind) { + gf_log (this->name, GF_LOG_DEBUG, + "enabling flush-behind"); + } + else + gf_log (this->name, GF_LOG_DEBUG, + "disabling flush-behind"); + } + + +out: + return 0; + +} + int32_t init (xlator_t *this) { @@ -2551,7 +2936,7 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_ERROR, "aggregate-size(%"PRIu64") cannot be more than " "window-size" - "(%"PRIu64")", conf->window_size, conf->aggregate_size); + "(%"PRIu64")", conf->aggregate_size, conf->window_size); FREE (conf); return -1; } @@ -2599,10 +2984,16 @@ fini (xlator_t *this) { wb_conf_t *conf = this->private; + if (!conf) + return; + this->private = NULL; FREE (conf); return; } +struct xlator_mops mops = { + +}; struct xlator_fops fops = { .writev = wb_writev, @@ -2618,8 +3009,6 @@ struct xlator_fops fops = { .setattr = wb_setattr, }; -struct xlator_mops mops = { -}; struct xlator_cbks cbks = { .release = wb_release -- cgit