summaryrefslogtreecommitdiffstats
path: root/xlators/performance/read-ahead
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/performance/read-ahead')
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am9
-rw-r--r--xlators/performance/read-ahead/src/page.c684
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h26
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1597
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h148
5 files changed, 1419 insertions, 1045 deletions
diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am
index 7bb902282..be80ae7ac 100644
--- a/xlators/performance/read-ahead/src/Makefile.am
+++ b/xlators/performance/read-ahead/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = read-ahead.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-read_ahead_la_LDFLAGS = -module -avoidversion
+read_ahead_la_LDFLAGS = -module -avoid-version
read_ahead_la_SOURCES = read-ahead.c page.c
read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = read-ahead.h
+noinst_HEADERS = read-ahead.h read-ahead-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c
index 07ab84ed8..e79e7ae78 100644
--- a/xlators/performance/read-ahead/src/page.c
+++ b/xlators/performance/read-ahead/src/page.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -32,79 +23,88 @@
ra_page_t *
ra_page_get (ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ page = file->pages.next;
+ rounded_offset = floor (offset, file->page_size);
- if (page == &file->pages || page->offset != rounded_offset)
- page = NULL;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- return page;
+ if (page == &file->pages || page->offset != rounded_offset)
+ page = NULL;
+
+out:
+ return page;
}
ra_page_t *
ra_page_create (ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
- ra_page_t *newpage = NULL;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ra_page_t *newpage = NULL;
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ page = file->pages.next;
+ rounded_offset = floor (offset, file->page_size);
- if (page == &file->pages || page->offset != rounded_offset) {
- newpage = CALLOC (1, sizeof (*newpage));
- if (!newpage)
- return NULL;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
+
+ if (page == &file->pages || page->offset != rounded_offset) {
+ newpage = GF_CALLOC (1, sizeof (*newpage), gf_ra_mt_ra_page_t);
+ if (!newpage) {
+ goto out;
+ }
- newpage->offset = rounded_offset;
- newpage->prev = page->prev;
- newpage->next = page;
- newpage->file = file;
- page->prev->next = newpage;
- page->prev = newpage;
+ newpage->offset = rounded_offset;
+ newpage->prev = page->prev;
+ newpage->next = page;
+ newpage->file = file;
+ page->prev->next = newpage;
+ page->prev = newpage;
- page = newpage;
- }
+ page = newpage;
+ }
- return page;
+out:
+ return page;
}
void
ra_wait_on_page (ra_page_t *page, call_frame_t *frame)
{
- ra_waitq_t *waitq = NULL;
- ra_local_t *local = NULL;
-
- local = frame->local;
- waitq = CALLOC (1, sizeof (*waitq));
- if (!waitq) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "out of memory");
+ ra_waitq_t *waitq = NULL;
+ ra_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+
+ local = frame->local;
+
+ waitq = GF_CALLOC (1, sizeof (*waitq), gf_ra_mt_ra_waitq_t);
+ if (!waitq) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto out;
- }
+ }
- waitq->data = frame;
- waitq->next = page->waitq;
- page->waitq = waitq;
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ page->waitq = waitq;
- ra_local_lock (local);
- {
- local->wait_count++;
- }
- ra_local_unlock (local);
+ ra_local_lock (local);
+ {
+ local->wait_count++;
+ }
+ ra_local_unlock (local);
out:
return;
@@ -114,118 +114,146 @@ out:
void
ra_waitq_return (ra_waitq_t *waitq)
{
- ra_waitq_t *trav = NULL;
- ra_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ra_waitq_t *trav = NULL;
+ ra_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
+
+ frame = trav->data;
+ ra_frame_return (frame);
+ GF_FREE (trav);
+ }
- frame = trav->data;
- ra_frame_return (frame);
- free (trav);
- }
+ return;
}
int
ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_local_t *local = NULL;
- off_t pending_offset = 0;
- ra_file_t *file = NULL;
- ra_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- ra_waitq_t *waitq = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fd = local->fd;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
-
- file = (ra_file_t *)(long)tmp_file;
- pending_offset = local->pending_offset;
- trav_offset = pending_offset;
- payload_size = op_ret;
-
- ra_file_lock (file);
- {
- if (op_ret >= 0)
- file->stbuf = *stbuf;
-
- if (op_ret < 0) {
- page = ra_page_get (file, pending_offset);
- if (page)
- waitq = ra_page_error (page, op_ret, op_errno);
- goto unlock;
- }
-
- page = ra_page_get (file, pending_offset);
- if (!page) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
- pending_offset, file->page_size, file);
- goto unlock;
- }
-
- if (page->vector) {
- iobref_unref (page->iobref);
- free (page->vector);
- }
-
- page->vector = iov_dup (vector, count);
+ ra_local_t *local = NULL;
+ off_t pending_offset = 0;
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ fd_t *fd = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_ASSERT (frame);
+
+ local = frame->local;
+ fd = local->fd;
+
+ fd_ctx_get (fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ pending_offset = local->pending_offset;
+
+ if (file == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "read-ahead context not set in fd (%p)", fd);
+ op_ret = -1;
+ op_errno = EBADF;
+ goto out;
+ }
+
+ ra_file_lock (file);
+ {
+ if (op_ret >= 0)
+ file->stbuf = *stbuf;
+
+ page = ra_page_get (file, pending_offset);
+
+ if (!page) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
+ pending_offset, file->page_size, file);
+ goto unlock;
+ }
+
+ /*
+ * "Dirty" means that the request was a pure read-ahead; it's
+ * set for requests we issue ourselves, and cleared when user
+ * requests are issued or put on the waitq. "Poisoned" means
+ * that we got a write while a read was still in flight, and we
+ * couldn't stop it so we marked it instead. If it's both
+ * dirty and poisoned by the time we get here, we cancel its
+ * effect so that a subsequent user read doesn't get data that
+ * we know is stale (because we made it stale ourselves). We
+ * can't use ESTALE because that has special significance.
+ * ECANCELED has no such special meaning, and is close to what
+ * we're trying to indicate.
+ */
+ if (page->dirty && page->poisoned) {
+ op_ret = -1;
+ op_errno = ECANCELED;
+ }
+
+ if (op_ret < 0) {
+ waitq = ra_page_error (page, op_ret, op_errno);
+ goto unlock;
+ }
+
+ if (page->vector) {
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ }
+
+ page->vector = iov_dup (vector, count);
if (page->vector == NULL) {
waitq = ra_page_error (page, -1, ENOMEM);
goto unlock;
}
- page->count = count;
- page->iobref = iobref_ref (iobref);
- page->ready = 1;
+ page->count = count;
+ page->iobref = iobref_ref (iobref);
+ page->ready = 1;
- page->size = iov_length (vector, count);
+ page->size = iov_length (vector, count);
- waitq = ra_page_wakeup (page);
- }
+ waitq = ra_page_wakeup (page);
+ }
unlock:
- ra_file_unlock (file);
+ ra_file_unlock (file);
- ra_waitq_return (waitq);
+ ra_waitq_return (waitq);
- fd_unref (local->fd);
+ fd_unref (local->fd);
- free (frame->local);
- frame->local = NULL;
+ mem_put (frame->local);
+ frame->local = NULL;
- STACK_DESTROY (frame->root);
- return 0;
+out:
+ STACK_DESTROY (frame->root);
+ return 0;
}
void
ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
{
- call_frame_t *fault_frame = NULL;
- ra_local_t *fault_local = NULL, *local = NULL;
- ra_page_t *page = NULL;
- ra_waitq_t *waitq = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- local = frame->local;
- fault_frame = copy_frame (frame);
+ call_frame_t *fault_frame = NULL;
+ ra_local_t *fault_local = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ fault_frame = copy_frame (frame);
if (fault_frame == NULL) {
op_ret = -1;
op_errno = ENOMEM;
goto err;
}
- fault_local = CALLOC (1, sizeof (ra_local_t));
+ fault_local = mem_get0 (THIS->local_pool);
if (fault_local == NULL) {
STACK_DESTROY (fault_frame->root);
op_ret = -1;
@@ -233,18 +261,18 @@ ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
goto err;
}
- fault_frame->local = fault_local;
- fault_local->pending_offset = offset;
- fault_local->pending_size = file->page_size;
+ fault_frame->local = fault_local;
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = file->page_size;
- fault_local->fd = fd_ref (file->fd);
+ fault_local->fd = fd_ref (file->fd);
- STACK_WIND (fault_frame, ra_fault_cbk,
- FIRST_CHILD (fault_frame->this),
- FIRST_CHILD (fault_frame->this)->fops->readv,
- file->fd, file->page_size, offset);
+ STACK_WIND (fault_frame, ra_fault_cbk,
+ FIRST_CHILD (fault_frame->this),
+ FIRST_CHILD (fault_frame->this)->fops->readv,
+ file->fd, file->page_size, offset, 0, NULL);
- return;
+ return;
err:
ra_file_lock (file);
@@ -255,80 +283,88 @@ err:
op_errno);
}
ra_file_unlock (file);
-
+
if (waitq != NULL) {
ra_waitq_return (waitq);
}
+
+out:
+ return;
}
+
void
ra_frame_fill (ra_page_t *page, call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ra_fill_t *new = NULL;
-
- local = frame->local;
- fill = &local->fill;
-
- if (local->op_ret != -1 && page->size) {
- if (local->offset > page->offset)
- src_offset = local->offset - page->offset;
- else
- dst_offset = page->offset - local->offset;
-
- copy_size = min (page->size - src_offset,
- local->size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- fill = fill->next;
- while (fill != &local->fill) {
- if (fill->offset > page->offset) {
- break;
- }
- fill = fill->next;
- }
-
- new = CALLOC (1, sizeof (*new));
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ra_fill_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+
+ local = frame->local;
+ fill = &local->fill;
+
+ if (local->op_ret != -1 && page->size) {
+ if (local->offset > page->offset)
+ src_offset = local->offset - page->offset;
+ else
+ dst_offset = page->offset - local->offset;
+
+ copy_size = min (page->size - src_offset,
+ local->size - dst_offset);
+
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
+
+ fill = fill->next;
+ while (fill != &local->fill) {
+ if (fill->offset > page->offset) {
+ break;
+ }
+ fill = fill->next;
+ }
+
+ new = GF_CALLOC (1, sizeof (*new), gf_ra_mt_ra_fill_t);
if (new == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto out;
}
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- NULL);
- new->vector = CALLOC (new->count, sizeof (struct iovec));
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref (page->iobref);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset, src_offset+copy_size,
+ NULL);
+ new->vector = GF_CALLOC (new->count, sizeof (struct iovec),
+ gf_ra_mt_iovec);
if (new->vector == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- FREE (new);
+ GF_FREE (new);
goto out;
}
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- new->vector);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset, src_offset+copy_size,
+ new->vector);
- new->next = fill;
- new->prev = new->next->prev;
- new->next->prev = new;
- new->prev->next = new;
+ new->next = fill;
+ new->prev = new->next->prev;
+ new->next->prev = new;
+ new->prev->next = new;
- local->op_ret += copy_size;
- }
+ local->op_ret += copy_size;
+ }
out:
return;
@@ -338,35 +374,36 @@ out:
void
ra_frame_unwind (call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- int32_t count = 0;
- struct iovec *vector;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- ra_fill_t *next = NULL;
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fill = local->fill.next;
-
- iobref = iobref_new ();
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ ra_fill_t *next = NULL;
+ fd_t *fd = NULL;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+
+ local = frame->local;
+ fill = local->fill.next;
+
+ iobref = iobref_new ();
if (iobref == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
}
- frame->local = NULL;
+ frame->local = NULL;
- while (fill != &local->fill) {
- count += fill->count;
- fill = fill->next;
- }
+ while (fill != &local->fill) {
+ count += fill->count;
+ fill = fill->next;
+ }
- vector = CALLOC (count, sizeof (*vector));
+ vector = GF_CALLOC (count, sizeof (*vector), gf_ra_mt_iovec);
if (vector == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
@@ -374,42 +411,43 @@ ra_frame_unwind (call_frame_t *frame)
iobref = NULL;
}
- fill = local->fill.next;
+ fill = local->fill.next;
- while (fill != &local->fill) {
- next = fill->next;
+ while (fill != &local->fill) {
+ next = fill->next;
if ((vector != NULL) && (iobref != NULL)) {
memcpy (((char *)vector) + copied, fill->vector,
fill->count * sizeof (*vector));
-
+
copied += (fill->count * sizeof (*vector));
iobref_merge (iobref, fill->iobref);
}
- fill->next->prev = fill->prev;
- fill->prev->next = fill->prev;
+ fill->next->prev = fill->prev;
+ fill->prev->next = fill->prev;
- iobref_unref (fill->iobref);
- free (fill->vector);
- free (fill);
+ iobref_unref (fill->iobref);
+ GF_FREE (fill->vector);
+ GF_FREE (fill);
- fill = next;
- }
+ fill = next;
+ }
- fd = local->fd;
- ret = fd_ctx_get (fd, frame->this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ fd = local->fd;
+ fd_ctx_get (fd, frame->this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
- vector, count, &file->stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
+ vector, count, &file->stbuf, iobref, NULL);
- iobref_unref (iobref);
- pthread_mutex_destroy (&local->local_lock);
- free (local);
- free (vector);
+ iobref_unref (iobref);
+ pthread_mutex_destroy (&local->local_lock);
+ mem_put (local);
+ GF_FREE (vector);
- return;
+out:
+ return;
}
/*
@@ -420,25 +458,28 @@ ra_frame_unwind (call_frame_t *frame)
void
ra_frame_return (call_frame_t *frame)
{
- ra_local_t *local = NULL;
- int32_t wait_count = 0;
+ ra_local_t *local = NULL;
+ int32_t wait_count = 0;
- local = frame->local;
- assert (local->wait_count > 0);
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- ra_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ra_local_unlock (local);
+ local = frame->local;
+ GF_ASSERT (local->wait_count > 0);
- if (!wait_count)
- ra_frame_unwind (frame);
+ ra_local_lock (local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ra_local_unlock (local);
+
+ if (!wait_count)
+ ra_frame_unwind (frame);
- return;
+out:
+ return;
}
-/*
+/*
* ra_page_wakeup -
* @page:
*
@@ -446,19 +487,24 @@ ra_frame_return (call_frame_t *frame)
ra_waitq_t *
ra_page_wakeup (ra_page_t *page)
{
- ra_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame;
+ ra_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
- waitq = page->waitq;
- page->waitq = NULL;
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ra_frame_fill (page, frame);
- }
+ waitq = page->waitq;
+ page->waitq = NULL;
- return waitq;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ra_frame_fill (page, frame);
+ }
+
+ if (page->stale) {
+ ra_page_purge (page);
+ }
+out:
+ return waitq;
}
/*
@@ -469,14 +515,20 @@ ra_page_wakeup (ra_page_t *page)
void
ra_page_purge (ra_page_t *page)
{
- page->prev->next = page->next;
- page->next->prev = page->prev;
-
- if (page->iobref) {
- iobref_unref (page->iobref);
- }
- free (page->vector);
- free (page);
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
+
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ if (page->iobref) {
+ iobref_unref (page->iobref);
+ }
+
+ GF_FREE (page->vector);
+ GF_FREE (page);
+
+out:
+ return;
}
/*
@@ -489,32 +541,33 @@ ra_page_purge (ra_page_t *page)
ra_waitq_t *
ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
{
+ ra_waitq_t *waitq = NULL;
+ ra_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ ra_local_t *local = NULL;
- ra_waitq_t *waitq = NULL;
- ra_waitq_t *trav = NULL;
- call_frame_t *frame = NULL;
- ra_local_t *local = NULL;
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
- local = frame->local;
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
+ local = frame->local;
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
- ra_page_purge (page);
+ ra_page_purge (page);
- return waitq;
+out:
+ return waitq;
}
-/*
+/*
* ra_file_destroy -
* @file:
*
@@ -522,24 +575,29 @@ ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
void
ra_file_destroy (ra_file_t *file)
{
- ra_conf_t *conf = NULL;
- ra_page_t *trav = NULL;
-
- conf = file->conf;
-
- ra_conf_lock (conf);
- {
- file->prev->next = file->next;
- file->next->prev = file->prev;
- }
- ra_conf_unlock (conf);
-
- trav = file->pages.next;
- while (trav != &file->pages) {
- ra_page_error (trav, -1, EINVAL);
- trav = file->pages.next;
- }
-
- pthread_mutex_destroy (&file->file_lock);
- free (file);
+ ra_conf_t *conf = NULL;
+ ra_page_t *trav = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
+
+ conf = file->conf;
+
+ ra_conf_lock (conf);
+ {
+ file->prev->next = file->next;
+ file->next->prev = file->prev;
+ }
+ ra_conf_unlock (conf);
+
+ trav = file->pages.next;
+ while (trav != &file->pages) {
+ ra_page_error (trav, -1, EINVAL);
+ trav = file->pages.next;
+ }
+
+ pthread_mutex_destroy (&file->file_lock);
+ GF_FREE (file);
+
+out:
+ return;
}
diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
new file mode 100644
index 000000000..219e29289
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __RA_MEM_TYPES_H__
+#define __RA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_ra_mem_types_ {
+ gf_ra_mt_ra_file_t = gf_common_mt_end + 1,
+ gf_ra_mt_ra_conf_t,
+ gf_ra_mt_ra_page_t,
+ gf_ra_mt_ra_waitq_t,
+ gf_ra_mt_ra_fill_t,
+ gf_ra_mt_iovec,
+ gf_ra_mt_end
+};
+#endif
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
index da9715c84..069ab1f1a 100644
--- a/xlators/performance/read-ahead/src/read-ahead.c
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -1,27 +1,18 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*
- TODO:
- - handle O_DIRECT
- - maintain offset, flush on lseek
- - ensure efficient memory managment in case of random seek
+/*
+ TODO:
+ - handle O_DIRECT
+ - maintain offset, flush on lseek
+ - ensure efficient memory management in case of random seek
*/
#ifndef _CONFIG_H
@@ -44,173 +35,177 @@ read_ahead (call_frame_t *frame, ra_file_t *file);
int
ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- long wbflags = 0;
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- if (op_ret == -1) {
- goto unwind;
- }
+ conf = this->private;
- wbflags = (long)frame->local;
+ if (op_ret == -1) {
+ goto unwind;
+ }
- file = CALLOC (1, sizeof (*file));
- if (!file) {
+ file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
+ if (!file) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
-
- if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT open, we disable caching on it */
+ goto unwind;
+ }
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
+ /* If O_DIRECT open, we disable caching on it */
- if (wbflags & GF_OPEN_NOWB) {
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
file->disabled = 1;
- }
- file->offset = (unsigned long long) 0;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
+ file->offset = (unsigned long long) 0;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long) 0;
+ file->pages.file = file;
+
+ ra_conf_lock (conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock (conf);
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init (&file->file_lock, NULL);
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
+ if (!file->disabled) {
+ file->page_count = 1;
+ }
- if (!file->disabled) {
- file->page_count = 1;
- }
+ ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "cannot set read-ahead context information in fd (%p)",
+ fd);
+ ra_file_destroy (file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+unwind:
frame->local = NULL;
-unwind:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
int
ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- if (op_ret == -1) {
- goto unwind;
- }
+ conf = this->private;
- file = CALLOC (1, sizeof (*file));
- if (!file) {
+ if (op_ret == -1) {
+ goto unwind;
+ }
+
+ file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
+ if (!file) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
-
- if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
+ goto unwind;
+ }
- /* If O_DIRECT open, we disable caching on it */
+ /* If O_DIRECT open, we disable caching on it */
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
- file->offset = (unsigned long long) 0;
- //file->size = fd->inode->buf.st_size;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
+ file->offset = (unsigned long long) 0;
+ //file->size = fd->inode->buf.ia_size;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long) 0;
+ file->pages.file = file;
+
+ ra_conf_lock (conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock (conf);
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init (&file->file_lock, NULL);
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
+ ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot set read ahead context information in fd (%p)",
+ fd);
+ ra_file_destroy (file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
int
ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
- frame->local = (void *)(long)wbflags;
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
- STACK_WIND (frame, ra_open_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open,
- loc, flags, fd, wbflags);
+ STACK_WIND (frame, ra_open_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
- return 0;
+ return 0;
}
+
int
ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, ra_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+
+ STACK_WIND (frame, ra_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
- return 0;
+ return 0;
}
/* free cache pages between offset and offset+size,
@@ -218,747 +213,1047 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
static void
-flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size)
-{
- ra_page_t *trav = NULL;
- ra_page_t *next = NULL;
-
- ra_file_lock (file);
- {
- trav = file->pages.next;
- while (trav != &file->pages
- && trav->offset < (offset + size)) {
-
- next = trav->next;
- if (trav->offset >= offset && !trav->waitq) {
- ra_page_purge (trav);
- }
- trav = next;
- }
- }
- ra_file_unlock (file);
+flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size,
+ int for_write)
+{
+ ra_page_t *trav = NULL;
+ ra_page_t *next = NULL;
+
+ ra_file_lock (file);
+ {
+ trav = file->pages.next;
+ while (trav != &file->pages
+ && trav->offset < (offset + size)) {
+
+ next = trav->next;
+ if (trav->offset >= offset) {
+ if (!trav->waitq) {
+ ra_page_purge (trav);
+ }
+ else {
+ trav->stale = 1;
+
+ if (for_write) {
+ trav->poisoned = 1;
+ }
+ }
+ }
+ trav = next;
+ }
+ }
+ ra_file_unlock (file);
}
int
ra_release (xlator_t *this, fd_t *fd)
{
- uint64_t tmp_file = 0;
- int ret = 0;
+ uint64_t tmp_file = 0;
+ int ret = 0;
- ret = fd_ctx_del (fd, this, &tmp_file);
-
- if (!ret) {
- ra_file_destroy ((ra_file_t *)(long)tmp_file);
- }
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+ ret = fd_ctx_del (fd, this, &tmp_file);
+
+ if (!ret) {
+ ra_file_destroy ((ra_file_t *)(long)tmp_file);
+ }
- return 0;
+out:
+ return 0;
}
void
read_ahead (call_frame_t *frame, ra_file_t *file)
{
- off_t ra_offset = 0;
- size_t ra_size = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- off_t cap = 0;
- char fault = 0;
+ off_t ra_offset = 0;
+ size_t ra_size = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ off_t cap = 0;
+ char fault = 0;
- if (!file->page_count)
- return;
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
- ra_size = file->page_size * file->page_count;
- ra_offset = floor (file->offset, file->page_size);
- cap = file->size ? file->size : file->offset + ra_size;
+ if (!file->page_count) {
+ goto out;
+ }
- while (ra_offset < min (file->offset + ra_size, cap)) {
+ ra_size = file->page_size * file->page_count;
+ ra_offset = floor (file->offset, file->page_size);
+ cap = file->size ? file->size : file->offset + ra_size;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, ra_offset);
- }
- ra_file_unlock (file);
+ while (ra_offset < min (file->offset + ra_size, cap)) {
- if (!trav)
- break;
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, ra_offset);
+ }
+ ra_file_unlock (file);
- ra_offset += file->page_size;
- }
+ if (!trav)
+ break;
- if (trav)
- /* comfortable enough */
- return;
-
- trav_offset = ra_offset;
-
- trav = file->pages.next;
- cap = file->size ? file->size : ra_offset + ra_size;
-
- while (trav_offset < min(ra_offset + ra_size, cap)) {
- fault = 0;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- fault = 1;
- trav = ra_page_create (file, trav_offset);
- if (trav)
- trav->dirty = 1;
- }
- }
- ra_file_unlock (file);
-
- if (!trav) {
- /* OUT OF MEMORY */
- break;
- }
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "RA at offset=%"PRId64, trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
- trav_offset += file->page_size;
- }
+ ra_offset += file->page_size;
+ }
+
+ if (trav) {
+ /* comfortable enough */
+ goto out;
+ }
+
+ trav_offset = ra_offset;
+
+ cap = file->size ? file->size : ra_offset + ra_size;
+
+ while (trav_offset < min(ra_offset + ra_size, cap)) {
+ fault = 0;
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ fault = 1;
+ trav = ra_page_create (file, trav_offset);
+ if (trav)
+ trav->dirty = 1;
+ }
+ }
+ ra_file_unlock (file);
+
+ if (!trav) {
+ /* OUT OF MEMORY */
+ break;
+ }
+
+ if (fault) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "RA at offset=%"PRId64, trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
+ trav_offset += file->page_size;
+ }
- return;
+out:
+ return;
}
int
ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_DESTROY (frame->root);
- return 0;
+ GF_ASSERT (frame);
+ STACK_DESTROY (frame->root);
+ return 0;
}
static void
dispatch_requests (call_frame_t *frame, ra_file_t *file)
{
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- call_frame_t *ra_frame = NULL;
- char need_atime_update = 1;
- char fault = 0;
-
- local = frame->local;
- conf = file->conf;
-
- rounded_offset = floor (local->offset, file->page_size);
- rounded_end = roof (local->offset + local->size, file->page_size);
-
- trav_offset = rounded_offset;
- trav = file->pages.next;
-
- while (trav_offset < rounded_end) {
- fault = 0;
-
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- trav = ra_page_create (file, trav_offset);
- fault = 1;
- need_atime_update = 0;
- }
-
- if (!trav) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ call_frame_t *ra_frame = NULL;
+ char need_atime_update = 1;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ local = frame->local;
+ conf = file->conf;
+
+ rounded_offset = floor (local->offset, file->page_size);
+ rounded_end = roof (local->offset + local->size, file->page_size);
+
+ trav_offset = rounded_offset;
+
+ while (trav_offset < rounded_end) {
+ fault = 0;
+
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ trav = ra_page_create (file, trav_offset);
+ if (!trav) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ fault = 1;
+ need_atime_update = 0;
}
+ trav->dirty = 0;
+
+ if (trav->ready) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "HIT at offset=%"PRId64".",
+ trav_offset);
+ ra_frame_fill (trav, frame);
+ } else {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "IN-TRANSIT at offset=%"PRId64".",
+ trav_offset);
+ ra_wait_on_page (trav, frame);
+ need_atime_update = 0;
+ }
+ }
+ unlock:
+ ra_file_unlock (file);
- if (trav->ready) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "HIT at offset=%"PRId64".",
- trav_offset);
- ra_frame_fill (trav, frame);
- } else {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "IN-TRANSIT at offset=%"PRId64".",
- trav_offset);
- ra_wait_on_page (trav, frame);
- need_atime_update = 0;
- }
- }
- unlock:
- ra_file_unlock (file);
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "MISS at offset=%"PRId64".",
- trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
-
- trav_offset += file->page_size;
- }
+ if (local->op_ret == -1) {
+ goto out;
+ }
- if (need_atime_update && conf->force_atime_update) {
- /* TODO: use untimens() since readv() can confuse underlying
- io-cache and others */
- ra_frame = copy_frame (frame);
+ if (fault) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "MISS at offset=%"PRId64".",
+ trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
+
+ trav_offset += file->page_size;
+ }
+
+ if (need_atime_update && conf->force_atime_update) {
+ /* TODO: use untimens() since readv() can confuse underlying
+ io-cache and others */
+ ra_frame = copy_frame (frame);
if (ra_frame == NULL) {
goto out;
}
- STACK_WIND (ra_frame, ra_need_atime_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, 1, 1);
- }
+ STACK_WIND (ra_frame, ra_need_atime_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv,
+ file->fd, 1, 1, 0, NULL);
+ }
out:
- return ;
+ return ;
}
int
ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
+ GF_ASSERT (frame);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+
+ return 0;
}
int
ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- ra_file_t *file = NULL;
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- int op_errno = 0;
- int ret = 0;
- char expected_offset = 1;
- uint64_t tmp_file = 0;
+ ra_file_t *file = NULL;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ int op_errno = EINVAL;
+ char expected_offset = 1;
+ uint64_t tmp_file = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
- offset, size);
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "readv received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ conf = this->private;
- if (file->offset != offset) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unexpected offset (%"PRId64" != %"PRId64") resetting",
- file->offset, offset);
+ gf_log (this->name, GF_LOG_TRACE,
+ "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
+ offset, size);
- expected_offset = file->expected = file->page_count = 0;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "expected offset (%"PRId64") when page_count=%d",
- offset, file->page_count);
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- if (file->expected < (conf->page_size * conf->page_count)) {
- file->expected += size;
- file->page_count = min ((file->expected / file->page_size),
- conf->page_count);
- }
- }
+ if (!file || file->disabled) {
+ goto disabled;
+ }
- if (!expected_offset) {
- flush_region (frame, file, 0, file->pages.prev->offset + 1);
- }
+ if (file->offset != offset) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unexpected offset (%"PRId64" != %"PRId64") resetting",
+ file->offset, offset);
+
+ expected_offset = file->expected = file->page_count = 0;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "expected offset (%"PRId64") when page_count=%d",
+ offset, file->page_count);
+
+ if (file->expected < (file->page_size * conf->page_count)) {
+ file->expected += size;
+ file->page_count = min ((file->expected
+ / file->page_size),
+ conf->page_count);
+ }
+ }
- if (file->disabled) {
- STACK_WIND (frame, ra_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, size, offset);
- return 0;
- }
+ if (!expected_offset) {
+ flush_region (frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
- local = (void *) CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- local->fd = fd;
- local->offset = offset;
- local->size = size;
- local->wait_count = 1;
+ local->fd = fd;
+ local->offset = offset;
+ local->size = size;
+ local->wait_count = 1;
- local->fill.next = &local->fill;
- local->fill.prev = &local->fill;
+ local->fill.next = &local->fill;
+ local->fill.prev = &local->fill;
- pthread_mutex_init (&local->local_lock, NULL);
+ pthread_mutex_init (&local->local_lock, NULL);
- frame->local = local;
+ frame->local = local;
- dispatch_requests (frame, file);
+ dispatch_requests (frame, file);
- flush_region (frame, file, 0, floor (offset, file->page_size));
+ flush_region (frame, file, 0, floor (offset, file->page_size), 0);
read_ahead (frame, file);
- ra_frame_return (frame);
+ ra_frame_return (frame);
- file->offset = offset + size;
+ file->offset = offset + size;
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+
+ return 0;
- return 0;
+disabled:
+ STACK_WIND (frame, ra_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
ra_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ GF_ASSERT (frame);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
ra_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *prebuf, struct stat *postbuf)
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ GF_ASSERT (frame);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
int
-ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "flush received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ fd_ctx_get (fd, this, &tmp_file);
- STACK_WIND (frame, ra_flush_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush,
- fd);
- return 0;
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
+ }
+
+ STACK_WIND (frame, ra_flush_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->flush, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
return 0;
}
int
-ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "fsync received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1);
- }
+ fd_ctx_get (fd, this, &tmp_file);
- STACK_WIND (frame, ra_fsync_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
- return 0;
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
+ }
+
+ STACK_WIND (frame, ra_fsync_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsync, fd, datasync, xdata);
+ return 0;
unwind:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
+ ra_file_t *file = NULL;
- fd = frame->local;
+ GF_ASSERT (frame);
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ file = frame->local;
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
int
ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "writev received on fd with"
- "no file set in its context");
- goto unwind;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ frame->local = file;
+ /* reset the read-ahead counters too */
+ file->expected = file->page_count = 0;
}
- flush_region (frame, file, 0, file->pages.prev->offset+1);
-
- /* reset the read-ahead counters too */
- file->expected = file->page_count = 0;
-
- frame->local = fd;
-
- STACK_WIND (frame, ra_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ STACK_WIND (frame, ra_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
ra_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *prebuf,
- struct stat *postbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
- return 0;
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
int
ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
- return 0;
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = loc->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
+ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
+
+ inode = loc->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_ftruncate.
+ */
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_truncate_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+void
+ra_page_dump (struct ra_page *page)
+{
+ int i = 0;
+ call_frame_t *frame = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ ra_waitq_t *trav = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+
+ gf_proc_dump_write ("size", "%"PRId64, page->size);
+
+ gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+
+ gf_proc_dump_write ("poisoned", "%s", page->poisoned ? "yes" : "no");
+
+ gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+
+ for (trav = page->waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ sprintf (key, "waiting-frame[%d]", i++);
+ gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
}
- UNLOCK (&inode->lock);
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- loc, offset);
- return 0;
+out:
+ return;
}
+int32_t
+ra_fdctx_dump (xlator_t *this, fd_t *fd)
+{
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ int32_t ret = 0, i = 0;
+ uint64_t tmp_file = 0;
+ char *path = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (file == NULL) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_proc_dump_build_key (key_prefix,
+ "xlator.performance.read-ahead",
+ "file");
+
+ gf_proc_dump_add_section (key_prefix);
+
+ ret = __inode_path (fd->inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
+ }
+
+ gf_proc_dump_write ("fd", "%p", fd);
+
+ gf_proc_dump_write ("disabled", "%s", file->disabled ? "yes" : "no");
+
+ if (file->disabled) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_proc_dump_write ("page-size", "%"PRId64, file->page_size);
+
+ gf_proc_dump_write ("page-count", "%u", file->page_count);
+
+ gf_proc_dump_write ("next-expected-offset-for-sequential-reads",
+ "%"PRId64, file->offset);
+
+ for (page = file->pages.next; page != &file->pages;
+ page = page->next) {
+ sprintf (key, "page[%d]", i);
+ gf_proc_dump_write (key, "%p", page[i++]);
+ ra_page_dump (page);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
int
-ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 0);
+ }
+ }
+ UNLOCK (&inode->lock);
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat,
- fd);
- return 0;
+ STACK_WIND (frame, ra_attr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fstat, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_truncate.
+ */
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+static int
+ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_discard_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+static int
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->zerofill, fd,
+ offset, len, xdata);
+ return 0;
- STACK_WIND (frame, ra_truncate_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- fd, offset);
- return 0;
+unwind:
+ STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
ra_priv_dump (xlator_t *this)
{
- ra_conf_t *conf = NULL;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ gf_boolean_t add_section = _gf_false;
- if (!this)
- return -1;
+ if (!this) {
+ goto out;
+ }
conf = this->private;
if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
+ gf_log (this->name, GF_LOG_WARNING, "conf null in xlator");
+ goto out;
}
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.read-ahead",
+ "priv");
+
+ gf_proc_dump_add_section (key_prefix);
+ add_section = _gf_true;
+
ret = pthread_mutex_trylock (&conf->conf_lock);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Unable to lock client %s"
- " errno: %d", this->name, errno);
- return -1;
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("page_size", "%d", conf->page_size);
+ gf_proc_dump_write ("page_count", "%d", conf->page_count);
+ gf_proc_dump_write ("force_atime_update", "%d",
+ conf->force_atime_update);
}
+ pthread_mutex_unlock (&conf->conf_lock);
+ ret = 0;
+out:
+ if (ret && conf) {
+ if (add_section == _gf_false)
+ gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_build_key (key_prefix,
- "xlator.performance.read-ahead",
- "priv");
+ gf_proc_dump_write ("Unable to dump priv",
+ "(Lock acquisition failed) %s", this->name);
+ }
+ return ret;
+}
- gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_build_key (key, key_prefix, "page_size");
- gf_proc_dump_write (key, "%d", conf->page_size);
- gf_proc_dump_build_key (key, key_prefix, "page_count");
- gf_proc_dump_write (key, "%d", conf->page_count);
- gf_proc_dump_build_key (key, key_prefix, "force_atime_update");
- gf_proc_dump_write (key, "%d", conf->force_atime_update);
- pthread_mutex_unlock (&conf->conf_lock);
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
- return 0;
+ if (!this) {
+ goto out;
+ }
+
+ ret = xlator_mem_acct_init (this, gf_ra_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ }
+
+out:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO ("read-ahead", this->private, out);
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("page-count", conf->page_count, options, uint32, out);
+
+ GF_OPTION_RECONF ("page-size", conf->page_size, options, size, out);
+
+ ret = 0;
+ out:
+ return ret;
}
int
init (xlator_t *this)
{
- ra_conf_t *conf = NULL;
- dict_t *options = this->options;
- char *page_count_string = NULL;
- int32_t ret = -1;
+ ra_conf_t *conf = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: read-ahead not configured with exactly one"
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: read-ahead not configured with exactly one"
" child");
goto out;
- }
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = (void *) CALLOC (1, sizeof (*conf));
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ conf = (void *) GF_CALLOC (1, sizeof (*conf), gf_ra_mt_ra_conf_t);
if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
goto out;
}
- conf->page_size = this->ctx->page_size;
- conf->page_count = 4;
-
- if (dict_get (options, "page-count"))
- page_count_string = data_to_str (dict_get (options,
- "page-count"));
- if (page_count_string)
- {
- if (gf_string2uint_base10 (page_count_string, &conf->page_count)
- != 0)
- {
- gf_log ("read-ahead",
- GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "page-count\"",
- page_count_string);
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_count = %u",
- conf->page_count);
- }
-
- if (dict_get (options, "force-atime-update")) {
- char *force_atime_update_str = data_to_str (dict_get (options,
- "force-atime-update"));
- if (gf_string2boolean (force_atime_update_str,
- &conf->force_atime_update) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'force-atime-update' takes only boolean "
- "options");
- goto out;
- }
- if (conf->force_atime_update)
- gf_log (this->name, GF_LOG_DEBUG, "Forcing atime "
- "updates on cache hit");
- }
+ conf->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT ("page-size", conf->page_size, size, out);
+
+ GF_OPTION_INIT ("page-count", conf->page_count, uint32, out);
- conf->files.next = &conf->files;
- conf->files.prev = &conf->files;
+ GF_OPTION_INIT ("force-atime-update", conf->force_atime_update, bool, out);
- pthread_mutex_init (&conf->conf_lock, NULL);
- this->private = conf;
+ conf->files.next = &conf->files;
+ conf->files.prev = &conf->files;
+
+ pthread_mutex_init (&conf->conf_lock, NULL);
+
+ this->local_pool = mem_pool_new (ra_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = conf;
ret = 0;
out:
if (ret == -1) {
- if (conf != NULL) {
- FREE (conf);
- }
+ GF_FREE (conf);
}
return ret;
}
+
void
fini (xlator_t *this)
{
- ra_conf_t *conf = this->private;
+ ra_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+
+ conf = this->private;
+ if (conf == NULL) {
+ goto out;
+ }
+
+ this->private = NULL;
- pthread_mutex_destroy (&conf->conf_lock);
- FREE (conf);
+ GF_ASSERT ((conf->files.next == &conf->files)
+ && (conf->files.prev == &conf->files));
- this->private = NULL;
- return;
+ pthread_mutex_destroy (&conf->conf_lock);
+ GF_FREE (conf);
+
+out:
+ return;
}
struct xlator_fops fops = {
- .open = ra_open,
- .create = ra_create,
- .readv = ra_readv,
- .writev = ra_writev,
- .flush = ra_flush,
- .fsync = ra_fsync,
- .truncate = ra_truncate,
- .ftruncate = ra_ftruncate,
- .fstat = ra_fstat,
-};
-
-struct xlator_mops mops = {
+ .open = ra_open,
+ .create = ra_create,
+ .readv = ra_readv,
+ .writev = ra_writev,
+ .flush = ra_flush,
+ .fsync = ra_fsync,
+ .truncate = ra_truncate,
+ .ftruncate = ra_ftruncate,
+ .fstat = ra_fstat,
+ .discard = ra_discard,
+ .zerofill = ra_zerofill,
};
struct xlator_cbks cbks = {
- .release = ra_release,
+ .release = ra_release,
};
struct xlator_dumpops dumpops = {
.priv = ra_priv_dump,
+ .fdctx = ra_fdctx_dump,
};
struct volume_options options[] = {
- { .key = {"force-atime-update"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"page-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 16
+ { .key = {"force-atime-update"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"page-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 16,
+ .default_value = "4",
+ .description = "Number of pages that will be pre-fetched"
+ },
+ { .key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 1048576 * 64,
+ .default_value = "131072",
+ .description = "Page size with which read-ahead performs server I/O"
},
- { .key = {NULL} },
+ { .key = {NULL} },
};
diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h
index d11143551..d1d768c34 100644
--- a/xlators/performance/read-ahead/src/read-ahead.h
+++ b/xlators/performance/read-ahead/src/read-ahead.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __READ_AHEAD_H
@@ -31,6 +22,7 @@
#include "dict.h"
#include "xlator.h"
#include "common-utils.h"
+#include "read-ahead-mem-types.h"
struct ra_conf;
struct ra_local;
@@ -40,77 +32,79 @@ struct ra_waitq;
struct ra_waitq {
- struct ra_waitq *next;
- void *data;
+ struct ra_waitq *next;
+ void *data;
};
struct ra_fill {
- struct ra_fill *next;
- struct ra_fill *prev;
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
+ struct ra_fill *next;
+ struct ra_fill *prev;
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
struct iobref *iobref;
};
struct ra_local {
- mode_t mode;
- struct ra_fill fill;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- off_t pending_offset;
- size_t pending_size;
- fd_t *fd;
- int32_t wait_count;
- pthread_mutex_t local_lock;
+ mode_t mode;
+ struct ra_fill fill;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ off_t pending_offset;
+ size_t pending_size;
+ fd_t *fd;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
};
struct ra_page {
- struct ra_page *next;
- struct ra_page *prev;
- struct ra_file *file;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ra_waitq *waitq;
+ struct ra_page *next;
+ struct ra_page *prev;
+ struct ra_file *file;
+ char dirty; /* Internal request, not from user. */
+ char poisoned; /* Pending read invalidated by write. */
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ra_waitq *waitq;
struct iobref *iobref;
+ char stale;
};
struct ra_file {
- struct ra_file *next;
- struct ra_file *prev;
- struct ra_conf *conf;
- fd_t *fd;
- int disabled;
- size_t expected;
- struct ra_page pages;
- off_t offset;
- size_t size;
- int32_t refcount;
- pthread_mutex_t file_lock;
- struct stat stbuf;
- uint64_t page_size;
- uint32_t page_count;
+ struct ra_file *next;
+ struct ra_file *prev;
+ struct ra_conf *conf;
+ fd_t *fd;
+ int disabled;
+ size_t expected;
+ struct ra_page pages;
+ off_t offset;
+ size_t size;
+ int32_t refcount;
+ pthread_mutex_t file_lock;
+ struct iatt stbuf;
+ uint64_t page_size;
+ uint32_t page_count;
};
struct ra_conf {
- uint64_t page_size;
- uint32_t page_count;
- void *cache_block;
- struct ra_file files;
- gf_boolean_t force_atime_update;
- pthread_mutex_t conf_lock;
+ uint64_t page_size;
+ uint32_t page_count;
+ void *cache_block;
+ struct ra_file files;
+ gf_boolean_t force_atime_update;
+ pthread_mutex_t conf_lock;
};
@@ -123,19 +117,19 @@ typedef struct ra_fill ra_fill_t;
ra_page_t *
ra_page_get (ra_file_t *file,
- off_t offset);
+ off_t offset);
ra_page_t *
ra_page_create (ra_file_t *file,
- off_t offset);
+ off_t offset);
void
ra_page_fault (ra_file_t *file,
- call_frame_t *frame,
- off_t offset);
+ call_frame_t *frame,
+ off_t offset);
void
ra_wait_on_page (ra_page_t *page,
- call_frame_t *frame);
+ call_frame_t *frame);
ra_waitq_t *
ra_page_wakeup (ra_page_t *page);
@@ -145,8 +139,8 @@ ra_page_flush (ra_page_t *page);
ra_waitq_t *
ra_page_error (ra_page_t *page,
- int32_t op_ret,
- int32_t op_errno);
+ int32_t op_ret,
+ int32_t op_errno);
void
ra_page_purge (ra_page_t *page);
@@ -155,7 +149,7 @@ ra_frame_return (call_frame_t *frame);
void
ra_frame_fill (ra_page_t *page,
- call_frame_t *frame);
+ call_frame_t *frame);
void
ra_file_destroy (ra_file_t *file);
@@ -163,36 +157,36 @@ ra_file_destroy (ra_file_t *file);
static inline void
ra_file_lock (ra_file_t *file)
{
- pthread_mutex_lock (&file->file_lock);
+ pthread_mutex_lock (&file->file_lock);
}
static inline void
ra_file_unlock (ra_file_t *file)
{
- pthread_mutex_unlock (&file->file_lock);
+ pthread_mutex_unlock (&file->file_lock);
}
static inline void
ra_conf_lock (ra_conf_t *conf)
{
- pthread_mutex_lock (&conf->conf_lock);
+ pthread_mutex_lock (&conf->conf_lock);
}
static inline void
ra_conf_unlock (ra_conf_t *conf)
{
- pthread_mutex_unlock (&conf->conf_lock);
+ pthread_mutex_unlock (&conf->conf_lock);
}
static inline void
ra_local_lock (ra_local_t *local)
{
- pthread_mutex_lock (&local->local_lock);
+ pthread_mutex_lock (&local->local_lock);
}
static inline void
ra_local_unlock (ra_local_t *local)
{
- pthread_mutex_unlock (&local->local_lock);
+ pthread_mutex_unlock (&local->local_lock);
}
#endif /* __READ_AHEAD_H */