summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshishir gowda <sgowda@redhat.com>2012-08-23 18:34:09 +0530
committerVijay Bellur <vbellur@redhat.com>2012-08-27 08:08:45 -0700
commit10617e6cbc73329f259b471327d88375352042b0 (patch)
tree8fd7b0a1a17763730f11208908263baa8897df6f
parentd0c52c8064a4097d97180240b36c53bfe98cac22 (diff)
storage/posix: implement native linux AIO support
Configurable via cli with "storage.linux-aio" settable option Backported Avati's patch http://review.gluster.org/#change,3627 BUG: 837495 Change-Id: Ia7c26f5734d34d341debd422a5c59bba31eef844 Signed-off-by: shishir gowda <sgowda@redhat.com> Reviewed-on: http://review.gluster.org/3849 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Amar Tumballi <amarts@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--configure.ac11
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1
-rw-r--r--xlators/storage/posix/src/Makefile.am6
-rw-r--r--xlators/storage/posix/src/posix-aio.c519
-rw-r--r--xlators/storage/posix/src/posix-aio.h49
-rw-r--r--xlators/storage/posix/src/posix-helpers.c26
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h1
-rw-r--r--xlators/storage/posix/src/posix.c48
-rw-r--r--xlators/storage/posix/src/posix.h13
9 files changed, 668 insertions, 6 deletions
diff --git a/configure.ac b/configure.ac
index dd19ac0..9f9ca9f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -519,6 +519,15 @@ if test "x$RLLIBS" != "x"; then
BUILD_READLINE=yes
fi
+BUILD_LIBAIO=no
+AC_CHECK_LIB([aio],[io_setup],[LIBAIO="-laio"])
+
+if test "x$LIBAIO" != "x"; then
+ AC_DEFINE(HAVE_LIBAIO, 1, [libaio based POSIX enabled])
+ BUILD_LIBAIO=yes
+fi
+
+
AC_SUBST(GF_HOST_OS)
AC_SUBST(GF_GLUSTERFS_LDFLAGS)
AC_SUBST(GF_GLUSTERFS_CFLAGS)
@@ -528,6 +537,7 @@ AC_SUBST(GF_LDADD)
AC_SUBST(GF_FUSE_LDADD)
AC_SUBST(GF_FUSE_CFLAGS)
AC_SUBST(RLLIBS)
+AC_SUBST(LIBAIO)
AC_SUBST(AM_MAKEFLAGS)
AC_SUBST(AM_LIBTOOLFLAGS)
@@ -553,4 +563,5 @@ echo "argp-standalone : $BUILD_ARGP_STANDALONE"
echo "fusermount : $BUILD_FUSERMOUNT"
echo "readline : $BUILD_READLINE"
echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
echo
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 198d91a..81f5dda 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -229,6 +229,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"features.grace-timeout", "protocol/client", "grace-timeout", NULL, NO_DOC, 0},
{"features.grace-timeout", "protocol/server", "grace-timeout", NULL, DOC, 0},
{"feature.read-only", "features/read-only", "!read-only", "off", DOC, 0},
+ {"storage.linux-aio", "storage/posix", NULL, NULL, DOC, 0},
{NULL, }
};
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index 408dcb8..4eae979 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -4,10 +4,10 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
posix_la_LDFLAGS = -module -avoidversion
-posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c
-posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c
+posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO)
-noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h
+noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h
AM_CFLAGS = -fPIC -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
-D$(GF_HOST_OS) -Wall -I$(top_srcdir)/libglusterfs/src -shared \
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
new file mode 100644
index 0000000..ac0ce87
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -0,0 +1,519 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "posix.h"
+#include <sys/uio.h>
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+
+
+struct posix_aio_cb {
+ struct iocb iocb;
+ call_frame_t *frame;
+ struct iobuf *iobuf;
+ struct iobref *iobref;
+ struct iatt prebuf;
+ int fd;
+ int op;
+ off_t offset;
+};
+
+
+int
+posix_aio_readv_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iatt prebuf = {0,};
+ struct iatt postbuf = {0,};
+ int _fd = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+ struct iovec iov;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ off_t offset = 0;
+ struct posix_private * priv = NULL;
+
+
+ frame = paiocb->frame;
+ this = frame->this;
+ priv = this->private;
+ iobuf = paiocb->iobuf;
+ prebuf = paiocb->prebuf;
+ _fd = paiocb->fd;
+ offset = paiocb->offset;
+
+ if (res < 0) {
+ op_ret = -1;
+ op_errno = -res;
+ gf_log (this->name, GF_LOG_ERROR,
+ "readv(async) failed fd=%d,size=%lu,offset=%llu (%d/%s)",
+ _fd, paiocb->iocb.u.c.nbytes,
+ (unsigned long long) paiocb->offset,
+ res, strerror (op_errno));
+ goto out;
+ }
+
+ ret = posix_fdstat (this, _fd, &postbuf);
+ if (ret != 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fstat failed on fd=%d: %s", _fd,
+ strerror (op_errno));
+ goto out;
+ }
+
+ op_ret = res;
+ op_errno = 0;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = op_ret;
+
+
+ /* Hack to notify higher layers of EOF. */
+ if (postbuf.ia_size == 0)
+ op_errno = ENOENT;
+ else if ((offset + iov.iov_len) == postbuf.ia_size)
+ op_errno = ENOENT;
+ else if (offset > postbuf.ia_size)
+ op_errno = ENOENT;
+
+ LOCK (&priv->lock);
+ {
+ priv->read_value += op_ret;
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1,
+ &postbuf, iobref, NULL);
+ if (iobuf)
+ iobuf_unref (iobuf);
+ if (iobref)
+ iobref_unref (iobref);
+
+ GF_FREE (paiocb);
+
+ return 0;
+}
+
+
+int
+posix_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct iobuf *iobuf = NULL;
+ struct posix_fd * pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ ret = posix_fd_ctx_get_off (fd, this, &pfd, offset);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ if (!size) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
+ goto err;
+ }
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ paiocb->frame = frame;
+ paiocb->iobuf = iobuf;
+ paiocb->offset = offset;
+ paiocb->fd = _fd;
+ paiocb->op = GF_FOP_READ;
+
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.c.buf = iobuf_ptr (iobuf);
+ paiocb->iocb.u.c.nbytes = size;
+ paiocb->iocb.u.c.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ if (ret != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_submit() returned %d", ret);
+ op_errno = -ret;
+ goto err;
+ }
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ if (paiocb)
+ GF_FREE (paiocb);
+
+ return 0;
+}
+
+
+int
+posix_aio_writev_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ struct iatt prebuf = {0,};
+ struct iatt postbuf = {0,};
+ int _fd = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ struct posix_private * priv = NULL;
+
+
+ frame = paiocb->frame;
+ this = frame->this;
+ priv = this->private;
+ prebuf = paiocb->prebuf;
+ _fd = paiocb->fd;
+
+ if (res < 0) {
+ op_ret = -1;
+ op_errno = -res;
+ gf_log (this->name, GF_LOG_ERROR,
+ "writev(async) failed fd=%d,offset=%llu (%d/%s)",
+ _fd, (unsigned long long) paiocb->offset, res,
+ strerror (op_errno));
+
+ goto out;
+ }
+
+ ret = posix_fdstat (this, _fd, &postbuf);
+ if (ret != 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fstat failed on fd=%d: %s", _fd,
+ strerror (op_errno));
+ goto out;
+ }
+
+
+ op_ret = res;
+ op_errno = 0;
+
+ LOCK (&priv->lock);
+ {
+ priv->write_value += op_ret;
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf,
+ NULL);
+
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref (paiocb->iobref);
+ GF_FREE (paiocb);
+ }
+
+ return 0;
+}
+
+
+int
+posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *iov, int count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct posix_fd * pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ ret = posix_fd_ctx_get_off (fd, this, &pfd, offset);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ paiocb->frame = frame;
+ paiocb->offset = offset;
+ paiocb->fd = _fd;
+ paiocb->op = GF_FOP_WRITE;
+
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iobref = iobref_ref (iobref);
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.v.vec = iov;
+ paiocb->iocb.u.v.nr = count;
+ paiocb->iocb.u.v.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ ret = posix_fdstat (this, _fd, &paiocb->prebuf);
+ if (ret != 0) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fstat failed on fd=%p: %s", fd,
+ strerror (op_errno));
+ goto err;
+ }
+
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ if (ret != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_submit() returned %d", ret);
+ op_errno = -ret;
+ goto err;
+ }
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0);
+
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref (paiocb->iobref);
+ GF_FREE (paiocb);
+ }
+
+ return 0;
+}
+
+
+void *
+posix_aio_thread (void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ int ret = 0;
+ int i = 0;
+ struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS];
+ struct io_event *event = NULL;
+ struct posix_aio_cb *paiocb = NULL;
+
+ this = data;
+ THIS = this;
+ priv = this->private;
+
+ for (;;) {
+ memset (&events[0], 0, sizeof (events));
+ ret = io_getevents (priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS,
+ &events[0], NULL);
+ if (ret <= 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_getevents() returned %d", ret);
+ if (ret == -EINTR)
+ continue;
+ break;
+ }
+
+ for (i = 0; i < ret; i++) {
+ event = &events[i];
+
+ paiocb = event->data;
+
+ switch (paiocb->op) {
+ case GF_FOP_READ:
+ posix_aio_readv_complete (paiocb, event->res,
+ event->res2);
+ break;
+ case GF_FOP_WRITE:
+ posix_aio_writev_complete (paiocb, event->res,
+ event->res2);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown op %d found in piocb",
+ paiocb->op);
+ break;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+int
+posix_aio_init (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = io_setup (POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp);
+ if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Linux AIO not availble at run-time."
+ " Continuing with synchronous IO");
+ ret = 0;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "io_setup() failed. ret=%d, errno=%d",
+ ret, errno);
+ goto out;
+ }
+
+ ret = pthread_create (&priv->aiothread, NULL,
+ posix_aio_thread, this);
+ if (ret != 0) {
+ io_destroy (priv->ctxp);
+ goto out;
+ }
+
+ this->fops->readv = posix_aio_readv;
+ this->fops->writev = posix_aio_writev;
+out:
+ return ret;
+}
+
+
+int
+posix_aio_on (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (!priv->aio_init_done) {
+ ret = posix_aio_init (this);
+ if (ret == 0)
+ priv->aio_capable = _gf_true;
+ else
+ priv->aio_capable = _gf_false;
+ priv->aio_init_done = _gf_true;
+ }
+
+ if (priv->aio_capable) {
+ this->fops->readv = posix_aio_readv;
+ this->fops->writev = posix_aio_writev;
+ }
+
+ return ret;
+}
+
+int
+posix_aio_off (xlator_t *this)
+{
+ this->fops->readv = posix_readv;
+ this->fops->writev = posix_writev;
+
+ return 0;
+}
+
+
+#else
+
+
+int
+posix_aio_on (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not availble at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+int
+posix_aio_off (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not availble at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+#endif
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
new file mode 100644
index 0000000..545130a
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _POSIX_AIO_H
+#define _POSIX_AIO_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+// Maximum number of concurrently submitted IO events. The heaviest load
+// GlusterFS has been able to handle had 60-80 concurrent calls
+#define POSIX_AIO_MAX_NR_EVENTS 256
+
+// Maximum number of completed IO operations to reap per getevents syscall
+#define POSIX_AIO_MAX_NR_GETEVENTS 16
+
+
+int posix_aio_on (xlator_t *this);
+int posix_aio_off (xlator_t *this);
+
+int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_POSIX_AIO_H */
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index f4dcc70..0e291cb 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1066,5 +1066,29 @@ int
posix_fd_ctx_get_off (fd_t *fd, xlator_t *this, struct posix_fd **pfd,
off_t offset)
{
- return posix_fd_ctx_get (fd, this, pfd);
+ int ret;
+ int flags;
+
+ LOCK (&fd->inode->lock);
+ {
+ ret = __posix_fd_ctx_get (fd, this, pfd);
+ if (ret)
+ goto unlock;
+
+ if ((offset & 0xfff) && (*pfd)->odirect) {
+ flags = fcntl ((*pfd)->fd, F_GETFL);
+ ret = fcntl ((*pfd)->fd, F_SETFL, (flags & (~O_DIRECT)));
+ (*pfd)->odirect = 0;
+ }
+
+ if (((offset & 0xfff) == 0) && (!(*pfd)->odirect)) {
+ flags = fcntl ((*pfd)->fd, F_GETFL);
+ ret = fcntl ((*pfd)->fd, F_SETFL, (flags | O_DIRECT));
+ (*pfd)->odirect = 1;
+ }
+ }
+unlock:
+ UNLOCK (&fd->inode->lock);
+
+ return ret;
}
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index 10aa75e..b8d8124 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -30,6 +30,7 @@ enum gf_posix_mem_types_ {
gf_posix_mt_int32_t,
gf_posix_mt_posix_dev_t,
gf_posix_mt_trash_path,
+ gf_posix_mt_paiocb,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index cedf953..9082b40 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -59,6 +59,7 @@
#include "timer.h"
#include "glusterfs3-xdr.h"
#include "hashfn.h"
+#include "posix-aio.h"
extern char *marker_xattrs[];
@@ -3960,6 +3961,29 @@ mem_acct_init (xlator_t *this)
return ret;
}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
+ options, bool, out);
+
+ if (priv->aio_configured)
+ posix_aio_on (this);
+ else
+ posix_aio_off (this);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
/**
* init -
*/
@@ -4300,7 +4324,23 @@ init (xlator_t *this)
"Posix handle setup failed");
ret = -1;
goto out;
- }
+ }
+
+ _private->aio_init_done = _gf_false;
+ _private->aio_capable = _gf_false;
+
+ GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);
+
+ if (_private->aio_configured) {
+ op_ret = posix_aio_on (this);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Posix AIO init failed");
+ ret = -1;
+ goto out;
+ }
+ }
pthread_mutex_init (&_private->janitor_lock, NULL);
pthread_cond_init (&_private->janitor_cond, NULL);
@@ -4399,5 +4439,11 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_ANY },
{ .key = {"glusterd-uuid"},
.type = GF_OPTION_TYPE_STR },
+ {
+ .key = {"linux-aio"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Support for native Linux AIO"
+ },
{ .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 7c2b47b..ce5c94c 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -54,6 +54,11 @@
#include "posix-mem-types.h"
#include "posix-handle.h"
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#include "posix-aio.h"
+#endif
+
/**
* posix_fd - internal structure common to file and directory fd's
*/
@@ -64,7 +69,6 @@ struct posix_fd {
DIR * dir; /* handle returned by the kernel */
int flushwrites;
int odirect;
- int op_performed;
struct list_head list; /* to add to the janitor list */
};
@@ -124,6 +128,13 @@ struct posix_private {
/* uuid of glusterd that swapned the brick process */
uuid_t glusterd_uuid;
+ gf_boolean_t aio_configured;
+ gf_boolean_t aio_init_done;
+ gf_boolean_t aio_capable;
+#ifdef HAVE_LIBAIO
+ io_context_t ctxp;
+ pthread_t aiothread;
+#endif
};
#define POSIX_BASE_PATH(this) (((struct posix_private *)this->private)->base_path)