summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/posix/src')
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c319
-rw-r--r--xlators/storage/posix/src/posix-handle.h137
-rw-r--r--xlators/storage/posix/src/posix-helpers.c789
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c2665
-rw-r--r--xlators/storage/posix/src/posix.h117
9 files changed, 3903 insertions, 770 deletions
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index 408dcb80d..88efcc784 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -2,17 +2,18 @@
xlator_LTLIBRARIES = posix.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
-posix_la_LDFLAGS = -module -avoidversion
+posix_la_LDFLAGS = -module -avoid-version
-posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c
-posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c
+posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO)
-noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h
+noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h
-AM_CFLAGS = -fPIC -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -D$(GF_HOST_OS) -Wall -I$(top_srcdir)/libglusterfs/src -shared \
- -nostartfiles -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
new file mode 100644
index 000000000..c3bbddd67
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -0,0 +1,569 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "posix.h"
+#include <sys/uio.h>
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+
+
+/*
+ * Switch O_DIRECT on or off on the backend descriptor held in pfd for
+ * the I/O that is about to be issued.
+ *
+ * O_DIRECT is wanted when the fd's open flags or the per-operation
+ * flags carry it, or when offset and size both have their low 12 bits
+ * clear.  fcntl() is only invoked when the wanted mode differs from the
+ * mode cached in pfd->odirect; the cache is updated right after the
+ * F_SETFL call.  A non-zero F_SETFL result is logged as a warning and
+ * nothing is reported back to the caller.
+ *
+ * The callers in this file invoke this with fd->lock held.
+ */
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+                        off_t offset, size_t size)
+{
+        int want_direct = 0;
+        int cur_flags   = 0;
+        int rc          = 0;
+
+        if ((fd->flags|opflags) & O_DIRECT)
+                want_direct = 1;   /* explicitly requested */
+        else if (!((offset|size) & 0xfff))
+                want_direct = 1;   /* aligned request: use it when feasible */
+
+        if (want_direct && !pfd->odirect) {
+                cur_flags = fcntl (pfd->fd, F_GETFL);
+                rc = fcntl (pfd->fd, F_SETFL, (cur_flags | O_DIRECT));
+                pfd->odirect = 1;
+        } else if (!want_direct && pfd->odirect) {
+                cur_flags = fcntl (pfd->fd, F_GETFL);
+                rc = fcntl (pfd->fd, F_SETFL, (cur_flags & (~O_DIRECT)));
+                pfd->odirect = 0;
+        }
+
+        if (rc) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "fcntl() failed (%s). fd=%d flags=%d pfd->odirect=%d",
+                        strerror (errno), pfd->fd, cur_flags, pfd->odirect);
+        }
+}
+
+
+/*
+ * Per-request bookkeeping for one asynchronous read or write.  The
+ * submit paths in this file allocate one of these per request, point
+ * iocb.data back at it, and the completion handlers free it.
+ */
+struct posix_aio_cb {
+ /* control block handed to io_submit() */
+ struct iocb iocb;
+ /* frame that is unwound when the request completes */
+ call_frame_t *frame;
+ /* destination buffer; set by the read path only */
+ struct iobuf *iobuf;
+ /* reference on the caller's iobref; taken by the write path only */
+ struct iobref *iobref;
+ /* attributes captured with posix_fdstat() before a write is submitted */
+ struct iatt prebuf;
+ /* backend file descriptor (pfd->fd) the request was issued on */
+ int fd;
+ /* GF_FOP_READ or GF_FOP_WRITE; selects the completion handler */
+ int op;
+ /* file offset of the request */
+ off_t offset;
+};
+
+
+/*
+ * Completion handler for an asynchronous read.
+ *
+ * paiocb: request descriptor created by posix_aio_readv(); it is freed
+ *         here, together with the reference on its iobuf.
+ * res:    result reported in the io_event: byte count on success, or a
+ *         negated errno value on failure.
+ * res2:   second result word of the io_event (unused).
+ *
+ * Unwinds the readv frame exactly once.  On success the reply carries
+ * the data buffer and the post-read attributes, and op_errno is set to
+ * ENOENT when the read reached end of file.  Always returns 0.
+ */
+int
+posix_aio_readv_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+        call_frame_t         *frame    = NULL;
+        xlator_t             *this     = NULL;
+        struct iobuf         *iobuf    = NULL;
+        struct iatt           postbuf  = {0,};
+        int                   _fd      = -1;
+        int                   op_ret   = -1;
+        int                   op_errno = 0;
+        /* zero-filled so the failure paths, which jump to "out" before
+           the vector is populated, never unwind with stack garbage */
+        struct iovec          iov      = {0,};
+        struct iobref        *iobref   = NULL;
+        int                   ret      = 0;
+        off_t                 offset   = 0;
+        struct posix_private *priv     = NULL;
+
+
+        frame  = paiocb->frame;
+        this   = frame->this;
+        priv   = this->private;
+        iobuf  = paiocb->iobuf;
+        _fd    = paiocb->fd;
+        offset = paiocb->offset;
+
+        if (res < 0) {
+                op_ret = -1;
+                op_errno = -res;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "readv(async) failed fd=%d,size=%lu,offset=%llu (%d/%s)",
+                        _fd, paiocb->iocb.u.c.nbytes,
+                        (unsigned long long) paiocb->offset,
+                        res, strerror (op_errno));
+                goto out;
+        }
+
+        ret = posix_fdstat (this, _fd, &postbuf);
+        if (ret != 0) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fstat failed on fd=%d: %s", _fd,
+                        strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = res;
+        op_errno = 0;
+
+        iobref = iobref_new ();
+        if (!iobref) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        iobref_add (iobref, iobuf);
+
+        iov.iov_base = iobuf_ptr (iobuf);
+        iov.iov_len  = op_ret;
+
+
+        /* Hack to notify higher layers of EOF. */
+        if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
+                op_errno = ENOENT;
+
+        /* account the bytes read in the translator-wide counter */
+        LOCK (&priv->lock);
+        {
+                priv->read_value += op_ret;
+        }
+        UNLOCK (&priv->lock);
+
+out:
+        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1,
+                             &postbuf, iobref, NULL);
+        if (iobuf)
+                iobuf_unref (iobuf);
+        if (iobref)
+                iobref_unref (iobref);
+
+        GF_FREE (paiocb);
+
+        return 0;
+}
+
+
+/*
+ * readv fop backed by Linux AIO.
+ *
+ * Looks up the backend descriptor for fd, allocates a buffer of the
+ * requested size plus a request descriptor, and queues an
+ * IO_CMD_PREAD on the translator's AIO context.  When the request is
+ * queued, the frame is unwound later by the completion path; on any
+ * failure before that, the frame is unwound here with -1 and the
+ * buffer and descriptor are released.  Always returns 0.
+ */
+int
+posix_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+        struct posix_private *priv       = NULL;
+        struct posix_fd      *pfd        = NULL;
+        struct posix_aio_cb  *req        = NULL;
+        struct iocb          *iocbp      = NULL;
+        struct iobuf         *readbuf    = NULL;
+        int                   backend_fd = -1;
+        int                   ret        = -1;
+        int32_t               op_errno   = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        priv = this->private;
+
+        ret = posix_fd_ctx_get (fd, this, &pfd);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "pfd is NULL from fd=%p", fd);
+                goto err;
+        }
+        backend_fd = pfd->fd;
+
+        if (!size) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
+                goto err;
+        }
+
+        readbuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+        if (!readbuf) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        req = GF_CALLOC (1, sizeof (*req), gf_posix_mt_paiocb);
+        if (!req) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        /* bookkeeping consumed by the completion handler */
+        req->op     = GF_FOP_READ;
+        req->fd     = backend_fd;
+        req->offset = offset;
+        req->iobuf  = readbuf;
+        req->frame  = frame;
+
+        /* the control block itself */
+        iocbp = &req->iocb;
+        iocbp->data           = req;
+        iocbp->aio_fildes     = backend_fd;
+        iocbp->aio_lio_opcode = IO_CMD_PREAD;
+        iocbp->aio_reqprio    = 0;
+        iocbp->u.c.buf        = iobuf_ptr (readbuf);
+        iocbp->u.c.nbytes     = size;
+        iocbp->u.c.offset     = offset;
+
+        /* the O_DIRECT adjustment and the submit happen under fd->lock */
+        LOCK (&fd->lock);
+        {
+                __posix_fd_set_odirect (fd, pfd, flags, offset, size);
+
+                ret = io_submit (priv->ctxp, 1, &iocbp);
+        }
+        UNLOCK (&fd->lock);
+
+        if (ret == 1)
+                return 0;
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "io_submit() returned %d", ret);
+        op_errno = -ret;
+
+err:
+        STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+        if (readbuf)
+                iobuf_unref (readbuf);
+
+        if (req)
+                GF_FREE (req);
+
+        return 0;
+}
+
+
+/*
+ * Completion handler for an asynchronous write.
+ *
+ * res carries the io_event result: bytes written, or a negated errno
+ * value.  The writev frame is unwound with the attributes captured
+ * before submission and the attributes read back here; afterwards the
+ * iobref reference and the request descriptor are released.
+ * Always returns 0.
+ */
+int
+posix_aio_writev_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+        call_frame_t         *frame    = paiocb->frame;
+        xlator_t             *this     = frame->this;
+        struct posix_private *priv     = this->private;
+        struct iatt           before   = paiocb->prebuf;
+        struct iatt           after    = {0,};
+        int                   sysfd    = paiocb->fd;
+        int                   op_ret   = -1;
+        int                   op_errno = 0;
+
+        if (res < 0) {
+                op_errno = -res;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "writev(async) failed fd=%d,offset=%llu (%d/%s)",
+                        sysfd, (unsigned long long) paiocb->offset, res,
+                        strerror (op_errno));
+                goto unwind;
+        }
+
+        if (posix_fdstat (this, sysfd, &after) != 0) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fstat failed on fd=%d: %s", sysfd,
+                        strerror (op_errno));
+                goto unwind;
+        }
+
+        op_ret = res;
+
+        /* account the bytes written in the translator-wide counter */
+        LOCK (&priv->lock);
+        {
+                priv->write_value += op_ret;
+        }
+        UNLOCK (&priv->lock);
+
+unwind:
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &before, &after,
+                             NULL);
+
+        if (paiocb->iobref)
+                iobref_unref (paiocb->iobref);
+        GF_FREE (paiocb);
+
+        return 0;
+}
+
+
+/*
+ * writev fop backed by Linux AIO.
+ *
+ * Builds a request descriptor that holds a reference on the caller's
+ * iobref, records the pre-write attributes, and queues an
+ * IO_CMD_PWRITEV on the translator's AIO context.  When the request is
+ * queued, the frame is unwound later by the completion path; on any
+ * failure before that, the frame is unwound here with -1 and the
+ * descriptor and its iobref reference are released.  Always returns 0.
+ */
+int
+posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  struct iovec *iov, int count, off_t offset, uint32_t flags,
+                  struct iobref *iobref, dict_t *xdata)
+{
+        struct posix_private *priv       = NULL;
+        struct posix_fd      *pfd        = NULL;
+        struct posix_aio_cb  *req        = NULL;
+        struct iocb          *iocbp      = NULL;
+        int                   backend_fd = -1;
+        int                   ret        = -1;
+        int32_t               op_errno   = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        priv = this->private;
+
+        ret = posix_fd_ctx_get (fd, this, &pfd);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "pfd is NULL from fd=%p", fd);
+                goto err;
+        }
+        backend_fd = pfd->fd;
+
+        req = GF_CALLOC (1, sizeof (*req), gf_posix_mt_paiocb);
+        if (!req) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        /* bookkeeping consumed by the completion handler */
+        req->op     = GF_FOP_WRITE;
+        req->fd     = backend_fd;
+        req->offset = offset;
+        req->frame  = frame;
+        req->iobref = iobref_ref (iobref);
+
+        /* the control block itself */
+        iocbp = &req->iocb;
+        iocbp->data           = req;
+        iocbp->aio_fildes     = backend_fd;
+        iocbp->aio_lio_opcode = IO_CMD_PWRITEV;
+        iocbp->aio_reqprio    = 0;
+        iocbp->u.v.vec        = iov;
+        iocbp->u.v.nr         = count;
+        iocbp->u.v.offset     = offset;
+
+        /* attributes before the write, reported back at completion */
+        ret = posix_fdstat (this, backend_fd, &req->prebuf);
+        if (ret != 0) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fstat failed on fd=%p: %s", fd,
+                        strerror (op_errno));
+                goto err;
+        }
+
+        /* the O_DIRECT adjustment and the submit happen under fd->lock */
+        LOCK (&fd->lock);
+        {
+                __posix_fd_set_odirect (fd, pfd, flags, offset,
+                                        iov_length (iov, count));
+
+                ret = io_submit (priv->ctxp, 1, &iocbp);
+        }
+        UNLOCK (&fd->lock);
+
+        if (ret == 1)
+                return 0;
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "io_submit() returned %d", ret);
+        op_errno = -ret;
+
+err:
+        STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0);
+
+        if (req) {
+                if (req->iobref)
+                        iobref_unref (req->iobref);
+                GF_FREE (req);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Body of the AIO reaper thread started by posix_aio_init().
+ *
+ * data is the xlator_t of the posix translator.  The thread blocks in
+ * io_getevents() for at least one and at most
+ * POSIX_AIO_MAX_NR_GETEVENTS completions and hands each one to the
+ * read or write completion handler according to the op recorded in
+ * the request descriptor.  A non-positive io_getevents() result is
+ * logged; the loop is retried on -EINTR and left on anything else.
+ * Returns NULL.
+ */
+void *
+posix_aio_thread (void *data)
+{
+        xlator_t             *this = data;
+        struct posix_private *priv = NULL;
+        struct io_event       events[POSIX_AIO_MAX_NR_GETEVENTS];
+        struct io_event      *ev   = NULL;
+        struct posix_aio_cb  *req  = NULL;
+        int                   nr   = 0;
+        int                   idx  = 0;
+
+        THIS = this;
+        priv = this->private;
+
+        while (1) {
+                memset (&events[0], 0, sizeof (events));
+                nr = io_getevents (priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS,
+                                   &events[0], NULL);
+                if (nr <= 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "io_getevents() returned %d", nr);
+                        if (nr != -EINTR)
+                                break;
+                        continue;
+                }
+
+                for (idx = 0; idx < nr; idx++) {
+                        ev  = &events[idx];
+                        req = ev->data;
+
+                        if (req->op == GF_FOP_READ) {
+                                posix_aio_readv_complete (req, ev->res,
+                                                          ev->res2);
+                        } else if (req->op == GF_FOP_WRITE) {
+                                posix_aio_writev_complete (req, ev->res,
+                                                           ev->res2);
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "unknown op %d found in piocb",
+                                        req->op);
+                        }
+                }
+        }
+
+        return NULL;
+}
+
+
+/*
+ * One-time setup of Linux AIO for this translator: create the AIO
+ * context, start the reaper thread and install the AIO readv/writev
+ * fops.
+ *
+ * priv->aio_capable is set to true only when all of that succeeded.
+ * When the kernel reports ENOSYS the function still returns 0, so the
+ * caller can carry on with synchronous IO, but aio_capable is left
+ * untouched and no fop is replaced.
+ *
+ * Returns 0 on success or when AIO is unavailable at run-time, the
+ * negative io_setup() result, or the non-zero gf_thread_create()
+ * result.
+ */
+int
+posix_aio_init (xlator_t *this)
+{
+        struct posix_private *priv = NULL;
+        int                   ret  = 0;
+
+        priv = this->private;
+
+        /* both the raw-syscall (-1/errno) and the libaio (-errno)
+           conventions for reporting ENOSYS are checked */
+        ret = io_setup (POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp);
+        if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Linux AIO not available at run-time."
+                        " Continuing with synchronous IO");
+                ret = 0;
+                goto out;
+        }
+
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "io_setup() failed. ret=%d, errno=%d",
+                        ret, errno);
+                goto out;
+        }
+
+        ret = gf_thread_create (&priv->aiothread, NULL,
+                                posix_aio_thread, this);
+        if (ret != 0) {
+                io_destroy (priv->ctxp);
+                goto out;
+        }
+
+        /* context and reaper thread exist: AIO is usable from here on */
+        priv->aio_capable = _gf_true;
+
+        this->fops->readv  = posix_aio_readv;
+        this->fops->writev = posix_aio_writev;
+out:
+        return ret;
+}
+
+
+/*
+ * Enable AIO-backed readv/writev.  The first call performs the
+ * one-time initialisation; later calls only re-install the AIO fops
+ * when that initialisation left the translator AIO-capable.
+ *
+ * A return of 0 from posix_aio_init() is not taken as proof of
+ * capability, because it also returns 0 when AIO is missing at
+ * run-time; the aio_capable flag it sets is used instead.
+ *
+ * Returns the posix_aio_init() result on the first call, 0 afterwards.
+ */
+int
+posix_aio_on (xlator_t *this)
+{
+        struct posix_private *priv = NULL;
+        int                   ret  = 0;
+
+        priv = this->private;
+
+        if (!priv->aio_init_done) {
+                priv->aio_capable = _gf_false;
+                ret = posix_aio_init (this);
+                priv->aio_init_done = _gf_true;
+        }
+
+        if (priv->aio_capable) {
+                this->fops->readv  = posix_aio_readv;
+                this->fops->writev = posix_aio_writev;
+        }
+
+        return ret;
+}
+
+/*
+ * Disable AIO-backed IO by pointing the translator's readv/writev fops
+ * back at the synchronous posix_readv/posix_writev implementations.
+ * The AIO context and reaper thread are not touched here.
+ * Always returns 0.
+ */
+int
+posix_aio_off (xlator_t *this)
+{
+ this->fops->readv = posix_readv;
+ this->fops->writev = posix_writev;
+
+ return 0;
+}
+
+
+#else
+
+
+/*
+ * Variant compiled when HAVE_LIBAIO is not defined: only logs that
+ * AIO is unavailable and leaves the fops as they are.
+ * Always returns 0.
+ */
+int
+posix_aio_on (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+/*
+ * Variant compiled when HAVE_LIBAIO is not defined: there is nothing
+ * to switch off, so it only logs that AIO is unavailable.
+ * Always returns 0.
+ */
+int
+posix_aio_off (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+/*
+ * Variant compiled when HAVE_LIBAIO is not defined: the descriptor is
+ * left alone and all arguments are ignored; the call only logs that
+ * AIO is unavailable.
+ */
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+                        off_t offset, size_t size)
+{
+        gf_log (THIS->name, GF_LOG_INFO,
+                "Linux AIO not available at build-time."
+                " Continuing with synchronous IO");
+}
+#endif
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
new file mode 100644
index 000000000..5bde71601
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _POSIX_AIO_H
+#define _POSIX_AIO_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+// Maximum number of concurrently submitted IO events. The heaviest load
+// GlusterFS has been able to handle had 60-80 concurrent calls
+#define POSIX_AIO_MAX_NR_EVENTS 256
+
+// Maximum number of completed IO operations to reap per getevents syscall
+#define POSIX_AIO_MAX_NR_GETEVENTS 16
+
+
+int posix_aio_on (xlator_t *this);
+int posix_aio_off (xlator_t *this);
+
+int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_POSIX_AIO_H */
diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c
index 9b6df445e..adb8acc07 100644
--- a/xlators/storage/posix/src/posix-handle.c
+++ b/xlators/storage/posix/src/posix-handle.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -27,18 +17,176 @@
#include <sys/stat.h>
#include <unistd.h>
#include <libgen.h>
+#ifdef GF_LINUX_HOST_OS
#include <alloca.h>
+#endif
#include "posix-handle.h"
#include "posix.h"
#include "xlator.h"
+#include "syscall.h"
+
+/*
+ * Resolve the entry bname under the directory inode parent.
+ *
+ * Stats the entry through posix_istat() (filling iabuf), looks the
+ * resulting gfid up in itable, creates a new inode when none exists
+ * yet, and links it under parent.
+ *
+ * Returns the inode handed back by inode_link(), or NULL when the stat
+ * fails or no inode could be obtained.
+ */
+inode_t *
+posix_resolve (xlator_t *this, inode_table_t *itable, inode_t *parent,
+               char *bname, struct iatt *iabuf)
+{
+        inode_t *inode = NULL, *linked_inode = NULL;
+        int      ret   = -1;
+
+        ret = posix_istat (this, parent->gfid, bname, iabuf);
+        if (ret < 0)
+                goto out;
+
+        inode = inode_find (itable, iabuf->ia_gfid);
+        if (inode == NULL) {
+                inode = inode_new (itable);
+                /* never hand a NULL inode to inode_link() */
+                if (inode == NULL)
+                        goto out;
+        }
+
+        linked_inode = inode_link (inode, parent, bname, iabuf);
+        inode_unref (inode);
-#define HANDLE_PFX ".glusterfs"
+out:
+        return linked_inode;
+}
-#define UUID0_STR "00000000-0000-0000-0000-000000000000"
-#define SLEN(str) (sizeof(str) - 1)
+/*
+ * Append one path component to an ancestry being built and, when
+ * requested, emit a directory entry for it.
+ *
+ * priv_base_path: export directory of the brick.
+ * path/pathsize:  accumulated ancestry path and the size of its
+ *                 buffer; dir_name is appended to it in place.
+ * head:           list that receives the new gf_dirent_t.
+ * dir_name:       component to append.
+ * iabuf, inode:   attributes and inode of the component.
+ * type:           an entry is only created when POSIX_ANCESTRY_DENTRY
+ *                 is set.
+ * xdata:          passed to posix_lookup_xattr_fill() for the entry's
+ *                 dict.
+ *
+ * Returns 0 on success; -1 when the component does not fit in path,
+ * when the on-disk path would exceed PATH_MAX, or when the entry
+ * cannot be allocated.
+ */
+int
+posix_make_ancestral_node (const char *priv_base_path, char *path, int pathsize,
+                           gf_dirent_t *head,
+                           char *dir_name, struct iatt *iabuf, inode_t *inode,
+                           int type, dict_t *xdata)
+{
+        gf_dirent_t *entry                   = NULL;
+        char         real_path[PATH_MAX + 1] = {0, };
+        size_t       len                     = 0;
+        loc_t        loc                     = {0, };
+        int          ret                     = -1;
+        int          written                 = 0;
+
+        /* size_t, not char: the sum overflows a char for longer paths,
+           which would defeat the bounds check */
+        len = strlen (path) + strlen (dir_name) + 1;
+        if (pathsize < 0 || len > (size_t) pathsize) {
+                goto out;
+        }
+
+        strcat (path, dir_name);
+
+        if (type & POSIX_ANCESTRY_DENTRY) {
+                /* bounded build of <base>/<path>; refuse rather than
+                   overflow real_path */
+                written = snprintf (real_path, sizeof (real_path), "%s/%s",
+                                    priv_base_path, path);
+                if (written < 0 || (size_t) written >= sizeof (real_path)) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "real path of entry %s is too long",
+                                dir_name);
+                        goto out;
+                }
+
+                entry = gf_dirent_for_name (dir_name);
+                if (!entry) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "could not create gf_dirent for entry %s: (%s)",
+                                dir_name, strerror (errno));
+                        goto out;
+                }
+
+                entry->d_stat = *iabuf;
+                entry->inode = inode_ref (inode);
+
+                list_add_tail (&entry->list, &head->list);
+
+                loc.inode = inode_ref (inode);
+                uuid_copy (loc.gfid, inode->gfid);
+
+                entry->dict = posix_lookup_xattr_fill (THIS, real_path, &loc,
+                                                       xdata, iabuf);
+                loc_wipe (&loc);
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Build the ancestry of the directory identified by gfid, from the
+ * root down, by recursively following the gfid handle symlinks.
+ *
+ * Each handle is expected to read "../../<xx>/<yy>/<parent-gfid>/<name>".
+ * The parent gfid is resolved first (recursively, ending at the root
+ * gfid), then <name> is resolved under it and appended through
+ * posix_make_ancestral_node().
+ *
+ * path/pathsize:  buffer that accumulates the textual path.
+ * head, type,
+ * xdata:          forwarded to posix_make_ancestral_node().
+ * handle_size:    size to use for the handle path buffer.
+ * priv_base_path: export directory of the brick.
+ * itable:         inode table used for resolution.
+ * parent:         in/out; on success holds a reference to the inode of
+ *                 the last resolved component.  A reference passed in
+ *                 is dropped when it is replaced.
+ *
+ * Returns the posix_make_ancestral_node() result for the final
+ * component, or -1 on bad arguments, an unreadable or malformed
+ * handle, or a component that cannot be resolved.
+ */
+int
+posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize,
+                             gf_dirent_t *head, int type, uuid_t gfid,
+                             const size_t handle_size,
+                             const char *priv_base_path, inode_table_t *itable,
+                             inode_t **parent, dict_t *xdata)
+{
+        char        *linkname   = NULL; /* "../../<gfid[0]>/<gfid[1]/"
+                                         "<gfidstr>/<NAME_MAX>" */
+        char        *dir_handle = NULL;
+        char        *dir_name   = NULL;
+        char        *pgfidstr   = NULL;
+        char        *saveptr    = NULL;
+        ssize_t      len        = 0;
+        inode_t     *inode      = NULL;
+        struct iatt  iabuf      = {0, };
+        int          ret        = -1;
+        uuid_t       tmp_gfid   = {0, };
+        const size_t pfx_len    = SLEN("../../00/00/");
+
+        if (!path || !parent || !priv_base_path || uuid_is_null (gfid)) {
+                goto out;
+        }
+
+        if (__is_root_gfid (gfid)) {
+                /* end of the recursion: the root is its own ancestry */
+                if (*parent) {
+                        inode_unref (*parent);
+                }
+
+                *parent = inode_ref (itable->root);
+
+                inode = itable->root;
+
+                memset (&iabuf, 0, sizeof (iabuf));
+                uuid_copy (iabuf.ia_gfid, inode->gfid);
+                iabuf.ia_type = inode->ia_type;
+
+                ret = posix_make_ancestral_node (priv_base_path, path, pathsize,
+                                                 head, "/", &iabuf, inode, type,
+                                                 xdata);
+                return ret;
+        }
+
+        dir_handle = alloca (handle_size);
+        linkname   = alloca (PATH_MAX);
+        snprintf (dir_handle, handle_size, "%s/%s/%02x/%02x/%s",
+                  priv_base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
+                  uuid_utoa (gfid));
+
+        /* two bytes are held back: one for the terminator written
+           below and one for the '/' appended to dir_name */
+        len = readlink (dir_handle, linkname, PATH_MAX - 2);
+        if (len < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "could not read the link "
+                        "from the gfid handle %s (%s)", dir_handle,
+                        strerror (errno));
+                goto out;
+        }
+
+        linkname[len] = '\0';
+
+        /* the link must extend past the "../../xx/yy/" prefix before
+           that prefix may be skipped */
+        if ((size_t) len <= pfx_len) {
+                gf_log (this->name, GF_LOG_ERROR, "malformed link %s "
+                        "in the gfid handle %s", linkname, dir_handle);
+                goto out;
+        }
+
+        pgfidstr = strtok_r (linkname + pfx_len, "/", &saveptr);
+        dir_name = strtok_r (NULL, "/", &saveptr);
+        if (!pgfidstr || !dir_name) {
+                gf_log (this->name, GF_LOG_ERROR, "gfid handle %s does not "
+                        "contain a parent gfid and a name", dir_handle);
+                goto out;
+        }
+
+        strcat (dir_name, "/");
+
+        if (uuid_parse (pgfidstr, tmp_gfid) != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "invalid parent gfid %s "
+                        "in the gfid handle %s", pgfidstr, dir_handle);
+                goto out;
+        }
+
+        ret = posix_make_ancestryfromgfid (this, path, pathsize, head, type,
+                                           tmp_gfid, handle_size,
+                                           priv_base_path, itable, parent,
+                                           xdata);
+        if (ret < 0) {
+                goto out;
+        }
+
+        memset (&iabuf, 0, sizeof (iabuf));
+
+        inode = posix_resolve (this, itable, *parent, dir_name, &iabuf);
+        if (inode == NULL) {
+                /* an unresolved component would leave *parent NULL for
+                   the next level, which dereferences it */
+                gf_log (this->name, GF_LOG_ERROR, "could not resolve %s "
+                        "from the gfid handle %s", dir_name, dir_handle);
+                ret = -1;
+                goto out;
+        }
+
+        ret = posix_make_ancestral_node (priv_base_path, path, pathsize, head,
+                                         dir_name, &iabuf, inode, type, xdata);
+        if (*parent != NULL) {
+                inode_unref (*parent);
+        }
+
+        *parent = inode;
+
+out:
+        return ret;
+}
int
posix_handle_relpath (xlator_t *this, uuid_t gfid, const char *basename,
@@ -104,7 +252,7 @@ posix_handle_pump (xlator_t *this, char *buf, int len, int maxlen,
if ((ret == 8) && memcmp (linkname, "../../..", 8) == 0) {
if (strcmp (base_str, buf) == 0) {
- strncpy (buf + pfx_len, "..", 3);
+ strcpy (buf + pfx_len, "..");
}
goto out;
}
@@ -195,13 +343,13 @@ posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename,
buf = alloca (maxlen);
}
- base_len = (priv->base_path_length + SLEN(HANDLE_PFX) + 45);
+ base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45);
base_str = alloca (base_len + 1);
base_len = snprintf (base_str, base_len + 1, "%s/%s/%02x/%02x/%s",
- priv->base_path, HANDLE_PFX, gfid[0], gfid[1],
+ priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
uuid_str);
- pfx_len = priv->base_path_length + 1 + SLEN(HANDLE_PFX) + 1;
+ pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1;
if (basename) {
len = snprintf (buf, maxlen, "%s/%s", base_str, basename);
@@ -243,7 +391,7 @@ posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
len = priv->base_path_length /* option directory "/export" */
+ SLEN("/")
- + SLEN(HANDLE_PFX)
+ + SLEN(GF_HIDDEN_PATH)
+ SLEN("/")
+ SLEN("00/")
+ SLEN("00/")
@@ -274,10 +422,10 @@ posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
if (basename) {
len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path,
- HANDLE_PFX, gfid[0], gfid[1], uuid_str, basename);
+ GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str, basename);
} else {
len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path,
- HANDLE_PFX, gfid[0], gfid[1], uuid_str);
+ GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str);
}
out:
return len;
@@ -306,10 +454,10 @@ posix_handle_init (xlator_t *this)
return -1;
}
- handle_pfx = alloca (priv->base_path_length + 1 + strlen (HANDLE_PFX)
+ handle_pfx = alloca (priv->base_path_length + 1 + strlen (GF_HIDDEN_PATH)
+ 1);
- sprintf (handle_pfx, "%s/%s", priv->base_path, HANDLE_PFX);
+ sprintf (handle_pfx, "%s/%s", priv->base_path, GF_HIDDEN_PATH);
ret = stat (handle_pfx, &stbuf);
switch (ret) {
@@ -391,6 +539,107 @@ posix_handle_init (xlator_t *this)
return 0;
}
+/*
+ * Tell whether old_trash is a pre-existing trash directory: it must be
+ * a directory (checked with lstat, so symlinks are not followed) and
+ * reading its "trusted.gfid" xattr must fail with ENODATA.
+ * Returns _gf_true in that case, _gf_false otherwise.
+ */
+gf_boolean_t
+posix_does_old_trash_exists (char *old_trash)
+{
+        uuid_t      gfid_buf = {0};
+        struct stat st       = {0};
+
+        if (lstat (old_trash, &st) != 0)
+                return _gf_false;
+
+        if (!S_ISDIR (st.st_mode))
+                return _gf_false;
+
+        if (sys_lgetxattr (old_trash, "trusted.gfid", gfid_buf, 16) >= 0)
+                return _gf_false;
+
+        return (errno == ENODATA) ? _gf_true : _gf_false;
+}
+
+/*
+ * Make sure the trash directory exists.
+ *
+ * A missing path is created with mode 0755; an existing path must be a
+ * directory.  Returns 0 when the directory exists or was created, and
+ * -1 when it is not a directory, cannot be created, or cannot be
+ * examined (each case is logged).
+ */
+int
+posix_handle_new_trash_init (xlator_t *this, char *trash)
+{
+        struct stat st = {0};
+        int         rc = 0;
+
+        rc = lstat (trash, &st);
+
+        if (rc == 0) {
+                if (!S_ISDIR (st.st_mode)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Not a directory: %s", trash);
+                        rc = -1;
+                }
+                return rc;
+        }
+
+        if (rc != -1)
+                return rc;
+
+        if (errno != ENOENT) {
+                gf_log (this->name, GF_LOG_ERROR, "Checking for %s "
+                        "failed: %s", trash, strerror (errno));
+                return rc;
+        }
+
+        rc = mkdir (trash, 0755);
+        if (rc != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Creating directory %s failed: %s",
+                        trash, strerror (errno));
+        }
+
+        return rc;
+}
+
+/*
+ * Move a pre-existing trash directory (old) into the new trash
+ * directory (new) under a freshly generated uuid name.
+ *
+ * Nothing is done, and 0 is returned, when
+ * posix_does_old_trash_exists() rejects old.  Otherwise returns the
+ * rename() result; a failure is logged.
+ */
+int
+posix_mv_old_trash_into_new_trash (xlator_t *this, char *old, char *new)
+{
+        char   target[PATH_MAX] = {0};
+        uuid_t unique_name      = {0};
+        int    ret              = 0;
+
+        if (posix_does_old_trash_exists (old)) {
+                uuid_generate (unique_name);
+                snprintf (target, sizeof (target), "%s/%s", new,
+                          uuid_utoa (unique_name));
+
+                ret = rename (old, target);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "Not able to move "
+                                "%s -> %s (%s)", old, target, strerror (errno));
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * Set up the trash directory of the brick.
+ *
+ * Stores "<base_path>/<GF_HIDDEN_PATH>/<TRASH_DIR>" in
+ * priv->trash_path, makes sure that directory exists, and then moves a
+ * "<base_path>/.landfill" directory left behind by an older layout
+ * into it.
+ *
+ * Returns -1 when the path cannot be allocated, otherwise the result
+ * of the first failing step, or of the final move.
+ */
+int
+posix_handle_trash_init (xlator_t *this)
+{
+        struct posix_private *priv                 = this->private;
+        char                  landfill[PATH_MAX]   = {0};
+        int                   ret                  = -1;
+
+        /* base path + "/" + hidden dir + "/" + trash dir + terminator */
+        priv->trash_path = GF_CALLOC (1, priv->base_path_length + strlen ("/")
+                                      + strlen (GF_HIDDEN_PATH) + strlen ("/")
+                                      + strlen (TRASH_DIR) + 1,
+                                      gf_posix_mt_trash_path);
+        if (!priv->trash_path)
+                return ret;
+
+        /* the buffer is zero-filled, so the copied prefix stays
+           terminated for the strcat that follows */
+        strncpy (priv->trash_path, priv->base_path, priv->base_path_length);
+        strcat (priv->trash_path, "/" GF_HIDDEN_PATH "/" TRASH_DIR);
+
+        ret = posix_handle_new_trash_init (this, priv->trash_path);
+        if (ret)
+                return ret;
+
+        snprintf (landfill, sizeof (landfill), "%s/.landfill",
+                  priv->base_path);
+
+        return posix_mv_old_trash_into_new_trash (this, landfill,
+                                                  priv->trash_path);
+}
int
posix_handle_mkdir_hashes (xlator_t *this, const char *newpath)
@@ -452,7 +701,8 @@ posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat
return -1;
}
- ret = link (oldpath, newpath);
+ ret = sys_link (oldpath, newpath);
+
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"link %s -> %s failed (%s)",
@@ -469,13 +719,6 @@ posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat
}
}
- ret = lstat (newpath, &newbuf);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "lstat on %s failed (%s)", newpath, strerror (errno));
- return -1;
- }
-
if (newbuf.st_ino != oldbuf->st_ino ||
newbuf.st_dev != oldbuf->st_dev) {
gf_log (this->name, GF_LOG_WARNING,
@@ -631,7 +874,7 @@ posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
ret = lstat (newpath, &stbuf);
if (!ret) {
- ret = link (newpath, real_path);
+ ret = sys_link (newpath, real_path);
}
return ret;
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
index a8fd9228a..31cbf83fd 100644
--- a/xlators/storage/posix/src/posix-handle.h
+++ b/xlators/storage/posix/src/posix-handle.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _POSIX_HANDLE_H
#define _POSIX_HANDLE_H
@@ -27,9 +17,85 @@
#include <sys/types.h>
#include "xlator.h"
+#include "gf-dirent.h"
+
+#define TRASH_DIR "landfill"
+
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+#define SLEN(str) (sizeof(str) - 1)
+
+/* True when loc is non-NULL and carries a path starting with '/'.  The
+   argument is parenthesized at every use so that an expression
+   argument expands safely. */
+#define LOC_HAS_ABSPATH(loc) ((loc) && ((loc)->path) && ((loc)->path[0] == '/'))
+
+/* Build the xattr key "<prefix><pgfid as text>" into var.  The buffer
+   comes from alloca(), so it is only valid inside the function that
+   expands the macro. */
+#define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) do { \
+ var = alloca (strlen (prefix) + UUID_CANONICAL_FORM_LEN + 1); \
+ strcpy (var, prefix); \
+ strcat (var, uuid_utoa (pgfid)); \
+ } while (0)
+
+/* Store value under key on path with sys_lsetxattr().  value is
+   converted with hton32() in place, so the variable named by the
+   caller holds the converted value afterwards.  On failure op_errno,
+   which is not a macro parameter and must exist in the expanding
+   scope, is set from errno, a warning is logged and control jumps to
+   label. */
+#define SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
+ value = hton32 (value); \
+ op_ret = sys_lsetxattr (path, key, &value, sizeof (value), \
+ flags); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ gf_log (this->name, GF_LOG_WARNING, \
+ "setting xattr failed on %s: key = %s (%s)", \
+ path, key, strerror (op_errno)); \
+ goto label; \
+ } \
+ } while (0)
-#define LOC_HAS_ABSPATH(loc) ((loc) && (loc->path) && (loc->path[0] == '/'))
+/* Remove key from path with sys_lremovexattr().  On failure op_errno,
+   which is not a macro parameter and must exist in the expanding
+   scope, is set from errno, a warning is logged and control jumps to
+   label. */
+#define REMOVE_PGFID_XATTR(path, key, op_ret, this, label) do { \
+ op_ret = sys_lremovexattr (path, key); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ gf_log (this->name, GF_LOG_WARNING, "removing xattr " \
+ "failed on %s: key = %s (%s)", path, key, \
+ strerror (op_errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+/* should be invoked holding a lock */
+/* Increment the counter stored under key on path.  The current value
+   is read with sys_lgetxattr(); when the attribute is missing
+   (ENOATTR) the counter starts at 1, otherwise the stored value is
+   converted with ntoh32() and incremented.  The result is written
+   back through SET_PGFID_XATTR, so value ends up in the converted
+   byte order.  Any other read failure sets op_errno (taken from the
+   expanding scope), logs a warning and jumps to label. */
+#define LINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
+ op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ if (op_errno == ENOATTR) { \
+ value = 1; \
+ } else { \
+ gf_log (this->name, GF_LOG_WARNING,"getting xattr " \
+ "failed on %s: key = %s (%s)", path, key, \
+ strerror (op_errno)); \
+ goto label; \
+ } \
+ } else { \
+ value = ntoh32 (value); \
+ value++; \
+ } \
+ SET_PGFID_XATTR (path, key, value, flags, op_ret, this, label); \
+ } while (0)
+
+/* should be invoked holding a lock */
+/* Decrement the counter stored under key on path.  The current value
+   is read with sys_lgetxattr(), converted with ntoh32() and
+   decremented; a result greater than zero is written back through
+   SET_PGFID_XATTR, anything else removes the attribute through
+   REMOVE_PGFID_XATTR.  A read failure, a missing attribute included,
+   sets op_errno (taken from the expanding scope), logs a warning and
+   jumps to label.
+   NOTE(review): the "value > 0" test only catches an underflow when
+   value has a signed type -- confirm against the callers. */
+#define UNLINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
+ op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
+ if (op_ret == -1) { \
+ op_errno = errno; \
+ gf_log (this->name, GF_LOG_WARNING, "getting xattr failed on " \
+ "%s: key = %s (%s)", path, key, strerror (op_errno)); \
+ goto label; \
+ } else { \
+ value = ntoh32 (value); \
+ value--; \
+ if (value > 0) { \
+ SET_PGFID_XATTR (path, key, value, flags, op_ret, \
+ this, label); \
+ } else { \
+ REMOVE_PGFID_XATTR (path, key, op_ret, this, label); \
+ } \
+ } \
+ } while (0)
#define MAKE_REAL_PATH(var, this, path) do { \
var = alloca (strlen (path) + POSIX_BASE_PATH_LEN(this) + 2); \
@@ -37,7 +103,6 @@
strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \
} while (0)
-
#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \
int __len; \
__len = posix_handle_path (this, gfid, base, NULL, 0); \
@@ -71,7 +136,12 @@
#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \
if (uuid_is_null (loc->gfid)) { \
gf_log (this->name, GF_LOG_ERROR, \
- "null gfid for path %s", loc->path); \
+ "null gfid for path %s", (loc)->path); \
+ break; \
+ } \
+ if (LOC_HAS_ABSPATH (loc)) { \
+ MAKE_REAL_PATH (rpath, this, (loc)->path); \
+ op_ret = posix_pstat (this, (loc)->gfid, rpath, iatt_p); \
break; \
} \
errno = 0; \
@@ -81,11 +151,6 @@
break; \
} \
/* __ret == -1 && errno == ELOOP */ \
- if (LOC_HAS_ABSPATH (loc)) { \
- MAKE_REAL_PATH (rpath, this, loc->path); \
- op_ret = posix_pstat (this, loc->gfid, rpath, iatt_p); \
- break; \
- } \
} while (0)
@@ -98,6 +163,13 @@
break; \
} \
\
+ if (LOC_HAS_ABSPATH (loc)) { \
+ MAKE_REAL_PATH (entp, this, loc->path); \
+ __parp = strdupa (entp); \
+ parp = dirname (__parp); \
+ op_ret = posix_pstat (this, NULL, entp, ent_p); \
+ break; \
+ } \
errno = 0; \
op_ret = posix_istat (this, loc->pargfid, loc->name, ent_p); \
if (errno != ELOOP) { \
@@ -106,21 +178,24 @@
break; \
} \
/* __ret == -1 && errno == ELOOP */ \
- if (LOC_HAS_ABSPATH (loc)) { \
- MAKE_REAL_PATH (entp, this, loc->path); \
- __parp = strdupa (entp); \
- parp = dirname (__parp); \
- op_ret = posix_pstat (this, NULL, entp, ent_p); \
- break; \
- } \
/* expand ELOOP */ \
} while (0)
+#define POSIX_ANCESTRY_PATH (1 << 0)
+#define POSIX_ANCESTRY_DENTRY (1 << 1)
int
posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf,
size_t len);
+
+int
+posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize,
+ gf_dirent_t *head, int type, uuid_t gfid,
+ const size_t handle_size,
+ const char *priv_base_path,
+ inode_table_t *table, inode_t **parent,
+ dict_t *xdata);
int
posix_handle_path_safe (xlator_t *this, uuid_t gfid, const char *basename,
char *buf, size_t len);
@@ -148,4 +223,6 @@ int posix_handle_init (xlator_t *this);
int posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
char *real_path);
+int
+posix_handle_trash_init (xlator_t *this);
#endif /* !_POSIX_HANDLE_H */
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index f4334302f..ab46f7f7e 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -32,6 +22,7 @@
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
+#include <signal.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -54,25 +45,22 @@
#include "timer.h"
#include "glusterfs3-xdr.h"
#include "hashfn.h"
+#include "glusterfs-acl.h"
#include <fnmatch.h>
-typedef struct {
- xlator_t *this;
- const char *real_path;
- dict_t *xattr;
- struct iatt *stbuf;
- loc_t *loc;
-} posix_xattr_filler_t;
-
char *marker_xattrs[] = {"trusted.glusterfs.quota.*",
"trusted.glusterfs.*.xtime",
NULL};
+char *marker_contri_key = "trusted.*.*.contri";
+
static char* posix_ignore_xattrs[] = {
"gfid-req",
GLUSTERFS_ENTRYLK_COUNT,
GLUSTERFS_INODELK_COUNT,
GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GF_GFIDLESS_LOOKUP,
NULL
};
@@ -117,15 +105,143 @@ out:
return ignore;
}
-static void
+/*
+ * Read the extended attribute `key` of filler->real_path from the backend
+ * and store a copy of its value in filler->xattr under the same key.
+ *
+ * Returns the negative dict_set_bin() result when storing fails, and 0 in
+ * every other case: attribute absent or empty, allocation failure, or a
+ * failed second read.
+ */
+static int
+_posix_xattr_get_set_from_backend (posix_xattr_filler_t *filler, char *key)
+{
+        ssize_t  len = -1;
+        char    *buf = NULL;
+        int      rc  = 0;
+
+        /* The first call only reports how large the value is. */
+        len = sys_lgetxattr (filler->real_path, key, NULL, 0);
+        if (len <= 0)
+                return 0;
+
+        /* One spare byte so the value can be NUL-terminated below. */
+        buf = GF_CALLOC (1, len + 1, gf_posix_mt_char);
+        if (buf == NULL)
+                return 0;
+
+        len = sys_lgetxattr (filler->real_path, key, buf, len);
+        if (len <= 0) {
+                gf_log (filler->this->name, GF_LOG_WARNING,
+                        "getxattr failed. path: %s, key: %s",
+                        filler->real_path, key);
+                GF_FREE (buf);
+                return 0;
+        }
+
+        buf[len] = '\0';
+
+        rc = dict_set_bin (filler->xattr, key, buf, len);
+        if (rc < 0) {
+                gf_log (filler->this->name, GF_LOG_DEBUG,
+                        "dict set failed. path: %s, key: %s",
+                        filler->real_path, key);
+                /* the buffer is released here when the dict rejects it */
+                GF_FREE (buf);
+                return rc;
+        }
+
+        return 0;
+}
+
+static int gf_posix_xattr_enotsup_log;
+
+/*
+ * Copy every extended attribute of filler->real_path whose name matches
+ * marker_contri_key into filler->xattr.
+ *
+ * Returns 0 when the attribute list was read (failures while copying an
+ * individual attribute are deliberately ignored), -1 when the size probe
+ * fails, and the non-positive listxattr result when the second read fails
+ * or returns nothing.
+ */
+static int
+_posix_get_marker_all_contributions (posix_xattr_filler_t *filler)
+{
+        ssize_t  size = -1, remaining_size = -1, list_offset = 0;
+        size_t   name_len = 0;
+        int      ret = -1;
+        char    *list = NULL, *name = NULL;
+
+        size = sys_llistxattr (filler->real_path, NULL, 0);
+        if (size == -1) {
+                if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+                                             THIS->name, GF_LOG_WARNING,
+                                             "Extended attributes not "
+                                             "supported (try remounting brick"
+                                             " with 'user_xattr' flag)");
+
+                } else {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "listxattr failed on %s: %s",
+                                filler->real_path, strerror (errno));
+
+                }
+
+                goto out;
+        }
+
+        if (size == 0) {
+                ret = 0;
+                goto out;
+        }
+
+        /* alloca() has no failure return, so there is nothing to check. */
+        list = alloca (size + 1);
+
+        size = sys_llistxattr (filler->real_path, list, size);
+        if (size <= 0) {
+                ret = size;
+                goto out;
+        }
+
+        /* The second read never returns more than the capacity passed in,
+         * so the spare byte is in bounds; terminating the buffer keeps
+         * strlen() below from running past the end of the list. */
+        list[size] = '\0';
+
+        remaining_size = size;
+        list_offset = 0;
+
+        while (remaining_size > 0) {
+                /* Names are used in place: no copy into a fixed-size
+                 * buffer, hence no length limit to overflow. */
+                name = list + list_offset;
+                if (*name == '\0')
+                        break;
+
+                name_len = strlen (name);
+
+                if (fnmatch (marker_contri_key, name, 0) == 0) {
+                        /* best effort: a failed copy does not stop the walk */
+                        (void) _posix_xattr_get_set_from_backend (filler,
+                                                                  name);
+                }
+
+                remaining_size -= name_len + 1;
+                list_offset += name_len + 1;
+        }
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+/*
+ * Serve a request for a marker "contri" xattr.
+ *
+ * The key is split on '.'; when its fourth component starts with "contri"
+ * every contribution xattr on the file is returned, otherwise only the
+ * xattr named by `key` is fetched.
+ *
+ * Returns -1 if the key cannot be duplicated, else the result of the
+ * helper that was called.
+ */
+static int
+_posix_get_marker_quota_contributions (posix_xattr_filler_t *filler, char *key)
+{
+        char *saveptr = NULL, *token = NULL, *tmp_key = NULL;
+        char *ptr     = NULL;
+        int   i       = 0, ret = 0;
+
+        /* strtok_r() modifies its input, so work on a private copy */
+        tmp_key = ptr = gf_strdup (key);
+        if (!ptr)
+                return -1;
+
+        for (i = 0; i < 4; i++) {
+                token = strtok_r (tmp_key, ".", &saveptr);
+                tmp_key = NULL;
+                /* fewer than four non-empty components (e.g. "a...contri") */
+                if (!token)
+                        break;
+        }
+
+        if (token && (strncmp (token, "contri", strlen ("contri")) == 0)) {
+                ret = _posix_get_marker_all_contributions (filler);
+        } else {
+                ret = _posix_xattr_get_set_from_backend (filler, key);
+        }
+
+        GF_FREE (ptr);
+
+        return ret;
+}
+
+static int
_posix_xattr_get_set (dict_t *xattr_req,
char *key,
data_t *data,
void *xattrargs)
{
posix_xattr_filler_t *filler = xattrargs;
- char *value = NULL;
- ssize_t xattr_size = -1;
int ret = -1;
char *databuf = NULL;
int _fd = -1;
@@ -150,6 +266,16 @@ _posix_xattr_get_set (dict_t *xattr_req,
goto err;
}
+ /*
+ * There could be a situation where the ia_size is
+ * zero. GF_CALLOC will return a pointer to the
+ * memory initialized by gf_mem_set_acct_info.
+ * This function adds a header and a footer to
+ * the allocated memory. The returned pointer
+ * points to the memory just after the header, but
+ * when size is zero, there is no space for user
+ * data. The memory can be freed by calling GF_FREE.
+ */
databuf = GF_CALLOC (1, filler->stbuf->ia_size,
gf_posix_mt_char);
if (!databuf) {
@@ -187,47 +313,41 @@ _posix_xattr_get_set (dict_t *xattr_req,
err:
if (_fd != -1)
close (_fd);
- if (databuf)
- GF_FREE (databuf);
+ GF_FREE (databuf);
}
} else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
loc = filler->loc;
- if (loc && !list_empty (&loc->inode->fd_list)) {
- ret = dict_set_uint32 (filler->xattr, key, 1);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- key);
- } else {
- ret = dict_set_uint32 (filler->xattr, key, 0);
+ if (loc) {
+ ret = dict_set_uint32 (filler->xattr, key,
+ loc->inode->fd_count);
if (ret < 0)
gf_log (filler->this->name, GF_LOG_WARNING,
"Failed to set dictionary value for %s",
key);
}
- } else {
- xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0);
-
- if (xattr_size > 0) {
- value = GF_CALLOC (1, xattr_size + 1,
- gf_posix_mt_char);
- if (!value)
- return;
-
- sys_lgetxattr (filler->real_path, key, value,
- xattr_size);
+ } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
+ char *path = NULL;
+ ret = posix_get_ancestry (filler->this, filler->loc->inode,
+ NULL, &path, POSIX_ANCESTRY_PATH,
+ &filler->op_errno, xattr_req);
+ if (ret < 0) {
+ goto out;
+ }
- value[xattr_size] = '\0';
- ret = dict_set_bin (filler->xattr, key,
- value, xattr_size);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_DEBUG,
- "dict set failed. path: %s, key: %s",
- filler->real_path, key);
+ ret = dict_set_dynstr (filler->xattr, GET_ANCESTRY_PATH_KEY,
+ path);
+ if (ret < 0) {
+ GF_FREE (path);
+ goto out;
}
+
+ } else if (fnmatch (marker_contri_key, key, 0) == 0) {
+ ret = _posix_get_marker_quota_contributions (filler, key);
+ } else {
+ ret = _posix_xattr_get_set_from_backend (filler, key);
}
out:
- return;
+ return 0;
}
@@ -235,14 +355,17 @@ int
posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt)
{
int ret = 0;
+ ssize_t size = 0;
if (!iatt)
return 0;
- ret = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+ size = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
/* Return value of getxattr */
- if ((ret == 16) || (ret == -1))
+ if ((size == 16) || (size == -1))
ret = 0;
+ else
+ ret = size;
return ret;
}
@@ -252,14 +375,17 @@ int
posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt)
{
int ret = 0;
+ ssize_t size = 0;
if (!iatt)
return 0;
- ret = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+ size = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
/* Return value of getxattr */
- if ((ret == 16) || (ret == -1))
+ if ((size == 16) || (size == -1))
ret = 0;
+ else
+ ret = size;
return ret;
}
@@ -277,7 +403,7 @@ posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf)
goto out;
}
for (i = 15; i > (15 - 8); i--) {
- temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
+ temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
j += 8;
}
buf->ia_ino = temp_ino;
@@ -332,11 +458,21 @@ posix_istat (xlator_t *this, uuid_t gfid, const char *basename,
ret = lstat (real_path, &lstatbuf);
- if (ret == -1) {
- if (errno != ENOENT && errno != ELOOP)
- gf_log (this->name, GF_LOG_WARNING,
- "lstat failed on %s (%s)",
- real_path, strerror (errno));
+ if (ret != 0) {
+ if (ret == -1) {
+ if (errno != ENOENT && errno != ELOOP)
+ gf_log (this->name, GF_LOG_WARNING,
+ "lstat failed on %s (%s)",
+ real_path, strerror (errno));
+ } else {
+ // may be some backend filesystem issue
+ gf_log (this->name, GF_LOG_ERROR, "lstat failed on "
+ "%s and return value is %d instead of -1. "
+ "Please see dmesg output to check whether the "
+ "failure is due to backend filesystem issue",
+ real_path, ret);
+ ret = -1;
+ }
goto out;
}
@@ -380,11 +516,21 @@ posix_pstat (xlator_t *this, uuid_t gfid, const char *path,
ret = lstat (path, &lstatbuf);
- if (ret == -1) {
- if (errno != ENOENT)
- gf_log (this->name, GF_LOG_WARNING,
- "lstat failed on %s (%s)",
- path, strerror (errno));
+ if (ret != 0) {
+ if (ret == -1) {
+ if (errno != ENOENT)
+ gf_log (this->name, GF_LOG_WARNING,
+ "lstat failed on %s (%s)",
+ path, strerror (errno));
+ } else {
+ // may be some backend filesystem issue
+ gf_log (this->name, GF_LOG_ERROR, "lstat failed on "
+ "%s and return value is %d instead of -1. "
+ "Please see dmesg output to check whether the "
+ "failure is due to backend filesystem issue",
+ path, ret);
+ ret = -1;
+ }
goto out;
}
@@ -443,6 +589,7 @@ posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
void *uuid_req = NULL;
uuid_t uuid_curr;
int ret = 0;
+ ssize_t size = 0;
struct stat stat = {0, };
@@ -452,8 +599,8 @@ posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (sys_lstat (path, &stat) != 0)
goto out;
- ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
- if (ret == 16) {
+ size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (size == 16) {
ret = 0;
goto verify_handle;
}
@@ -487,8 +634,8 @@ out:
int
-posix_set_file_contents (xlator_t *this, const char *path, data_pair_t *trav,
- int flags)
+posix_set_file_contents (xlator_t *this, const char *path, char *keyp,
+ data_t *value, int flags)
{
char * key = NULL;
char real_path[PATH_MAX];
@@ -500,7 +647,7 @@ posix_set_file_contents (xlator_t *this, const char *path, data_pair_t *trav,
/* XXX: does not handle assigning GFID to created files */
return -1;
- key = &(trav->key[15]);
+ key = &(keyp[15]);
sprintf (real_path, "%s/%s", path, key);
if (flags & XATTR_REPLACE) {
@@ -512,9 +659,8 @@ posix_set_file_contents (xlator_t *this, const char *path, data_pair_t *trav,
goto create;
}
- if (trav->value->len) {
- ret = write (file_fd, trav->value->data,
- trav->value->len);
+ if (value->len) {
+ ret = write (file_fd, value->data, value->len);
if (ret == -1) {
op_ret = -errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -546,7 +692,7 @@ posix_set_file_contents (xlator_t *this, const char *path, data_pair_t *trav,
goto out;
}
- ret = write (file_fd, trav->value->data, trav->value->len);
+ ret = write (file_fd, value->data, value->len);
if (ret == -1) {
op_ret = -errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -630,8 +776,7 @@ posix_get_file_contents (xlator_t *this, uuid_t pargfid,
out:
if (op_ret < 0) {
- if (*contents)
- GF_FREE (*contents);
+ GF_FREE (*contents);
if (file_fd != -1)
close (file_fd);
}
@@ -639,52 +784,81 @@ out:
return op_ret;
}
+#ifdef GF_DARWIN_HOST_OS
+/*
+ * Log the key, flags and length of an xattr value that is being set on
+ * real_path; at TRACE log level the value bytes are included as a hex dump,
+ * otherwise a placeholder string is logged instead.
+ */
+static
+void posix_dump_buffer (xlator_t *this, const char *real_path, const char *key,
+                        data_t *value, int flags)
+{
+        /* three characters (" xx") per byte plus the terminating NUL */
+        char buffer[3*value->len+1];
+        int index = 0;
+        buffer[0] = 0;
+        gf_loglevel_t log_level = gf_log_get_loglevel ();
+        if (log_level == GF_LOG_TRACE) {
+                /* Read the bytes as unsigned: a plain char >= 0x80 may be
+                 * sign-extended when promoted to int, so "%02x" would print
+                 * eight hex digits and overrun the three-character slot. */
+                unsigned char *data = (unsigned char *) value->data;
+                for (index = 0; index < value->len; index++)
+                        sprintf(buffer+3*index, " %02x", data[index]);
+        }
+        gf_log (this->name, GF_LOG_DEBUG,
+                "Dump %s: key:%s flags: %u length:%u data:%s ",
+                real_path, key, flags, value->len,
+                (log_level == GF_LOG_TRACE ? buffer : "<skipped in DEBUG>"));
+}
+#endif
+
static int gf_xattr_enotsup_log;
int
posix_handle_pair (xlator_t *this, const char *real_path,
- data_pair_t *trav, int flags)
+ char *key, data_t *value, int flags)
{
int sys_ret = -1;
int ret = 0;
- if (ZR_FILE_CONTENT_REQUEST(trav->key)) {
- ret = posix_set_file_contents (this, real_path, trav, flags);
+ if (XATTR_IS_PATHINFO (key)) {
+ ret = -EACCES;
+ goto out;
+ } else if (ZR_FILE_CONTENT_REQUEST(key)) {
+ ret = posix_set_file_contents (this, real_path, key, value,
+ flags);
} else {
- sys_ret = sys_lsetxattr (real_path, trav->key,
- trav->value->data,
- trav->value->len, flags);
-
+ sys_ret = sys_lsetxattr (real_path, key, value->data,
+ value->len, flags);
+#ifdef GF_DARWIN_HOST_OS
+ posix_dump_buffer(this, real_path, key, value, flags);
+#endif
if (sys_ret < 0) {
+ ret = -errno;
if (errno == ENOTSUP) {
GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
this->name,GF_LOG_WARNING,
"Extended attributes not "
- "supported");
- } else if (errno == ENOENT &&
- !posix_special_xattr (marker_xattrs,
- trav->key)) {
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr on %s failed: %s", real_path,
- strerror (errno));
+ "supported (try remounting "
+ "brick with 'user_xattr' "
+ "flag)");
+ } else if (errno == ENOENT) {
+ if (!posix_special_xattr (marker_xattrs,
+ key)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setxattr on %s failed: %s",
+ real_path, strerror (errno));
+ }
} else {
#ifdef GF_DARWIN_HOST_OS
gf_log (this->name,
((errno == EINVAL) ?
GF_LOG_DEBUG : GF_LOG_ERROR),
- "%s: key:%s error:%s",
- real_path, trav->key,
+ "%s: key:%s flags: %u length:%d error:%s",
+ real_path, key, flags, value->len,
strerror (errno));
#else /* ! DARWIN */
gf_log (this->name, GF_LOG_ERROR,
- "%s: key:%s error:%s",
- real_path, trav->key,
+ "%s: key:%s flags: %u length:%d error:%s",
+ real_path, key, flags, value->len,
strerror (errno));
#endif /* DARWIN */
}
- ret = -errno;
goto out;
}
}
@@ -694,20 +868,28 @@ out:
int
posix_fhandle_pair (xlator_t *this, int fd,
- data_pair_t *trav, int flags)
+ char *key, data_t *value, int flags)
{
int sys_ret = -1;
int ret = 0;
- sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data,
- trav->value->len, flags);
+ if (XATTR_IS_PATHINFO (key)) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ sys_ret = sys_fsetxattr (fd, key, value->data,
+ value->len, flags);
if (sys_ret < 0) {
+ ret = -errno;
if (errno == ENOTSUP) {
GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
this->name,GF_LOG_WARNING,
"Extended attributes not "
- "supported");
+ "supported (try remounting "
+ "brick with 'user_xattr' "
+ "flag)");
} else if (errno == ENOENT) {
gf_log (this->name, GF_LOG_ERROR,
"fsetxattr on fd=%d failed: %s", fd,
@@ -719,17 +901,14 @@ posix_fhandle_pair (xlator_t *this, int fd,
((errno == EINVAL) ?
GF_LOG_DEBUG : GF_LOG_ERROR),
"fd=%d: key:%s error:%s",
- fd, trav->key,
- strerror (errno));
+ fd, key, strerror (errno));
#else /* ! DARWIN */
gf_log (this->name, GF_LOG_ERROR,
"fd=%d: key:%s error:%s",
- fd, trav->key,
- strerror (errno));
+ fd, key, strerror (errno));
#endif /* DARWIN */
}
- ret = -errno;
goto out;
}
@@ -829,7 +1008,7 @@ posix_janitor_thread_proc (void *data)
time (&now);
if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
gf_log (this->name, GF_LOG_TRACE,
- "janitor cleaning out /" GF_REPLICATE_TRASH_DIR);
+ "janitor cleaning out %s", priv->trash_path);
nftw (priv->trash_path,
janitor_walker,
@@ -870,8 +1049,8 @@ posix_spawn_janitor_thread (xlator_t *this)
LOCK (&priv->lock);
{
if (!priv->janitor_present) {
- ret = pthread_create (&priv->janitor, NULL,
- posix_janitor_thread_proc, this);
+ ret = gf_thread_create (&priv->janitor, NULL,
+ posix_janitor_thread_proc, this);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -887,6 +1066,74 @@ unlock:
UNLOCK (&priv->lock);
}
+/*
+ * Report whether the file described by `stat` had its status changed
+ * during the current or the previous wall-clock second.
+ *
+ * Returns 1 when st_ctime lies in [now - 1, now], 0 otherwise.
+ */
+static int
+is_fresh_file (struct stat *stat)
+{
+        struct timeval now;
+        time_t         oldest = 0;
+
+        gettimeofday (&now, NULL);
+        oldest = now.tv_sec - 1;
+
+        /* changed too long ago */
+        if (stat->st_ctime < oldest)
+                return 0;
+
+        /* change time lies in the future */
+        if (stat->st_ctime > now.tv_sec)
+                return 0;
+
+        return 1;
+}
+
+
+int
+posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
+{
+        /* Guard against an inode-creating FOP (mkdir/mknod/create ...)
+           racing with a lookup on the same name:
+
+             t0  create thread: mkdir ("name")
+             t1  lookup thread: posix_gfid_set ("name", 2)
+             t2  create thread: posix_gfid_set ("name", 1)
+             t3  both threads:  lstat ("name")
+
+           Here the mkdir ends up with GFID 2 although it asked for GFID 1,
+           which matters when GFID 1 was set on the other subvolumes of
+           replicate/distribute.
+
+           The workaround: when lookup is about to set a GFID on a file
+           that was created very recently and has no GFID yet (i.e. we are
+           between t1 and t2), fail with ENOENT, as though this function
+           had been called at t0, before the file existed.
+        */
+
+        struct stat sb = {0, };
+        uuid_t      gfid_on_disk;
+        int         xattr_len = 0;
+
+        /* nothing requested, or nothing on disk: report success */
+        if (!xattr_req)
+                return 0;
+
+        if (sys_lstat (path, &sb) != 0)
+                return 0;
+
+        xattr_len = sys_lgetxattr (path, GFID_XATTR_KEY, gfid_on_disk, 16);
+        if ((xattr_len != 16) && is_fresh_file (&sb)) {
+                /* no GFID yet on a just-created file: let the creator win */
+                errno = ENOENT;
+                return -1;
+        }
+
+        return posix_gfid_set (this, path, loc, xattr_req);
+}
+
+
int
posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req)
{
@@ -900,17 +1147,17 @@ posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req)
if (sys_lstat (path, &stat) != 0)
goto out;
- data = dict_get (xattr_req, "system.posix_acl_access");
+ data = dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR);
if (data) {
- ret = sys_lsetxattr (path, "system.posix_acl_access",
+ ret = sys_lsetxattr (path, POSIX_ACL_ACCESS_XATTR,
data->data, data->len, 0);
if (ret != 0)
goto out;
}
- data = dict_get (xattr_req, "system.posix_acl_default");
+ data = dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR);
if (data) {
- ret = sys_lsetxattr (path, "system.posix_acl_default",
+ ret = sys_lsetxattr (path, POSIX_ACL_DEFAULT_XATTR,
data->data, data->len, 0);
if (ret != 0)
goto out;
@@ -920,37 +1167,47 @@ out:
return ret;
}
+/*
+ * dict_foreach() callback used while creating an entry: sets the pair
+ * (k, v) on filler->real_path with XATTR_CREATE.
+ *
+ * The GFID, "gfid-req", POSIX ACL and file-content keys are skipped here.
+ * Returns 0 on success or skip; on failure sets errno and returns -1.
+ */
+static int
+_handle_entry_create_keyvalue_pair (dict_t *d, char *k, data_t *v,
+                                    void *tmp)
+{
+        posix_xattr_filler_t *filler = tmp;
+        int                   status = -1;
+
+        if ((strcmp (GFID_XATTR_KEY, k) == 0) ||
+            (strcmp ("gfid-req", k) == 0) ||
+            (strcmp (POSIX_ACL_DEFAULT_XATTR, k) == 0) ||
+            (strcmp (POSIX_ACL_ACCESS_XATTR, k) == 0) ||
+            ZR_FILE_CONTENT_REQUEST(k)) {
+                return 0;
+        }
+
+        status = posix_handle_pair (filler->this, filler->real_path, k, v,
+                                    XATTR_CREATE);
+        if (status >= 0)
+                return 0;
+
+        /* posix_handle_pair() reports failure as a negated errno value */
+        errno = -status;
+        return -1;
+}
+
int
posix_entry_create_xattr_set (xlator_t *this, const char *path,
dict_t *dict)
{
- data_pair_t *trav = NULL;
int ret = -1;
+ posix_xattr_filler_t filler = {0,};
+
if (!dict)
goto out;
- trav = dict->members_list;
- while (trav) {
- if (!strcmp (GFID_XATTR_KEY, trav->key) ||
- !strcmp ("gfid-req", trav->key) ||
- !strcmp ("system.posix_acl_default", trav->key) ||
- !strcmp ("system.posix_acl_access", trav->key) ||
- ZR_FILE_CONTENT_REQUEST(trav->key)) {
- trav = trav->next;
- continue;
- }
+ filler.this = this;
+ filler.real_path = path;
- ret = posix_handle_pair (this, path, trav, XATTR_CREATE);
- if (ret < 0) {
- errno = -ret;
- ret = -1;
- goto out;
- }
- trav = trav->next;
- }
-
- ret = 0;
+ ret = dict_foreach (dict, _handle_entry_create_keyvalue_pair, &filler);
out:
return ret;
@@ -974,7 +1231,7 @@ __posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p)
goto out;
}
- if (fd->pid != -1)
+ if (!fd_is_anonymous(fd))
/* anonymous fd */
goto out;
@@ -1041,10 +1298,256 @@ posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd)
return ret;
}
+/*
+ * Body of the health-check thread. `data` is the posix xlator.
+ *
+ * Every priv->health_check_interval seconds (value sampled once at start)
+ * priv->base_path is stat()ed. When the interval is 0 or a sleep is cut
+ * short, the thread clears priv->health_check_active and exits. When the
+ * stat() fails, CHILD_DOWN is notified to the parent xlator and the
+ * process is sent SIGTERM and then SIGKILL, 30 seconds apart.
+ */
+static void *
+posix_health_check_thread_proc (void *data)
+{
+        xlator_t             *this     = data;
+        struct posix_private *priv     = this->private;
+        struct stat           sb       = {0, };
+        uint32_t              interval = 0;
+        int                   ret      = -1;
+        int                   failed   = 0;
+
+        /* prevent races when the interval is updated */
+        interval = priv->health_check_interval;
+
+        if (interval != 0) {
+                gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, "
+                        "interval = %d seconds", interval);
+
+                for (;;) {
+                        /* a sleep() that returns early is taken as a
+                         * request to leave the loop */
+                        ret = sleep (interval);
+                        if (ret > 0)
+                                break;
+
+                        /* no cancellation while the check is running */
+                        pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+                        /* the health-check itself */
+                        ret = stat (priv->base_path, &sb);
+                        if (ret < 0) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "stat() on %s returned: %s",
+                                        priv->base_path, strerror (errno));
+                                /* cancellation stays disabled from here on */
+                                failed = 1;
+                                break;
+                        }
+
+                        pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+                }
+        }
+
+        if (!failed) {
+                gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting");
+
+                LOCK (&priv->lock);
+                {
+                        priv->health_check_active = _gf_false;
+                }
+                UNLOCK (&priv->lock);
+
+                return NULL;
+        }
+
+        /* health-check failed */
+        gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down");
+        xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+
+        ret = sleep (30);
+        if (ret == 0) {
+                gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGTERM");
+                kill (getpid(), SIGTERM);
+        }
+
+        ret = sleep (30);
+        if (ret == 0) {
+                gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGKILL");
+                kill (getpid(), SIGKILL);
+        }
+
+        return NULL;
+}
+
+/*
+ * (Re)start the health-check thread of xlator `xl`.
+ *
+ * Under priv->lock: cancels the thread currently marked active, then, when
+ * priv->health_check_interval is non-zero, creates a new detached thread
+ * running posix_health_check_thread_proc(). If the thread cannot be
+ * created the interval is reset to 0 and the check stays inactive.
+ */
+void
+posix_spawn_health_check_thread (xlator_t *xl)
+{
+        struct posix_private *priv = NULL;
+        int                   ret  = -1;
+
+        priv = xl->private;
+
+        LOCK (&priv->lock);
+        {
+                /* cancel the running thread */
+                if (priv->health_check_active == _gf_true) {
+                        pthread_cancel (priv->health_check);
+                        priv->health_check_active = _gf_false;
+                }
+
+                /* prevent scheduling a check in a tight loop */
+                if (priv->health_check_interval == 0)
+                        goto unlock;
+
+                ret = gf_thread_create (&priv->health_check, NULL,
+                                        posix_health_check_thread_proc, xl);
+                /* NOTE(review): gf_thread_create() presumably forwards the
+                 * pthread_create() result, a positive error number with
+                 * errno left untouched -- confirm. Testing for any non-zero
+                 * value and picking the matching error source is correct
+                 * for either convention. */
+                if (ret != 0) {
+                        priv->health_check_interval = 0;
+                        priv->health_check_active = _gf_false;
+                        gf_log (xl->name, GF_LOG_ERROR,
+                                "unable to setup health-check thread: %s",
+                                strerror ((ret > 0) ? ret : errno));
+                        goto unlock;
+                }
+
+                /* run the thread detached, resources will be freed on exit */
+                pthread_detach (priv->health_check);
+                priv->health_check_active = _gf_true;
+        }
+unlock:
+        UNLOCK (&priv->lock);
+}
int
-posix_fd_ctx_get_off (fd_t *fd, xlator_t *this, struct posix_fd **pfd,
- off_t offset)
+posix_fsyncer_pick (xlator_t *this, struct list_head *head)
{
- return posix_fd_ctx_get (fd, this, pfd);
+        /*
+         * Block until priv->fsyncs is non-empty, then move the whole queue
+         * onto `head` and reset priv->fsync_queue_count.
+         * Returns the value the counter had before the reset.
+         */
+        struct posix_private *priv   = this->private;
+        int                   picked = 0;
+
+        pthread_mutex_lock (&priv->fsync_mutex);
+        {
+                /* re-check the queue after every wake-up */
+                while (list_empty (&priv->fsyncs))
+                        pthread_cond_wait (&priv->fsync_cond,
+                                           &priv->fsync_mutex);
+
+                list_splice_init (&priv->fsyncs, head);
+
+                picked = priv->fsync_queue_count;
+                priv->fsync_queue_count = 0;
+        }
+        pthread_mutex_unlock (&priv->fsync_mutex);
+
+        return picked;
+}
+
+
+/*
+ * Complete one queued fsync request.
+ *
+ * When do_fsync is set, the fd behind `stub` is flushed with fdatasync()
+ * (if stub->args.datasync is set) or fsync(); otherwise the request is
+ * completed without touching the fd. The stub is always unwound:
+ * (-1, EINVAL) when the fd context is missing, (-1, errno) when the sync
+ * call fails, (0, 0) otherwise.
+ */
+void
+posix_fsyncer_process (xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync)
+{
+        struct posix_fd *pfd      = NULL;
+        int              ret      = -1;
+        int              op_errno = 0;
+
+        ret = posix_fd_ctx_get (stub->args.fd, this, &pfd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not get fdctx for fd(%s)",
+                        uuid_utoa (stub->args.fd->inode->gfid));
+                call_unwind_error (stub, -1, EINVAL);
+                return;
+        }
+
+        if (do_fsync) {
+                if (stub->args.datasync)
+                        ret = sys_fdatasync (pfd->fd);
+                else
+                        ret = sys_fsync (pfd->fd);
+        } else {
+                ret = 0;
+        }
+
+        if (ret) {
+                /* capture errno before logging can overwrite it */
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not fsync fd(%s): %s",
+                        uuid_utoa (stub->args.fd->inode->gfid),
+                        strerror (op_errno));
+                call_unwind_error (stub, -1, op_errno);
+                return;
+        }
+
+        call_unwind_error (stub, 0, 0);
+}
+
+
+/*
+ * Flush the filesystem once for a whole batch of queued fsyncs.
+ *
+ * The fd of the last entry on `head` is used as the handle: on Linux with
+ * SYS_syncfs available the syncfs system call is issued on it, everywhere
+ * else a plain sync() is done. Nothing happens if that entry has no fd
+ * context.
+ */
+static void
+posix_fsyncer_syncfs (xlator_t *this, struct list_head *head)
+{
+        struct posix_fd *pfd  = NULL;
+        call_stub_t     *last = NULL;
+
+        last = list_entry (head->prev, call_stub_t, list);
+        if (posix_fd_ctx_get (last->args.fd, this, &pfd) != 0)
+                return;
+
+#ifdef GF_LINUX_HOST_OS
+        /* syncfs() is not "declared" in RHEL's glibc even though
+           the kernel has support.
+        */
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
+#if defined (GF_LINUX_HOST_OS) && defined (SYS_syncfs)
+        syscall (SYS_syncfs, pfd->fd);
+#else
+        sync();
+#endif
+}
+
+
+/*
+ * Body of the batching fsync thread. `d` is the posix xlator.
+ *
+ * Loops forever: waits for queued fsync stubs, sleeps for
+ * priv->batch_fsync_delay_usec, optionally issues one filesystem-wide sync
+ * (the three BATCH_SYNCFS* modes), then completes the stubs walking the
+ * batch in reverse. Per-fd fsyncs are skipped entirely in BATCH_SYNCFS
+ * mode and after the first stub in BATCH_SYNCFS_SINGLE_FSYNC mode.
+ */
+void *
+posix_fsyncer (void *d)
+{
+        xlator_t             *this     = d;
+        struct posix_private *priv     = this->private;
+        struct list_head      batch;
+        call_stub_t          *cur      = NULL;
+        call_stub_t          *next     = NULL;
+        gf_boolean_t          do_fsync = _gf_true;
+        int                   picked   = 0;
+
+        while (1) {
+                INIT_LIST_HEAD (&batch);
+
+                picked = posix_fsyncer_pick (this, &batch);
+
+                /* delay before processing the batch that was picked */
+                usleep (priv->batch_fsync_delay_usec);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "picked %d fsyncs", picked);
+
+                switch (priv->batch_fsync_mode) {
+                case BATCH_SYNCFS:
+                case BATCH_SYNCFS_SINGLE_FSYNC:
+                case BATCH_SYNCFS_REVERSE_FSYNC:
+                        posix_fsyncer_syncfs (this, &batch);
+                        break;
+                case BATCH_NONE:
+                case BATCH_REVERSE_FSYNC:
+                        break;
+                }
+
+                do_fsync = (priv->batch_fsync_mode == BATCH_SYNCFS)
+                           ? _gf_false : _gf_true;
+
+                list_for_each_entry_safe_reverse (cur, next, &batch, list) {
+                        list_del_init (&cur->list);
+
+                        posix_fsyncer_process (this, cur, do_fsync);
+
+                        /* only the first stub processed gets a real fsync */
+                        if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC)
+                                do_fsync = _gf_false;
+                }
+        }
}
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index 10aa75edc..81752c17e 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __POSIX_MEM_TYPES_H__
#define __POSIX_MEM_TYPES_H__
@@ -30,6 +20,7 @@ enum gf_posix_mem_types_ {
gf_posix_mt_int32_t,
gf_posix_mt_posix_dev_t,
gf_posix_mt_trash_path,
+ gf_posix_mt_paiocb,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 168e7f7d2..f7800184e 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -33,6 +23,8 @@
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -59,8 +51,11 @@
#include "timer.h"
#include "glusterfs3-xdr.h"
#include "hashfn.h"
+#include "posix-aio.h"
+#include "glusterfs-acl.h"
extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
#undef HAVE_SET_FSID
#ifdef HAVE_SET_FSID
@@ -84,7 +79,6 @@ extern char *marker_xattrs[];
#define SET_TO_OLD_FS_ID()
#endif
-
int
posix_forget (xlator_t *this, inode_t *inode)
{
@@ -110,16 +104,18 @@ posix_lookup (call_frame_t *frame, xlator_t *this,
char * par_path = NULL;
struct iatt postparent = {0,};
int32_t gfidless = 0;
+ struct posix_private *priv = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
+
+ priv = this->private;
/* The Hidden directory should be for housekeeping purpose and it
should not get any gfid on it */
- if (__is_root_gfid (loc->pargfid) &&
- (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ if (__is_root_gfid (loc->pargfid) && loc->name
+ && (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
gf_log (this->name, GF_LOG_WARNING,
"Lookup issued on %s, which is not permitted",
GF_HIDDEN_PATH);
@@ -130,14 +126,14 @@ posix_lookup (call_frame_t *frame, xlator_t *this,
op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless);
op_ret = -1;
- if (uuid_is_null (loc->pargfid)) {
+ if (uuid_is_null (loc->pargfid) || (loc->name == NULL)) {
/* nameless lookup */
MAKE_INODE_HANDLE (real_path, this, loc, &buf);
} else {
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf);
if (uuid_is_null (loc->inode->gfid)) {
- posix_gfid_set (this, real_path, loc, xdata);
+ posix_gfid_heal (this, real_path, loc, xdata);
MAKE_ENTRY_HANDLE (real_path, par_path, this,
loc, &buf);
}
@@ -169,6 +165,12 @@ parent:
gf_log (this->name, GF_LOG_ERROR,
"post-operation lstat on parent %s failed: %s",
par_path, strerror (op_errno));
+ if (op_errno == ENOENT)
+ /* If parent directory is missing in a lookup,
+ errno should be ESTALE (bad handle) and not
+ ENOENT (missing entry)
+ */
+ op_errno = ESTALE;
goto out;
}
}
@@ -218,7 +220,8 @@ posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
"lstat on %s failed: %s", real_path,
strerror (op_errno));
goto out;
@@ -359,23 +362,23 @@ posix_setattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- if (valid & GF_SET_ATTR_MODE) {
- op_ret = posix_do_chmod (this, real_path, stbuf);
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
+ op_ret = posix_do_chown (this, real_path, stbuf, valid);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "setattr (chmod) on %s failed: %s", real_path,
+ "setattr (chown) on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
}
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
- op_ret = posix_do_chown (this, real_path, stbuf, valid);
+ if (valid & GF_SET_ATTR_MODE) {
+ op_ret = posix_do_chmod (this, real_path, stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "setattr (chown) on %s failed: %s", real_path,
+ "setattr (chmod) on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
@@ -569,6 +572,318 @@ out:
return 0;
}
+#ifdef FALLOC_FL_KEEP_SIZE
+/*
+ * Run sys_fallocate() on the backend descriptor behind @fd, bracketed
+ * by posix_fdstat() calls that fill @statpre and @statpost.
+ *
+ * SET_FS_ID is invoked with the uid/gid found in frame->root before the
+ * descriptor is touched, and SET_TO_OLD_FS_ID runs on every exit path
+ * taken after that point (presumably restoring the previous ids).
+ *
+ * Returns the result of the final posix_fdstat() on success, or a
+ * negative value on failure: -errno for a failed fstat/fallocate, the
+ * value returned by posix_fd_ctx_get() when the fd context lookup
+ * fails, and -1 when one of frame/this/fd is NULL.
+ */
+static int32_t
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+                   off_t offset, size_t len, struct iatt *statpre,
+                   struct iatt *statpost)
+{
+        struct posix_fd *pfd = NULL;
+        int32_t          ret = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        /* Validate first: SET_FS_ID below reads frame->root, so a NULL
+         * frame has to be rejected before it.  These failures jump past
+         * SET_TO_OLD_FS_ID because SET_FS_ID has not run yet. */
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        ret = posix_fd_ctx_get (fd, this, &pfd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "pfd is NULL from fd=%p", fd);
+                goto out;
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpre);
+        if (ret == -1) {
+                ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fallocate (fstat) failed on fd=%p: %s", fd,
+                        strerror (errno));
+                goto out;
+        }
+
+        ret = sys_fallocate(pfd->fd, flags, offset, len);
+        if (ret == -1) {
+                ret = -errno;
+                goto out;
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpost);
+        if (ret == -1) {
+                ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fallocate (fstat) failed on fd=%p: %s", fd,
+                        strerror (errno));
+                goto out;
+        }
+
+out:
+        SET_TO_OLD_FS_ID ();
+err:
+        return ret;
+}
+#endif /* FALLOC_FL_KEEP_SIZE */
+
+/*
+ * Allocate a zeroed buffer of size + ALIGN_SIZE bytes and report two
+ * pointers: the return value is the raw allocation (the pointer to hand
+ * to GF_FREE), while *aligned_buf receives the GF_ALIGN_BUF-adjusted
+ * pointer derived from it.
+ *
+ * On allocation failure NULL is returned and *aligned_buf is left
+ * untouched.
+ */
+char*
+_page_aligned_alloc (size_t size, char **aligned_buf)
+{
+        char *raw = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char);
+
+        if (raw != NULL) {
+                /* page aligned buffer */
+                char *aligned = GF_ALIGN_BUF (raw, ALIGN_SIZE);
+
+                *aligned_buf = aligned;
+        }
+
+        return raw;
+}
+
+/*
+ * Write @len zero bytes to @fd starting at @offset.
+ *
+ * One zeroed buffer of vect_size bytes is shared by every iovec entry,
+ * so a single writev() can emit num_vect * vect_size zeros.  The work
+ * is split into:
+ *   - num_loop batches of num_vect vectors each,
+ *   - one batch of `extra` vectors (the full vectors left over when the
+ *     vector count exceeds MAX_NO_VECT),
+ *   - one final write of `remain` bytes (len % vect_size).
+ *
+ * When @o_direct is non-zero the buffer comes from _page_aligned_alloc().
+ *
+ * Returns 0 on success and -1 on failure.  The writev() byte count is
+ * deliberately not returned: it is an ssize_t and would be narrowed by
+ * the int32_t return type.  On failure errno is preserved across the
+ * cleanup so the caller can still read the cause.
+ *
+ * NOTE(review): a short write from writev() is not retried.
+ */
+static int32_t
+_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct)
+{
+        off_t          num_vect    = 0;
+        off_t          num_loop    = 1;
+        off_t          idx         = 0;
+        int32_t        op_ret      = -1;
+        int32_t        vect_size   = VECTOR_SIZE;
+        off_t          remain      = 0;
+        off_t          extra       = 0;
+        struct iovec  *vector      = NULL;
+        char          *iov_base    = NULL;
+        char          *alloc_buf   = NULL;
+        int            saved_errno = 0;
+
+        if (len == 0)
+                return 0;
+        if (len < 0) {
+                /* a negative length would become a huge iov_len */
+                errno = EINVAL;
+                return -1;
+        }
+        if (len < VECTOR_SIZE)
+                vect_size = len;
+
+        num_vect = len / (vect_size);
+        remain = len % vect_size ;
+        if (num_vect > MAX_NO_VECT) {
+                extra = num_vect % MAX_NO_VECT;
+                num_loop = num_vect / MAX_NO_VECT;
+                num_vect = MAX_NO_VECT;
+        }
+
+        vector = GF_CALLOC (num_vect, sizeof(struct iovec),
+                            gf_common_mt_iovec);
+        if (!vector)
+                return -1;
+        if (o_direct) {
+                alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
+                if (!alloc_buf) {
+                        /* keep errno intact across the log call */
+                        saved_errno = errno;
+                        gf_log ("_posix_do_zerofill", GF_LOG_DEBUG,
+                                "memory alloc failed, vect_size %d: %s",
+                                vect_size, strerror(saved_errno));
+                        errno = saved_errno;
+                        goto err;
+                }
+        } else {
+                iov_base = GF_CALLOC (vect_size, sizeof(char),
+                                      gf_common_mt_char);
+                if (!iov_base)
+                        goto err;
+        }
+
+        for (idx = 0; idx < num_vect; idx++) {
+                vector[idx].iov_base = iov_base;
+                vector[idx].iov_len  = vect_size;
+        }
+        if (lseek(fd, offset, SEEK_SET) < 0)
+                goto err;
+
+        for (idx = 0; idx < num_loop; idx++) {
+                if (writev(fd, vector, num_vect) < 0)
+                        goto err;
+        }
+        if (extra) {
+                if (writev(fd, vector, extra) < 0)
+                        goto err;
+        }
+        if (remain) {
+                vector[0].iov_len = remain;
+                if (writev(fd, vector , 1) < 0)
+                        goto err;
+        }
+
+        op_ret = 0;
+err:
+        /* the frees below may disturb errno; the caller reads it on failure */
+        saved_errno = errno;
+        if (o_direct)
+                GF_FREE(alloc_buf);
+        else
+                GF_FREE(iov_base);
+        GF_FREE(vector);
+        if (op_ret < 0)
+                errno = saved_errno;
+        return op_ret;
+}
+
+/*
+ * Zero-fill @len bytes of the file behind @fd starting at @offset,
+ * bracketed by posix_fdstat() calls that fill @statpre and @statpost.
+ *
+ * SET_FS_ID is invoked with the uid/gid found in frame->root before the
+ * descriptor is touched, and SET_TO_OLD_FS_ID runs on every exit path
+ * taken after that point.  If the fd was opened with O_SYNC or O_DSYNC
+ * the data is fsync()ed before the post-operation stat.
+ *
+ * Returns the result of the final posix_fdstat() on success, or a
+ * negative value on failure: -errno for a failed stat/zerofill/fsync,
+ * the value returned by posix_fd_ctx_get() when the fd context lookup
+ * fails, and -1 when one of frame/this/fd is NULL.
+ */
+static int32_t
+posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  off_t offset, off_t len, struct iatt *statpre,
+                  struct iatt *statpost)
+{
+        struct posix_fd *pfd = NULL;
+        int32_t          ret = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        /* Validate first: SET_FS_ID below reads frame->root, so a NULL
+         * frame has to be rejected before it.  These failures jump past
+         * SET_TO_OLD_FS_ID because SET_FS_ID has not run yet. */
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        ret = posix_fd_ctx_get (fd, this, &pfd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "pfd is NULL from fd=%p", fd);
+                goto out;
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpre);
+        if (ret == -1) {
+                ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pre-operation fstat failed on fd = %p: %s", fd,
+                        strerror (errno));
+                goto out;
+        }
+        ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
+        if (ret < 0) {
+                ret = -errno;
+                gf_log(this->name, GF_LOG_ERROR,
+                       "zerofill failed on fd %d length %" PRId64 " %s",
+                       pfd->fd, len, strerror(errno));
+                goto out;
+        }
+        if (pfd->flags & (O_SYNC|O_DSYNC)) {
+                ret = fsync (pfd->fd);
+                if (ret) {
+                        /* capture errno before logging can overwrite it */
+                        ret = -errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fsync() in zerofill on fd %d failed: %s",
+                                pfd->fd, strerror (-ret));
+                        goto out;
+                }
+        }
+
+        ret = posix_fdstat (this, pfd->fd, statpost);
+        if (ret == -1) {
+                ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "post operation fstat failed on fd=%p: %s", fd,
+                        strerror (errno));
+                goto out;
+        }
+
+out:
+        SET_TO_OLD_FS_ID ();
+err:
+        return ret;
+}
+
+/*
+ * fallocate fop worker: preallocate @len bytes at @offset on @fd and
+ * unwind the frame with the pre/post attributes.
+ *
+ * @keep_size selects FALLOC_FL_KEEP_SIZE.  When the build has no
+ * FALLOC_FL_KEEP_SIZE the fop is unwound with EOPNOTSUPP.
+ *
+ * `ret` always carries a negative errno on the error path, so the
+ * unwind below can pass -ret as a positive op_errno.
+ *
+ * Always returns 0; the outcome is delivered through STACK_UNWIND_STRICT.
+ */
+static int32_t
+_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        int32_t ret;
+#ifndef FALLOC_FL_KEEP_SIZE
+        /* stored negated: the shared unwind below flips the sign */
+        ret = -EOPNOTSUPP;
+
+#else /* FALLOC_FL_KEEP_SIZE */
+        int32_t     flags    = 0;
+        struct iatt statpre  = {0,};
+        struct iatt statpost = {0,};
+
+        if (keep_size)
+                flags = FALLOC_FL_KEEP_SIZE;
+
+        ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
+                                 &statpre, &statpost);
+        if (ret < 0)
+                goto err;
+
+        STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL);
+        return 0;
+
+err:
+#endif /* FALLOC_FL_KEEP_SIZE */
+        STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * discard fop: punch a hole of @len bytes at @offset in @fd using
+ * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, then unwind the frame
+ * with the pre/post attributes.
+ *
+ * When the build has no FALLOC_FL_KEEP_SIZE the fop is unwound with
+ * EOPNOTSUPP.
+ *
+ * `ret` always carries a negative errno on the error path, so the
+ * unwind below can pass -ret as a positive op_errno.
+ *
+ * Always returns 0; the outcome is delivered through STACK_UNWIND_STRICT.
+ */
+static int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              size_t len, dict_t *xdata)
+{
+        int32_t ret;
+#ifndef FALLOC_FL_KEEP_SIZE
+        /* stored negated: the shared unwind below flips the sign */
+        ret = -EOPNOTSUPP;
+
+#else /* FALLOC_FL_KEEP_SIZE */
+        int32_t     flags    = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE;
+        struct iatt statpre  = {0,};
+        struct iatt statpost = {0,};
+
+        ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
+                                 &statpre, &statpost);
+        if (ret < 0)
+                goto err;
+
+        STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL);
+        return 0;
+
+err:
+#endif /* FALLOC_FL_KEEP_SIZE */
+        STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * zerofill fop: delegate to posix_do_zerofill() and unwind the frame.
+ *
+ * A non-negative result unwinds with op_ret 0 and the pre/post
+ * attributes; a negative result is treated as a negated errno and
+ * unwinds with op_ret -1 and op_errno set to its absolute value.
+ *
+ * Always returns 0.
+ */
+static int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               off_t len, dict_t *xdata)
+{
+        struct iatt pre  = {0,};
+        struct iatt post = {0,};
+        int32_t     rc   = 0;
+
+        rc = posix_do_zerofill(frame, this, fd, offset, len, &pre, &post);
+
+        if (rc >= 0) {
+                STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &pre, &post, NULL);
+        } else {
+                STACK_UNWIND_STRICT(zerofill, frame, -1, -rc, NULL, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * ipc fop: nothing is handled at this layer, so every request is
+ * logged and failed.
+ *
+ * Unwinds with op_ret -1 and op_errno EOPNOTSUPP (positive, matching
+ * the other unwinds in this file).  Always returns 0.
+ */
+static int32_t
+posix_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        /*
+         * IPC is for inter-translator communication.  If one gets here, it
+         * means somebody sent one that nobody else recognized, which is an
+         * error much like an uncaught exception.
+         */
+        gf_log (this->name, GF_LOG_ERROR, "GF_LOG_IPC(%d) not handled", op);
+        STACK_UNWIND_STRICT (ipc, frame, -1, EOPNOTSUPP, NULL);
+        return 0;
+
+}
+
int32_t
posix_opendir (call_frame_t *frame, xlator_t *this,
loc_t *loc, fd_t *fd, dict_t *xdata)
@@ -584,7 +899,6 @@ posix_opendir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
VALIDATE_OR_GOTO (fd, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -736,18 +1050,20 @@ int
posix_mknod (call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
- int tmp_fd = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = 0;
- char *par_path = 0;
- struct iatt stbuf = { 0, };
- char was_present = 1;
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- void * uuid_req = NULL;
+ int tmp_fd = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = 0;
+ char *par_path = 0;
+ struct iatt stbuf = { 0, };
+ char was_present = 1;
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ void * uuid_req = NULL;
+ int32_t nlink_samepgfid = 0;
+ char *pgfid_xattr_key = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -850,6 +1166,16 @@ post_op:
strerror (errno));
}
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+
+ SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+
+ignore:
op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -913,6 +1239,18 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get created from a user request */
+ if (__is_root_gfid (loc->pargfid) &&
+ (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mkdir issued on %s, which is not permitted",
+ GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
@@ -966,7 +1304,6 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
goto out;
}
#endif
-
op_ret = posix_acl_xattr_set (this, real_path, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1020,15 +1357,17 @@ int32_t
posix_unlink (call_frame_t *frame, xlator_t *this,
loc_t *loc, int xflag, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- char *par_path = NULL;
- int32_t fd = -1;
- struct iatt stbuf;
- struct posix_private *priv = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ char *par_path = NULL;
+ int32_t fd = -1;
+ struct iatt stbuf = {0,};
+ struct posix_private *priv = NULL;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
DECLARE_OLD_FS_ID_VAR;
@@ -1066,6 +1405,26 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
}
}
+ if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ LOCK (&loc->inode->lock);
+ {
+ UNLINK_MODIFY_PGFID_XATTR (real_path, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret,
+ this, unlock);
+ }
+ unlock:
+ UNLOCK (&loc->inode->lock);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "modification of "
+ "parent gfid xattr failed (path:%s gfid:%s)",
+ real_path, uuid_utoa (loc->inode->gfid));
+ goto out;
+ }
+ }
+
op_ret = sys_unlink (real_path);
if (op_ret == -1) {
op_errno = errno;
@@ -1108,6 +1467,7 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,
int32_t op_errno = 0;
char * real_path = NULL;
char * par_path = NULL;
+ char * gfid_str = NULL;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
struct iatt stbuf;
@@ -1147,12 +1507,13 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,
}
if (flags) {
- uint32_t hashval = 0;
- char *tmp_path = alloca (strlen (priv->trash_path) + 16);
+ gfid_str = uuid_utoa (stbuf.ia_gfid);
+ char *tmp_path = alloca (strlen (priv->trash_path) +
+ strlen ("/") +
+ strlen (gfid_str) + 1);
mkdir (priv->trash_path, 0755);
- hashval = gf_dm_hashfn (real_path, strlen (real_path));
- sprintf (tmp_path, "%s/%u", priv->trash_path, hashval);
+ sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str);
op_ret = rename (real_path, tmp_path);
} else {
op_ret = rmdir (real_path);
@@ -1205,16 +1566,18 @@ int
posix_symlink (call_frame_t *frame, xlator_t *this,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = 0;
- char * par_path = 0;
- struct iatt stbuf = { 0, };
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- char was_present = 1;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char * real_path = 0;
+ char * par_path = 0;
+ struct iatt stbuf = { 0, };
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ char was_present = 1;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
DECLARE_OLD_FS_ID_VAR;
@@ -1275,7 +1638,6 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
goto out;
}
#endif
-
op_ret = posix_acl_xattr_set (this, real_path, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1283,6 +1645,14 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
strerror (errno));
}
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+ SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+ignore:
op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1329,24 +1699,26 @@ int
posix_rename (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_oldpath = NULL;
- char *real_newpath = NULL;
- char *par_oldpath = NULL;
- char *par_newpath = NULL;
- struct iatt stbuf = {0, };
- struct posix_private *priv = NULL;
- char was_present = 1;
- struct iatt preoldparent = {0, };
- struct iatt postoldparent = {0, };
- struct iatt prenewparent = {0, };
- struct iatt postnewparent = {0, };
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_oldpath = NULL;
+ char *real_newpath = NULL;
+ char *par_oldpath = NULL;
+ char *par_newpath = NULL;
+ struct iatt stbuf = {0, };
+ struct posix_private *priv = NULL;
+ char was_present = 1;
+ struct iatt preoldparent = {0, };
+ struct iatt postoldparent = {0, };
+ struct iatt prenewparent = {0, };
+ struct iatt postnewparent = {0, };
char olddirid[64];
char newdirid[64];
- uuid_t victim = {0};
- int was_dir = 0;
- int nlink = 0;
+ uuid_t victim = {0};
+ int was_dir = 0;
+ int nlink = 0;
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
DECLARE_OLD_FS_ID_VAR;
@@ -1411,17 +1783,64 @@ posix_rename (call_frame_t *frame, xlator_t *this,
goto out;
}
- if (IA_ISDIR (oldloc->inode->ia_type)) {
+ if (IA_ISDIR (oldloc->inode->ia_type))
posix_handle_unset (this, oldloc->inode->gfid, NULL);
+
+ LOCK (&oldloc->inode->lock);
+ {
+ if (!IA_ISDIR (oldloc->inode->ia_type)
+ && priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
+ PGFID_XATTR_KEY_PREFIX,
+ oldloc->pargfid);
+ UNLINK_MODIFY_PGFID_XATTR (real_oldpath,
+ pgfid_xattr_key,
+ nlink_samepgfid, 0,
+ op_ret,
+ this, unlock);
+ }
+
+ op_ret = sys_rename (real_oldpath, real_newpath);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_log (this->name,
+ (op_errno == ENOTEMPTY ? GF_LOG_DEBUG
+ : GF_LOG_ERROR),
+ "rename of %s to %s failed: %s",
+ real_oldpath, real_newpath,
+ strerror (op_errno));
+
+ if (priv->update_pgfid_nlinks
+ && !IA_ISDIR (oldloc->inode->ia_type)) {
+ LINK_MODIFY_PGFID_XATTR (real_oldpath,
+ pgfid_xattr_key,
+ nlink_samepgfid, 0,
+ op_ret,
+ this, unlock);
+ }
+
+ goto unlock;
+ }
+
+ if (!IA_ISDIR (oldloc->inode->ia_type)
+ && priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
+ PGFID_XATTR_KEY_PREFIX,
+ newloc->pargfid);
+ LINK_MODIFY_PGFID_XATTR (real_newpath,
+ pgfid_xattr_key,
+ nlink_samepgfid, 0,
+ op_ret,
+ this, unlock);
+ }
}
+unlock:
+ UNLOCK (&oldloc->inode->lock);
- op_ret = sys_rename (real_oldpath, real_newpath);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name,
- (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
- "rename of %s to %s failed: %s",
- real_oldpath, real_newpath, strerror (op_errno));
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "modification of "
+ "parent gfid xattr failed (gfid:%s)",
+ uuid_utoa (oldloc->inode->gfid));
goto out;
}
@@ -1485,16 +1904,18 @@ int
posix_link (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_oldpath = 0;
- char *real_newpath = 0;
- char *par_newpath = 0;
- struct iatt stbuf = {0, };
- struct posix_private *priv = NULL;
- char was_present = 1;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_oldpath = 0;
+ char *real_newpath = 0;
+ char *par_newpath = 0;
+ struct iatt stbuf = {0, };
+ struct posix_private *priv = NULL;
+ char was_present = 1;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int32_t nlink_samepgfid = 0;
+ char *pgfid_xattr_key = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -1522,18 +1943,9 @@ posix_link (call_frame_t *frame, xlator_t *this,
goto out;
}
-#ifdef HAVE_LINKAT
- /*
- * On most systems (Linux being the notable exception), link(2)
- * first resolves symlinks. If the target is a directory or
- * is nonexistent, it will fail. linkat(2) operates on the
- * symlink instead of its target when the AT_SYMLINK_FOLLOW
- * flag is not supplied.
- */
- op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0);
-#else
- op_ret = link (real_oldpath, real_newpath);
-#endif
+
+ op_ret = sys_link (real_oldpath, real_newpath);
+
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -1559,6 +1971,27 @@ posix_link (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ newloc->pargfid);
+
+ LOCK (&newloc->inode->lock);
+ {
+ LINK_MODIFY_PGFID_XATTR (real_newpath, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret,
+ this, unlock);
+ }
+ unlock:
+ UNLOCK (&newloc->inode->lock);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "modification of "
+ "parent gfid xattr failed (path:%s gfid:%s)",
+ real_newpath, uuid_utoa (newloc->inode->gfid));
+ goto out;
+ }
+ }
+
op_ret = 0;
out:
@@ -1625,7 +2058,6 @@ posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
}
op_ret = 0;
-
out:
SET_TO_OLD_FS_ID ();
@@ -1641,20 +2073,23 @@ posix_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
mode_t umask, fd_t *fd, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t _fd = -1;
- int _flags = 0;
- char * real_path = NULL;
- char * par_path = NULL;
- struct iatt stbuf = {0, };
- struct posix_fd * pfd = NULL;
- struct posix_private * priv = NULL;
- char was_present = 1;
-
- gid_t gid = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t _fd = -1;
+ int _flags = 0;
+ char * real_path = NULL;
+ char * par_path = NULL;
+ struct iatt stbuf = {0, };
+ struct posix_fd * pfd = NULL;
+ struct posix_private * priv = NULL;
+ char was_present = 1;
+
+ gid_t gid = 0;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+
+ int nlink_samepgfid = 0;
+ char * pgfid_xattr_key = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -1712,6 +2147,9 @@ posix_create (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (was_present)
+ goto fill_stat;
+
op_ret = posix_gfid_set (this, real_path, loc, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1727,7 +2165,6 @@ posix_create (call_frame_t *frame, xlator_t *this,
real_path, strerror (op_errno));
}
#endif
-
op_ret = posix_acl_xattr_set (this, real_path, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1735,6 +2172,14 @@ posix_create (call_frame_t *frame, xlator_t *this,
strerror (errno));
}
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+ SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+ignore:
op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1742,6 +2187,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
strerror (errno));
}
+fill_stat:
op_ret = posix_fdstat (this, _fd, &stbuf);
if (op_ret == -1) {
op_errno = errno;
@@ -1796,7 +2242,7 @@ out:
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,
fd, (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent, NULL);
+ &postparent, xdata);
return 0;
}
@@ -1878,9 +2324,6 @@ out:
return 0;
}
-#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
- (unsigned long)(~(bound - 1))))
-
int
posix_readv (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
@@ -1962,11 +2405,7 @@ posix_readv (call_frame_t *frame, xlator_t *this,
}
/* Hack to notify higher layers of EOF. */
- if (stbuf.ia_size == 0)
- op_errno = ENOENT;
- else if ((offset + vec.iov_len) == stbuf.ia_size)
- op_errno = ENOENT;
- else if (offset > stbuf.ia_size)
+ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
op_errno = ENOENT;
op_ret = vec.iov_len;
@@ -2011,14 +2450,12 @@ err:
return op_ret;
}
-
int32_t
__posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
int odirect)
{
int32_t op_ret = 0;
int idx = 0;
- int align = 4096;
int max_buf_size = 0;
int retval = 0;
char *buf = NULL;
@@ -2034,7 +2471,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
max_buf_size = vector[idx].iov_len;
}
- alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char);
+ alloc_buf = _page_aligned_alloc (max_buf_size, &buf);
if (!alloc_buf) {
op_ret = -errno;
goto err;
@@ -2042,9 +2479,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
internal_off = startoff;
for (idx = 0; idx < count; idx++) {
- /* page aligned buffer */
- buf = ALIGN_BUF (alloc_buf, align);
-
memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);
/* not sure whether writev works on O_DIRECT'd fd */
@@ -2059,12 +2493,53 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
}
err:
- if (alloc_buf)
- GF_FREE (alloc_buf);
+ GF_FREE (alloc_buf);
return op_ret;
}
+/*
+ * Build the response dictionary for a writev reply.
+ *
+ * A dictionary is only produced when the request @xdata carries
+ * GLUSTERFS_OPEN_FD_COUNT; it then receives the inode's fd_count under
+ * that key and @is_append under GLUSTERFS_WRITE_IS_APPEND.  Failures to
+ * set either key are logged and otherwise ignored.
+ *
+ * Returns the new dictionary, or NULL when @fd / its inode / the
+ * inode's gfid is missing, when the key was not requested, or when
+ * dict_new() fails.
+ */
+dict_t*
+_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
+{
+        inode_t *inode = fd ? fd->inode : NULL;
+        dict_t  *reply = NULL;
+
+        if (!inode || uuid_is_null (inode->gfid)) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: "
+                                  "fd: %p inode: %p gfid:%s", fd, inode,
+                                  inode?uuid_utoa(inode->gfid):"N/A");
+                return NULL;
+        }
+
+        if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT))
+                return NULL;
+
+        reply = dict_new();
+        if (!reply)
+                return NULL;
+
+        if (dict_set_uint32 (reply, GLUSTERFS_OPEN_FD_COUNT,
+                             inode->fd_count) < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
+                        "dictionary value for %s", uuid_utoa (inode->gfid),
+                        GLUSTERFS_OPEN_FD_COUNT);
+        }
+
+        if (dict_set_uint32 (reply, GLUSTERFS_WRITE_IS_APPEND,
+                             is_append) < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
+                        "dictionary value for %s", uuid_utoa (inode->gfid),
+                        GLUSTERFS_WRITE_IS_APPEND);
+        }
+
+        return reply;
+}
int32_t
posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
@@ -2079,6 +2554,9 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt preop = {0,};
struct iatt postop = {0,};
int ret = -1;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -2100,6 +2578,17 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
_fd = pfd->fd;
+ if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+ /* The write_is_append check and write must happen
+ atomically. Else another write can overtake this
+ write after the check and get written earlier.
+
+ So lock before preop-stat and unlock after write.
+ */
+ locked = _gf_true;
+ LOCK(&fd->inode->lock);
+ }
+
op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
@@ -2109,8 +2598,19 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto out;
}
+ if (locked) {
+ if (preop.ia_size == offset || (fd->flags & O_APPEND))
+ is_append = 1;
+ }
+
op_ret = __posix_writev (_fd, vector, count, offset,
(pfd->flags & O_DIRECT));
+
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
+
if (op_ret < 0) {
op_errno = -op_ret;
op_ret = -1;
@@ -2126,14 +2626,21 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
UNLOCK (&priv->lock);
if (op_ret >= 0) {
+ rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append);
/* wiretv successful, we also need to get the stat of
* the file we wrote to
*/
- if (pfd->flushwrites) {
- /* NOTE: ignore the error, if one occurs at this
- * point */
- fsync (_fd);
+ if (flags & (O_SYNC|O_DSYNC)) {
+ ret = fsync (_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync() in writev on fd %d failed: %s",
+ _fd, strerror (errno));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
}
ret = posix_fdstat (this, _fd, &postop);
@@ -2149,9 +2656,16 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
out:
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
+
STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop,
- NULL);
+ rsp_xdata);
+ if (rsp_xdata)
+ dict_unref (rsp_xdata);
return 0;
}
@@ -2278,6 +2792,33 @@ out:
}
+/*
+ * Queue an fsync request instead of executing it inline.
+ *
+ * The request is wrapped in a call stub (created with default_fsync),
+ * appended to priv->fsyncs under priv->fsync_mutex, counted in
+ * priv->fsync_queue_count, and priv->fsync_cond is signalled.
+ *
+ * If the stub cannot be created the frame is unwound with ENOMEM.
+ * Always returns 0.
+ */
+int
+posix_batch_fsync (call_frame_t *frame, xlator_t *this,
+                   fd_t *fd, int datasync, dict_t *xdata)
+{
+        struct posix_private *priv    = this->private;
+        call_stub_t          *pending = NULL;
+
+        pending = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata);
+        if (pending == NULL) {
+                STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        pthread_mutex_lock (&priv->fsync_mutex);
+        {
+                list_add_tail (&pending->list, &priv->fsyncs);
+                priv->fsync_queue_count++;
+                pthread_cond_signal (&priv->fsync_cond);
+        }
+        pthread_mutex_unlock (&priv->fsync_mutex);
+
+        return 0;
+}
+
+
int32_t
posix_fsync (call_frame_t *frame, xlator_t *this,
fd_t *fd, int32_t datasync, dict_t *xdata)
@@ -2289,6 +2830,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
int ret = -1;
struct iatt preop = {0,};
struct iatt postop = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -2304,6 +2846,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
goto out;
#endif
+ priv = this->private;
+ if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
+ posix_batch_fsync (frame, this, fd, datasync, xdata);
+ return 0;
+ }
+
ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
@@ -2325,16 +2873,14 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
if (datasync) {
;
-#ifdef HAVE_FDATASYNC
- op_ret = fdatasync (_fd);
+ op_ret = sys_fdatasync (_fd);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"fdatasync on fd=%p failed: %s",
fd, strerror (errno));
}
-#endif
} else {
- op_ret = fsync (_fd);
+ op_ret = sys_fsync (_fd);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -2365,6 +2911,34 @@ out:
}
static int gf_posix_xattr_enotsup_log;
+/*
+ * dict_foreach() callback used by posix_setxattr: forwards one
+ * key/value pair to posix_handle_pair(), taking the xlator, real path
+ * and flags from the posix_xattr_filler_t passed in @tmp.
+ *
+ * Returns whatever posix_handle_pair() returns.
+ */
+static int
+_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+                                void *tmp)
+{
+        posix_xattr_filler_t *ctx = tmp;
+
+        return posix_handle_pair (ctx->this, ctx->real_path, k, v,
+                                  ctx->flags);
+}
+
+#ifdef GF_DARWIN_HOST_OS
+/*
+ * Translate GF_XATTR_* flag bits into the Darwin XATTR_* values.
+ *
+ * GF_XATTR_CREATE, GF_XATTR_REPLACE and XATTR_REPLACE are cleared from
+ * @flags; XATTR_CREATE / XATTR_REPLACE are then set when the matching
+ * GF_ bit was present in the input.  All other bits pass through.
+ *
+ * NOTE(review): XATTR_REPLACE is part of the cleared mask but
+ * XATTR_CREATE is not -- confirm this asymmetry is intended.
+ */
+static inline int
+map_xattr_flags(int flags)
+{
+        /* DARWIN has different defines on XATTR_ flags.
+           There do not seem to be a POSIX standard
+           Parse any other flags over.
+        */
+        const int cleared = GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE;
+        int       mapped  = flags & ~cleared;
+
+        if (flags & GF_XATTR_CREATE)
+                mapped |= XATTR_CREATE;
+        if (flags & GF_XATTR_REPLACE)
+                mapped |= XATTR_REPLACE;
+
+        return mapped;
+}
+#endif
int32_t
posix_setxattr (call_frame_t *frame, xlator_t *this,
@@ -2373,8 +2947,8 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
- data_pair_t * trav = NULL;
- int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -2388,26 +2962,404 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
op_ret = -1;
dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
- trav = dict->members_list;
+ filler.real_path = real_path;
+ filler.this = this;
+#ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+#else
+ filler.flags = flags;
+#endif
+ op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+out:
+ SET_TO_OLD_FS_ID ();
+
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+
+/**
+ * posix_xattr_get_real_filename - case-insensitive name lookup in a directory
+ *
+ * The part of @key that follows GF_XATTR_GET_REAL_FILENAME_KEY is compared
+ * with strcasecmp() against every entry of the directory @loc resolves to.
+ * The first matching on-disk name is stored in @dict under @key.
+ *
+ * Returns the length of the stored name including its terminating NUL on
+ * success, or a negative errno: -ESTALE when no path could be built for
+ * @loc, -errno from opendir(), -ENOENT when nothing matches and -ENOMEM on
+ * allocation or dict failure.
+ */
+int
+posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                               const char *key, dict_t *dict, dict_t *xdata)
+{
+        char            *real_path = NULL;
+        struct dirent   *dirent    = NULL;
+        DIR             *fd        = NULL;
+        const char      *fname     = NULL;
+        char            *found     = NULL;
+        int              ret       = -1;
+        /* Not referenced directly below; kept from the original because
+         * MAKE_INODE_HANDLE may expand to code that uses it - TODO confirm
+         * before removing. */
+        int              op_ret    = -1;
+
+        MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+        /* do_xattrop() tests real_path after the same macro, so it can be
+         * left NULL; opendir() must not be handed a NULL path. */
+        if (!real_path)
+                return -ESTALE;
+
+        fd = opendir (real_path);
+        if (!fd)
+                return -errno;
+
+        fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY);
+
+        while ((dirent = readdir (fd))) {
+                if (strcasecmp (dirent->d_name, fname) == 0) {
+                        found = gf_strdup (dirent->d_name);
+                        if (!found) {
+                                closedir (fd);
+                                return -ENOMEM;
+                        }
+                        break;
+                }
+        }
+
+        closedir (fd);
+
+        if (!found)
+                return -ENOENT;
+
+        ret = dict_set_dynstr (dict, (char *)key, found);
+        if (ret) {
+                GF_FREE (found);
+                return -ENOMEM;
+        }
+        /* the dict holds the string now; it is only read here */
+        ret = strlen (found) + 1;
+
+        return ret;
+}
+
+/**
+ * posix_get_ancestry_directory - build the ancestry of a directory inode
+ *
+ * Calls posix_make_ancestryfromgfid() for @leaf_inode->gfid, always with
+ * POSIX_ANCESTRY_PATH added to @type.  When @type itself contains
+ * POSIX_ANCESTRY_PATH and @path is non-NULL, a heap copy of the resulting
+ * path (trailing character stripped unless the path is "/") is stored in
+ * *@path; the caller owns it.
+ *
+ * Returns the value of posix_make_ancestryfromgfid(), or -1 with
+ * *@op_errno = ENOMEM when the path copy cannot be allocated.
+ */
+int
+posix_get_ancestry_directory (xlator_t *this, inode_t *leaf_inode,
+                              gf_dirent_t *head, char **path, int type,
+                              int32_t *op_errno, dict_t *xdata)
+{
+        ssize_t               handle_size = 0;
+        struct posix_private *priv        = NULL;
+        char                  dirpath[PATH_MAX+1] = {0,};
+        inode_t              *inode       = NULL;
+        int                   ret         = -1;
+        size_t                len         = 0;
+
+        priv = this->private;
+
+        handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+        ret = posix_make_ancestryfromgfid (this, dirpath, PATH_MAX + 1, head,
+                                           type | POSIX_ANCESTRY_PATH,
+                                           leaf_inode->gfid,
+                                           handle_size, priv->base_path,
+                                           leaf_inode->table, &inode, xdata);
+        if (ret < 0)
+                goto out;
+
+
+        /* there is already a reference in loc->inode */
+        inode_unref (inode);
+
+        if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
+                /* never index dirpath[-1] when the path came back empty */
+                len = strlen (dirpath);
+                if ((len > 0) && strcmp (dirpath, "/"))
+                        dirpath[len - 1] = '\0';
+
+                *path = gf_strdup (dirpath);
+                if (*path == NULL) {
+                        /* do not report success with no path to hand out */
+                        if (op_errno)
+                                *op_errno = ENOMEM;
+                        ret = -1;
+                }
+        }
+
+out:
+        return ret;
+}
+
+/**
+ * posix_links_in_same_directory - find the names in @dirpath that are hard
+ * links to inode number @ino.
+ *
+ * Reads @dirpath until @count entries with d_ino == @ino have been handled
+ * or the directory is exhausted.  Every match is linked into the inode
+ * table below @parent.  With POSIX_ANCESTRY_DENTRY in @type a gf_dirent_t
+ * for the name is appended to @head; with POSIX_ANCESTRY_PATH in @type the
+ * path (with the first priv->base_path_length bytes of @dirpath skipped) is
+ * appended to the colon separated list in *@path.
+ *
+ * Returns 0 on success, -1 on failure with *@op_errno set.  After an
+ * allocation failure *@path is NULL rather than a freed pointer.
+ */
+int32_t
+posix_links_in_same_directory (char *dirpath, int count, inode_t *leaf_inode,
+                               inode_t *parent, uint64_t ino,
+                               gf_dirent_t *head, char **path,
+                               int type, dict_t *xdata, int32_t *op_errno)
+{
+        DIR                  *dirp         = NULL;
+        int                   op_ret       = -1;
+        int                   len          = 0;
+        struct dirent        *entry        = NULL;
+        struct dirent        *result       = NULL;
+        inode_t              *linked_inode = NULL;
+        gf_dirent_t          *gf_entry     = NULL;
+        char                  temppath[PATH_MAX+1] = {0,};
+        xlator_t             *this         = NULL;
+        struct posix_private *priv         = NULL;
+        char                 *tempv        = NULL;
+
+        this = THIS;
+
+        priv = this->private;
+
+        dirp = opendir (dirpath);
+        if (!dirp) {
+                *op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not opendir %s: %s", dirpath,
+                        strerror (*op_errno));
+                goto out;
+        }
+
+        /* alloca() has no failure return, so there is nothing to check */
+        entry = alloca (offsetof(struct dirent, d_name) + NAME_MAX + 1);
+
+        while (count > 0) {
+                *op_errno = readdir_r (dirp, entry, &result);
+                if (*op_errno)
+                        goto out;       /* read error: op_ret is still -1 */
+                if (result == NULL)
+                        break;          /* end of directory */
+
+                if (entry->d_ino != ino)
+                        continue;
+
+                linked_inode = inode_link (leaf_inode, parent,
+                                           entry->d_name, NULL);
+
+                GF_ASSERT (linked_inode == leaf_inode);
+                inode_unref (linked_inode);
+
+                if (type & POSIX_ANCESTRY_DENTRY) {
+                        loc_t loc = {0, };
+
+                        /* bounded replacement for strcpy/strcat */
+                        len = snprintf (temppath, sizeof (temppath), "%s/%s",
+                                        dirpath, entry->d_name);
+                        if ((len < 0) ||
+                            ((size_t) len >= sizeof (temppath))) {
+                                *op_errno = ENAMETOOLONG;
+                                goto out;
+                        }
+
+                        gf_entry = gf_dirent_for_name (entry->d_name);
+                        if (!gf_entry) {
+                                *op_errno = ENOMEM;
+                                goto out;
+                        }
+
+                        loc.inode = inode_ref (leaf_inode);
+                        uuid_copy (loc.gfid, leaf_inode->gfid);
+
+                        gf_entry->inode = inode_ref (leaf_inode);
+                        gf_entry->dict
+                                = posix_lookup_xattr_fill (this,
+                                                           temppath,
+                                                           &loc, xdata,
+                                                           NULL);
+                        list_add_tail (&gf_entry->list, &head->list);
+                        loc_wipe (&loc);
+                }
+
+                if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
+                        len = snprintf (temppath, sizeof (temppath), "%s/%s",
+                                        &dirpath[priv->base_path_length],
+                                        entry->d_name);
+                        if ((len < 0) ||
+                            ((size_t) len >= sizeof (temppath))) {
+                                *op_errno = ENAMETOOLONG;
+                                goto out;
+                        }
+
+                        if (!*path) {
+                                *path = gf_strdup (temppath);
+                                if (!*path) {
+                                        *op_errno = ENOMEM;
+                                        goto out;
+                                }
+                        } else {
+                                /* creating a colon separated */
+                                /* list of hard links */
+                                tempv  = GF_REALLOC (*path, strlen (*path)
+                                                     + 1 // ':'
+                                                     + strlen (temppath) + 1 );
+                                if (!tempv) {
+                                        gf_log (this->name, GF_LOG_WARNING,
+                                                "realloc failed on path");
+                                        GF_FREE (*path);
+                                        /* the caller keeps using *path */
+                                        *path = NULL;
+                                        *op_errno = ENOMEM;
+                                        goto out;
+                                }
+
+                                *path = tempv;
+                                strcat (*path, ":");
+                                strcat (*path, temppath);
+                        }
+                }
+
+                count--;
+        }
+
+        op_ret = 0;
+
+out:
+        /* closedir() may only turn success into failure; it must not mask
+         * an error recorded above. */
+        if (dirp && (closedir (dirp) == -1)) {
+                *op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "closedir failed: %s",
+                        strerror (*op_errno));
+                op_ret = -1;
+        }
+
+        return op_ret;
+}
+
+int
+posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type,
+ int32_t *op_errno, dict_t *xdata)
+{
+ size_t remaining_size = 0;
+ char dirpath[PATH_MAX+1] = {0,}, *leaf_path = NULL;
+ int op_ret = -1, pathlen = -1;
+ ssize_t handle_size = 0;
+ char pgfidstr[UUID_CANONICAL_FORM_LEN+1] = {0,};
+ uuid_t pgfid = {0, };
+ int nlink_samepgfid = 0;
+ struct stat stbuf = {0,};
+ char *list = NULL;
+ int32_t list_offset = 0;
+ char key[4096] = {0,};
+ struct posix_private *priv = NULL;
+ ssize_t size = 0;
+ inode_t *parent = NULL;
+ loc_t *loc = NULL;
+
+ priv = this->private;
+
+ loc = GF_CALLOC (1, sizeof (*loc), gf_posix_mt_char);
+ if (loc == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_copy (loc->gfid, leaf_inode->gfid);
+
+ MAKE_INODE_HANDLE (leaf_path, this, loc, NULL);
+
+ GF_FREE (loc);
+
+ size = sys_llistxattr (leaf_path, NULL, 0);
+ if (size == -1) {
+ *op_errno = errno;
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting brick"
+ " with 'user_xattr' flag)");
+
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "listxattr failed on %s: %s",
+ leaf_path, strerror (*op_errno));
- while (trav) {
- ret = posix_handle_pair (this, real_path, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
}
- trav = trav->next;
+
+ goto out;
+ }
+
+ if (size == 0) {
+ op_ret = 0;
+ goto out;
}
+ list = alloca (size + 1);
+ if (!list) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ size = sys_llistxattr (leaf_path, list, size);
+ remaining_size = size;
+ list_offset = 0;
+
+ op_ret = sys_lstat (leaf_path, &stbuf);
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_log (this->name, GF_LOG_WARNING, "lstat failed"
+ " on %s: %s", leaf_path,
+ strerror (*op_errno));
+ goto out;
+ }
+
+ while (remaining_size > 0) {
+ if (*(list + list_offset) == '\0')
+ break;
+ strcpy (key, list + list_offset);
+ if (strncmp (key, PGFID_XATTR_KEY_PREFIX,
+ strlen (PGFID_XATTR_KEY_PREFIX)) != 0)
+ goto next;
+
+ op_ret = sys_lgetxattr (leaf_path, key,
+ &nlink_samepgfid,
+ sizeof(nlink_samepgfid));
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr failed on "
+ "%s: key = %s (%s)",
+ leaf_path,
+ key,
+ strerror (*op_errno));
+ goto out;
+ }
+
+ nlink_samepgfid = ntoh32 (nlink_samepgfid);
+
+ strcpy (pgfidstr, key + strlen(PGFID_XATTR_KEY_PREFIX));
+ uuid_parse (pgfidstr, pgfid);
+
+ handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+ /* constructing the absolute real path of parent dir */
+ strcpy (dirpath, priv->base_path);
+ pathlen = PATH_MAX + 1 - priv->base_path_length;
+
+ op_ret = posix_make_ancestryfromgfid (this,
+ dirpath + priv->base_path_length,
+ pathlen,
+ head,
+ type | POSIX_ANCESTRY_PATH,
+ pgfid,
+ handle_size,
+ priv->base_path,
+ leaf_inode->table,
+ &parent, xdata);
+ if (op_ret < 0) {
+ goto next;
+ }
+
+ dirpath[strlen (dirpath) - 1] = '\0';
+
+ posix_links_in_same_directory (dirpath, nlink_samepgfid,
+ leaf_inode,
+ parent, stbuf.st_ino, head,
+ path, type, xdata, op_errno);
+
+ if (parent != NULL) {
+ inode_unref (parent);
+ parent = NULL;
+ }
+
+ next:
+ remaining_size -= strlen (key) + 1;
+ list_offset += strlen (key) + 1;
+ } /* while (remaining_size > 0) */
+
op_ret = 0;
out:
- SET_TO_OLD_FS_ID ();
+ return op_ret;
+}
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+int
+posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type, int32_t *op_errno,
+ dict_t *xdata)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
- return 0;
+ priv = this->private;
+
+ if (!priv->update_pgfid_nlinks)
+ goto out;
+
+ if (IA_ISDIR (leaf_inode->ia_type)) {
+ ret = posix_get_ancestry_directory (this, leaf_inode,
+ head, path, type, op_errno,
+ xdata);
+ } else {
+ ret = posix_get_ancestry_non_directory (this, leaf_inode,
+ head, path, type,
+ op_errno, xdata);
+ }
+
+out:
+ return ret;
}
/**
@@ -2419,23 +3371,23 @@ int32_t
posix_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name, dict_t *xdata)
{
- struct posix_private *priv = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t list_offset = 0;
- size_t size = 0;
- size_t remaining_size = 0;
- char key[4096] = {0,};
- char host_buf[1024] = {0,};
- char *value = NULL;
- char *list = NULL;
- char *real_path = NULL;
- dict_t *dict = NULL;
- char *file_contents = NULL;
- int ret = -1;
- char *path = NULL;
- char *rpath = NULL;
- char *dyn_rpath = NULL;
+ struct posix_private *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char host_buf[1024] = {0,};
+ char *value = NULL;
+ char *real_path = NULL;
+ dict_t *dict = NULL;
+ char *file_contents = NULL;
+ int ret = -1;
+ char *path = NULL;
+ char *rpath = NULL;
+ char *dyn_rpath = NULL;
+ ssize_t size = 0;
+ char *list = NULL;
+ int32_t list_offset = 0;
+ size_t remaining_size = 0;
+ char keybuffer[4096] = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -2462,12 +3414,31 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- /* Get the total size */
- dict = get_new_dict ();
+ dict = dict_new ();
if (!dict) {
+ op_errno = ENOMEM;
goto out;
}
+ if (loc->inode && name &&
+ (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
+ ret = posix_xattr_get_real_filename (frame, this, loc,
+ name, dict, xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ gf_log (this->name, (op_errno == ENOENT) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING,
+ "Failed to get real filename (%s, %s): %s",
+ loc->path, name, strerror (op_errno));
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
if (!list_empty (&loc->inode->fd_list)) {
ret = dict_set_uint32 (dict, (char *)name, 1);
@@ -2484,15 +3455,19 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
}
goto done;
}
- if (loc->inode && name &&
- (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
+ if (loc->inode && name && (XATTR_IS_PATHINFO (name))) {
if (LOC_HAS_ABSPATH (loc))
MAKE_REAL_PATH (rpath, this, loc->path);
else
rpath = real_path;
- (void) snprintf (host_buf, 1024, "<POSIX(%s):%s:%s>",
- priv->base_path, priv->hostname, rpath);
+ (void) snprintf (host_buf, 1024,
+ "<POSIX(%s):%s:%s>", priv->base_path,
+ ((priv->node_uuid_pathinfo
+ && !uuid_is_null(priv->glusterd_uuid))
+ ? uuid_utoa (priv->glusterd_uuid)
+ : priv->hostname),
+ rpath);
dyn_rpath = gf_strdup (host_buf);
if (!dyn_rpath) {
@@ -2500,12 +3475,12 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
goto done;
}
size = strlen (dyn_rpath) + 1;
- ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY,
- dyn_rpath);
+ ret = dict_set_dynstr (dict, (char *)name, dyn_rpath);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"could not set value (%s) in dictionary",
dyn_rpath);
+ GF_FREE (dyn_rpath);
}
goto done;
@@ -2530,6 +3505,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_WARNING,
"could not set value (%s) in dictionary",
dyn_rpath);
+ GF_FREE (dyn_rpath);
}
goto done;
}
@@ -2548,32 +3524,97 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_WARNING,
"could not set value (%s) in dictionary",
host_buf);
+ GF_FREE (path);
}
goto done;
}
- if (name) {
- strcpy (key, name);
+ if (loc->inode && name
+ && (strcmp (name, GET_ANCESTRY_PATH_KEY) == 0)) {
+ int type = POSIX_ANCESTRY_PATH;
- size = sys_lgetxattr (real_path, key, NULL, 0);
- if (size == -1) {
+ op_ret = posix_get_ancestry (this, loc->inode, NULL,
+ &path, type, &op_errno,
+ xdata);
+ if (op_ret < 0) {
op_ret = -1;
- op_errno = errno;
+ op_errno = ENODATA;
goto out;
}
+
+ op_ret = dict_set_dynstr (dict, GET_ANCESTRY_PATH_KEY, path);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "could not get "
+ "value for key (%s)", GET_ANCESTRY_PATH_KEY);
+ GF_FREE (path);
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ goto done;
+ }
+
+ if (name) {
+ strcpy (keybuffer, name);
+ char *key = keybuffer;
+#if defined(GF_DARWIN_HOST_OS_DISABLED)
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ if (strncmp(key, "user.",5) == 0) {
+ key += 5;
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "getxattr for file %s"
+ " stripping user key: %s -> %s",
+ real_path, keybuffer, key);
+ }
+ }
+#endif
+ size = sys_lgetxattr (real_path, key, NULL, 0);
+ if (size <= 0) {
+ op_errno = errno;
+ if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ } else if (op_errno == ENOATTR ||
+ op_errno == ENODATA) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No such attribute:%s for file %s",
+ key, real_path);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr failed on %s: %s (%s)",
+ real_path, key, strerror (op_errno));
+ }
+
+ goto done;
+ }
value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
if (!value) {
op_ret = -1;
+ op_errno = ENOMEM;
goto out;
}
- op_ret = sys_lgetxattr (real_path, key, value, size);
- if (op_ret == -1) {
+ size = sys_lgetxattr (real_path, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
+ "%s: key = %s (%s)", real_path, key,
+ strerror (op_errno));
+ GF_FREE (value);
goto out;
}
- value [op_ret] = '\0';
- op_ret = dict_set_dynptr (dict, key, value, op_ret);
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "on %s for the key %s failed.", real_path, key);
+ GF_FREE (value);
goto out;
}
@@ -2587,7 +3628,9 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
this->name, GF_LOG_WARNING,
"Extended attributes not "
- "supported.");
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
}
else {
gf_log (this->name, GF_LOG_ERROR,
@@ -2613,33 +3656,55 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
while (remaining_size > 0) {
if (*(list + list_offset) == '\0')
break;
-
- strcpy (key, list + list_offset);
- op_ret = sys_lgetxattr (real_path, key, NULL, 0);
- if (op_ret == -1)
+ strcpy (keybuffer, list + list_offset);
+ size = sys_lgetxattr (real_path, keybuffer, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
+ "%s: key = %s (%s)", real_path, keybuffer,
+ strerror (op_errno));
break;
+ }
- value = GF_CALLOC (op_ret + 1, sizeof(char),
+ value = GF_CALLOC (size + 1, sizeof(char),
gf_posix_mt_char);
if (!value) {
op_errno = errno;
goto out;
}
- op_ret = sys_lgetxattr (real_path, key, value, op_ret);
- if (op_ret == -1) {
+ size = sys_lgetxattr (real_path, keybuffer, value, size);
+ if (size == -1) {
+ op_ret = -1;
op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
+ "%s: key = %s (%s)", real_path, keybuffer,
+ strerror (op_errno));
+ GF_FREE (value);
break;
}
- value [op_ret] = '\0';
- op_ret = dict_set_dynptr (dict, key, value, op_ret);
+ value [size] = '\0';
+#ifdef GF_DARWIN_HOST_OS
+ /* The protocol expect namespace for now */
+ char *newkey = NULL;
+ gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey);
+ strcpy (keybuffer, newkey);
+ GF_FREE (newkey);
+#endif
+ op_ret = dict_set_dynptr (dict, keybuffer, value, size);
if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "on %s for the key %s failed.", real_path,
+ keybuffer);
+ GF_FREE (value);
goto out;
}
- remaining_size -= strlen (key) + 1;
- list_offset += strlen (key) + 1;
+ remaining_size -= strlen (keybuffer) + 1;
+ list_offset += strlen (keybuffer) + 1;
} /* while (remaining_size > 0) */
@@ -2648,7 +3713,7 @@ done:
if (dict) {
dict_del (dict, GFID_XATTR_KEY);
- dict_ref (dict);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
}
out:
@@ -2656,8 +3721,9 @@ out:
STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL);
- if (dict)
+ if (dict) {
dict_unref (dict);
+ }
return 0;
}
@@ -2672,7 +3738,7 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
struct posix_fd * pfd = NULL;
int _fd = -1;
int32_t list_offset = 0;
- size_t size = 0;
+ ssize_t size = 0;
size_t remaining_size = 0;
char key[4096] = {0,};
char * value = NULL;
@@ -2715,21 +3781,47 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
if (name) {
strcpy (key, name);
-
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = this->private;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ char *newkey = NULL;
+ gf_add_prefix (XATTR_USER_PREFIX, key, &newkey);
+ strcpy (key, newkey);
+ GF_FREE (newkey);
+ }
+#endif
size = sys_fgetxattr (_fd, key, NULL, 0);
+ if (size <= 0) {
+ op_errno = errno;
+ gf_log (this->name, ((errno == ENODATA) ?
+ GF_LOG_DEBUG : GF_LOG_ERROR),
+ "fgetxattr failed on key %s (%s)", key,
+ strerror (op_errno));
+ goto done;
+ }
+
value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
if (!value) {
op_ret = -1;
goto out;
}
- op_ret = sys_fgetxattr (_fd, key, value, op_ret);
- if (op_ret == -1) {
+ size = sys_fgetxattr (_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "fd %p for the key %s (%s)", fd, key,
+ strerror (op_errno));
+ GF_FREE (value);
goto out;
}
- value [op_ret] = '\0';
- op_ret = dict_set_dynptr (dict, key, value, op_ret);
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "on key %s failed", key);
+ GF_FREE (value);
goto out;
}
goto done;
@@ -2742,7 +3834,8 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
this->name, GF_LOG_WARNING,
"Extended attributes not "
- "supported.");
+ "supported (try remounting "
+ "brick with 'user_xattr' flag)");
}
else {
gf_log (this->name, GF_LOG_ERROR,
@@ -2770,24 +3863,42 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
break;
strcpy (key, list + list_offset);
- op_ret = sys_fgetxattr (_fd, key, NULL, 0);
- if (op_ret == -1)
+ size = sys_fgetxattr (_fd, key, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "fd %p for the key %s (%s)", fd, key,
+ strerror (op_errno));
break;
+ }
- value = GF_CALLOC (op_ret + 1, sizeof(char),
+ value = GF_CALLOC (size + 1, sizeof(char),
gf_posix_mt_char);
if (!value) {
+ op_ret = -1;
op_errno = errno;
goto out;
}
- op_ret = sys_fgetxattr (_fd, key, value, op_ret);
- if (op_ret == -1)
+ size = sys_fgetxattr (_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "the fd %p for the key %s (%s)", fd, key,
+ strerror (op_errno));
+ GF_FREE (value);
break;
+ }
- value [op_ret] = '\0';
- op_ret = dict_set_dynptr (dict, key, value, op_ret);
+ value [size] = '\0';
+
+ op_ret = dict_set_dynptr (dict, key, value, size);
if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "failed on key %s", key);
+ GF_FREE (value);
goto out;
}
remaining_size -= strlen (key) + 1;
@@ -2800,6 +3911,7 @@ done:
if (dict) {
dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
dict_ref (dict);
}
@@ -2814,6 +3926,17 @@ out:
return 0;
}
+/* dict_foreach() callback used by posix_fsetxattr(): hands one key/value
+ * pair to posix_fhandle_pair() together with the descriptor and flags
+ * carried in the posix_xattr_filler_t passed through @tmp.  Returns
+ * whatever posix_fhandle_pair() returns. */
+static int
+_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+                                 void *tmp)
+{
+        posix_xattr_filler_t *ctx = tmp;
+
+        return posix_fhandle_pair (ctx->this, ctx->fd, k, v,
+                                   ctx->flags);
+}
int32_t
posix_fsetxattr (call_frame_t *frame, xlator_t *this,
@@ -2823,8 +3946,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
int32_t op_errno = 0;
struct posix_fd * pfd = NULL;
int _fd = -1;
- data_pair_t * trav = NULL;
- int ret = -1;
+ int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -2844,20 +3968,22 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
_fd = pfd->fd;
dict_del (dict, GFID_XATTR_KEY);
+ dict_del (dict, GF_XATTR_VOL_ID_KEY);
- trav = dict->members_list;
-
- while (trav) {
- ret = posix_fhandle_pair (this, _fd, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- trav = trav->next;
+ filler.fd = _fd;
+ filler.this = this;
+#ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+#else
+ filler.flags = flags;
+#endif
+ op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
}
- op_ret = 0;
-
out:
SET_TO_OLD_FS_ID ();
@@ -2866,6 +3992,40 @@ out:
return 0;
}
+/* dict_foreach() callback for the bulk form of removexattr: removes the
+ * extended attribute @key from filler->real_path (the filler arrives in
+ * @data).  When the removal fails, errno is saved in filler->op_errno and
+ * the failure is logged unless errno is ENOATTR or EPERM.  Returns the
+ * result of sys_lremovexattr(). */
+int
+_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data)
+{
+        posix_xattr_filler_t *filler = (posix_xattr_filler_t *) data;
+        xlator_t             *this   = filler->this;
+        int32_t               op_ret = 0;
+#ifdef GF_DARWIN_HOST_OS
+        struct posix_private *priv   = (struct posix_private *) this->private;
+        char                 *newkey = NULL;
+
+        if (priv->xattr_user_namespace == XATTR_STRIP) {
+                gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey);
+                gf_log("remove_xattr", GF_LOG_DEBUG, "key %s => %s" , key,
+                       newkey);
+                key = newkey;
+        }
+#endif
+        op_ret = sys_lremovexattr (filler->real_path, key);
+        if (op_ret == -1) {
+                filler->op_errno = errno;
+                if (!(errno == ENOATTR || errno == EPERM))
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "removexattr failed on %s (for %s): %s",
+                                filler->real_path, key, strerror (errno));
+        }
+#ifdef GF_DARWIN_HOST_OS
+        GF_FREE(newkey);
+#endif
+        return op_ret;
+}
+
int32_t
posix_removexattr (call_frame_t *frame, xlator_t *this,
@@ -2874,6 +4034,7 @@ posix_removexattr (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -2885,10 +4046,32 @@ posix_removexattr (call_frame_t *frame, xlator_t *this,
op_ret = -1;
goto out;
}
+ if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) {
+ gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
+ " on volume-id for file %s", real_path);
+ op_ret = -1;
+ goto out;
+ }
SET_FS_ID (frame->root->uid, frame->root->gid);
+ /**
+ * sending an empty key name with xdata containing the
+ * list of key(s) to be removed implies "bulk remove request"
+ * for removexattr.
+ */
+ if (name && (strcmp (name, "") == 0) && xdata) {
+ filler.real_path = real_path;
+ filler.this = this;
+ op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler);
+ if (op_ret) {
+ op_errno = filler.op_errno;
+ }
+
+ goto out;
+ }
+
op_ret = sys_lremovexattr (real_path, name);
if (op_ret == -1) {
op_errno = errno;
@@ -2916,7 +4099,6 @@ posix_fremovexattr (call_frame_t *frame, xlator_t *this,
int32_t op_errno = 0;
struct posix_fd * pfd = NULL;
int _fd = -1;
- uint64_t tmp_pfd = 0;
int ret = -1;
DECLARE_OLD_FS_ID_VAR;
@@ -2926,16 +4108,19 @@ posix_fremovexattr (call_frame_t *frame, xlator_t *this,
" on gfid for file");
goto out;
}
+ if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) {
+ gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
+ " on volume-id for file");
+ goto out;
+ }
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
-
_fd = pfd->fd;
@@ -3013,9 +4198,31 @@ posix_print_xattr (dict_t *this,
+/* Element-wise addition of @count 32-bit values: dest[i] += src[i].
+ * Each element is converted with ntoh32() before the addition and the sum
+ * is stored back through hton32(). */
static void
__add_array (int32_t *dest, int32_t *src, int count)
{
+ int i = 0;
+ int32_t destval = 0;
+ for (i = 0; i < count; i++) {
+ destval = ntoh32 (dest[i]);
+ /* an element already equal to 0xffffffff is left untouched */
+ if (destval == 0xffffffff)
+ continue;
+ dest[i] = hton32 (destval + ntoh32 (src[i]));
+ }
+}
+
+static void
+__or_array (int32_t *dest, int32_t *src, int count)
+{
int i = 0;
for (i = 0; i < count; i++) {
- dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i]));
+ dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i]));
+ }
+}
+
+static void
+__and_array (int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i]));
}
}
@@ -3028,6 +4235,168 @@ __add_long_array (int64_t *dest, int64_t *src, int count)
}
}
+/**
+ * _posix_handle_xattr_keyvalue_pair - dict_foreach() callback of do_xattrop()
+ *
+ * For the pair (@k, @v): reads the current value of xattr @k from
+ * filler->real_path (or from filler->fd when no path is set), combines it
+ * with @v->data according to filler->flags (add / add64 / or / and), writes
+ * the result back and finally replaces the value of @k in @d with it.  The
+ * read-modify-write runs under inode->lock.  A missing xattr (ENODATA /
+ * ENOATTR) is treated as an all-zero value.
+ *
+ * The filler arrives through @tmp.  Returns 0 on success and -1 on failure;
+ * the scratch buffer is released on every failure path and handed over to
+ * @d on success.
+ */
+static int
+_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+                                   void *tmp)
+{
+        int                   size     = 0;
+        int                   count    = 0;
+        int                   op_ret   = 0;
+        int                   op_errno = 0;
+        gf_xattrop_flags_t    optype   = 0;
+        char                 *array    = NULL;
+        inode_t              *inode    = NULL;
+        xlator_t             *this     = NULL;
+        posix_xattr_filler_t *filler   = NULL;
+
+        filler = tmp;
+
+        optype = (gf_xattrop_flags_t)(filler->flags);
+        this   = filler->this;
+        inode  = filler->inode;
+        count  = v->len;
+
+        /* do_xattrop() leaves the inode NULL when it has neither a path
+         * nor an fd; locking through it would dereference NULL. */
+        if (!inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no inode available for xattrop on key %s", k);
+                op_ret = -1;
+                goto out;
+        }
+
+        array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char);
+        if (!array) {
+                op_ret = -1;
+                goto out;
+        }
+
+#ifdef GF_DARWIN_HOST_OS
+        struct posix_private *priv     = NULL;
+        priv = this->private;
+        if (priv->xattr_user_namespace == XATTR_STRIP) {
+                if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) {
+                        k += XATTR_USER_PREFIX_LEN;
+                }
+        }
+#endif
+
+        LOCK (&inode->lock);
+        {
+                if (filler->real_path) {
+                        size = sys_lgetxattr (filler->real_path, k,
+                                              (char *)array, v->len);
+                } else {
+                        size = sys_fgetxattr (filler->fd, k, (char *)array,
+                                              v->len);
+                }
+
+                op_errno = errno;
+                if ((size == -1) && (op_errno != ENODATA) &&
+                    (op_errno != ENOATTR)) {
+                        if (op_errno == ENOTSUP) {
+                                GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
+                                                    this->name, GF_LOG_WARNING,
+                                                    "Extended attributes not "
+                                                    "supported by filesystem");
+                        } else if (op_errno != ENOENT ||
+                                   !posix_special_xattr (marker_xattrs,
+                                                         k)) {
+                                if (filler->real_path)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "getxattr failed on %s while doing "
+                                                "xattrop: Key:%s (%s)",
+                                                filler->real_path,
+                                                k, strerror (op_errno));
+                                else
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "fgetxattr failed on fd=%d while doing "
+                                                "xattrop: Key:%s (%s)",
+                                                filler->fd,
+                                                k, strerror (op_errno));
+                        }
+
+                        op_ret = -1;
+                        goto unlock;
+                }
+
+                switch (optype) {
+
+                case GF_XATTROP_ADD_ARRAY:
+                        __add_array ((int32_t *) array, (int32_t *) v->data,
+                                     v->len / 4);
+                        break;
+
+                case GF_XATTROP_ADD_ARRAY64:
+                        __add_long_array ((int64_t *) array, (int64_t *) v->data,
+                                          v->len / 8);
+                        break;
+
+                case GF_XATTROP_OR_ARRAY:
+                        __or_array ((int32_t *) array,
+                                    (int32_t *) v->data,
+                                    v->len / 4);
+                        break;
+
+                case GF_XATTROP_AND_ARRAY:
+                        __and_array ((int32_t *) array,
+                                     (int32_t *) v->data,
+                                     v->len / 4);
+                        break;
+
+                default:
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Unknown xattrop type (%d) on %s. Please send "
+                                "a bug report to gluster-devel@gluster.org",
+                                optype, filler->real_path);
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unlock;
+                }
+
+                if (filler->real_path) {
+                        size = sys_lsetxattr (filler->real_path, k, array,
+                                              v->len, 0);
+                } else {
+                        size = sys_fsetxattr (filler->fd, k, (char *)array,
+                                              v->len, 0);
+                }
+
+                /* read errno before UNLOCK() gets a chance to change it */
+                op_errno = errno;
+        }
+unlock:
+        UNLOCK (&inode->lock);
+
+        if (op_ret == -1)
+                goto out;
+
+        if (size == -1) {
+                if (filler->real_path)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "setxattr failed on %s while doing xattrop: "
+                                "key=%s (%s)", filler->real_path,
+                                k, strerror (op_errno));
+                else
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fsetxattr failed on fd=%d while doing xattrop: "
+                                "key=%s (%s)", filler->fd,
+                                k, strerror (op_errno));
+
+                op_ret = -1;
+                goto out;
+        }
+
+        size = dict_set_bin (d, k, array, v->len);
+        if (size != 0) {
+                if (filler->real_path)
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "dict_set_bin failed (path=%s): "
+                                "key=%s (%s)", filler->real_path,
+                                k, strerror (-size));
+                else
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "dict_set_bin failed (fd=%d): "
+                                "key=%s (%s)", filler->fd,
+                                k, strerror (-size));
+
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* the dict owns the buffer from here on */
+        array = NULL;
+
+out:
+        /* Still set only on a failure path; the code this helper replaced
+         * released the buffer the same way in do_xattrop(). */
+        if (array)
+                GF_FREE (array);
+
+        return op_ret;
+}
+
/**
* xattrop - xattr operations - for internal use by GlusterFS
* @optype: ADD_ARRAY:
@@ -3039,36 +4408,24 @@ int
do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr)
{
- char *real_path = NULL;
- char *array = NULL;
- int size = 0;
- int count = 0;
-
- int op_ret = 0;
- int op_errno = 0;
-
- int ret = 0;
- int _fd = -1;
- struct posix_fd *pfd = NULL;
-
- data_pair_t *trav = NULL;
-
- char * path = NULL;
- inode_t * inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int _fd = -1;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {0,};
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (xattr, out);
VALIDATE_OR_GOTO (this, out);
- trav = xattr->members_list;
-
if (fd) {
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
+ op_ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (op_ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"failed to get pfd from fd=%p",
fd);
- op_ret = -1;
op_errno = EBADFD;
goto out;
}
@@ -3079,138 +4436,21 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
MAKE_INODE_HANDLE (real_path, this, loc, NULL);
if (real_path) {
- path = gf_strdup (real_path);
inode = loc->inode;
} else if (fd) {
inode = fd->inode;
}
- while (trav && inode) {
- count = trav->value->len;
- array = GF_CALLOC (count, sizeof (char),
- gf_posix_mt_char);
+ filler.this = this;
+ filler.fd = _fd;
+ filler.real_path = real_path;
+ filler.flags = (int)optype;
+ filler.inode = inode;
- LOCK (&inode->lock);
- {
- if (loc) {
- size = sys_lgetxattr (real_path, trav->key, (char *)array,
- trav->value->len);
- } else {
- size = sys_fgetxattr (_fd, trav->key, (char *)array,
- trav->value->len);
- }
-
- op_errno = errno;
- if ((size == -1) && (op_errno != ENODATA) &&
- (op_errno != ENOATTR)) {
- if (op_errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported by filesystem");
- } else if (op_errno != ENOENT ||
- !posix_special_xattr (marker_xattrs,
- trav->key)) {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on %s while doing "
- "xattrop: Key:%s (%s)", path,
- trav->key, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fgetxattr failed on fd=%d while doing "
- "xattrop: Key:%s (%s)", _fd,
- trav->key, strerror (op_errno));
- }
-
- op_ret = -1;
- goto unlock;
- }
-
- switch (optype) {
-
- case GF_XATTROP_ADD_ARRAY:
- __add_array ((int32_t *) array, (int32_t *) trav->value->data,
- trav->value->len / 4);
- break;
-
- case GF_XATTROP_ADD_ARRAY64:
- __add_long_array ((int64_t *) array, (int64_t *) trav->value->data,
- trav->value->len / 8);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unknown xattrop type (%d) on %s. Please send "
- "a bug report to gluster-devel@nongnu.org",
- optype, path);
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
-
- if (loc) {
- size = sys_lsetxattr (real_path, trav->key, array,
- trav->value->len, 0);
- } else {
- size = sys_fsetxattr (_fd, trav->key, (char *)array,
- trav->value->len, 0);
- }
- }
- unlock:
- UNLOCK (&inode->lock);
-
- if (op_ret == -1)
- goto out;
-
- op_errno = errno;
- if (size == -1) {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr failed on %s while doing xattrop: "
- "key=%s (%s)", path,
- trav->key, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr failed on fd=%d while doing xattrop: "
- "key=%s (%s)", _fd,
- trav->key, strerror (op_errno));
-
- op_ret = -1;
- goto out;
- } else {
- size = dict_set_bin (xattr, trav->key, array,
- trav->value->len);
-
- if (size != 0) {
- if (loc)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (path=%s): "
- "key=%s (%s)", path,
- trav->key, strerror (-size));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (fd=%d): "
- "key=%s (%s)", _fd,
- trav->key, strerror (-size));
-
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- array = NULL;
- }
-
- array = NULL;
- trav = trav->next;
- }
+ op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair,
+ &filler);
out:
- if (array)
- GF_FREE (array);
-
- if (path)
- GF_FREE (path);
STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL);
return 0;
@@ -3468,7 +4708,7 @@ posix_fentrylk (call_frame_t *frame, xlator_t *this,
int
posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
{
off_t in_case = -1;
size_t filled = 0;
@@ -3478,6 +4718,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
int32_t this_size = -1;
gf_dirent_t *this_entry = NULL;
uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+ struct stat stbuf = {0,};
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
+
+ if (skip_dirs) {
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ hpath = alloca (len + 256); /* NAME_MAX */
+ posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
+ len = strlen (hpath);
+ hpath[len] = '/';
+ }
if (!off) {
rewinddir (dir);
@@ -3509,10 +4761,6 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
break;
}
- if ((uuid_compare (fd->inode->gfid, rootgfid) == 0)
- && (!strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)))
- continue;
-
#ifdef __NetBSD__
/*
* NetBSD with UFS1 backend uses backing files for
@@ -3532,6 +4780,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
continue;
}
+ if (skip_dirs) {
+ if (DT_ISDIR (entry->d_type)) {
+ continue;
+ } else if (hpath) {
+ strcpy (&hpath[len+1],entry->d_name);
+ ret = lstat (hpath, &stbuf);
+ if (!ret && S_ISDIR (stbuf.st_mode))
+ continue;
+ }
+ }
+
this_size = max (sizeof (gf_dirent_t),
sizeof (gfs3_dirplist))
+ strlen (entry->d_name) + 1;
@@ -3551,6 +4810,7 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
}
this_entry->d_off = telldir (dir);
this_entry->d_ino = entry->d_ino;
+ this_entry->d_type = entry->d_type;
list_add_tail (&this_entry->list, &entries->list);
@@ -3583,24 +4843,78 @@ posix_entry_xattr_fill (xlator_t *this, inode_t *inode,
}
+
+/*
+ * posix_readdirp_fill - attach stat data, an inode and (optionally) an
+ * xattr dict to every entry of a directory listing.
+ *
+ * @this:    posix xlator instance
+ * @fd:      directory fd the listing came from; its inode supplies the gfid
+ *           used to build the handle path, and the inode table
+ * @entries: list head of gf_dirent_t entries, filled in place
+ * @dict:    request dict; when non-NULL each entry also receives the dict
+ *           returned by posix_entry_xattr_fill(), with a reference taken
+ *
+ * Always returns 0; a failed stat of an individual entry is not reported.
+ */
+int
+posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict)
+{
+        gf_dirent_t   *entry  = NULL;
+        inode_table_t *itable = NULL;
+        inode_t       *inode  = NULL;
+        char          *hpath  = NULL;
+        int            len    = 0;
+        struct iatt    stbuf  = {0, };
+        uuid_t         gfid;
+
+        if (list_empty(&entries->list))
+                return 0;
+
+        itable = fd->inode->table;
+
+        /* The NULL/0 call is used only for its return value, which sizes the
+         * buffer (presumably the required path length -- confirm against
+         * posix_handle_path()). 256 extra bytes leave room for "/" + name. */
+        len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+        hpath = alloca (len + 256); /* NAME_MAX */
+        posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
+        len = strlen (hpath);
+        hpath[len] = '/';
+
+        list_for_each_entry (entry, &entries->list, list) {
+                memset (gfid, 0, sizeof (gfid));
+
+                /* stbuf is shared by all iterations and posix_pstat()'s
+                 * result is not checked: without this reset a failed stat
+                 * would leave the previous entry's gfid/ino/stat in place
+                 * and attach them (and that entry's inode) to this entry. */
+                memset (&stbuf, 0, sizeof (stbuf));
+
+                inode = inode_grep (fd->inode->table, fd->inode,
+                                    entry->d_name);
+                if (inode)
+                        uuid_copy (gfid, inode->gfid);
+
+                /* overwrite the name component after the trailing '/' */
+                strcpy (&hpath[len+1], entry->d_name);
+
+                posix_pstat (this, gfid, hpath, &stbuf);
+
+                /* inode lookup order: dentry cache, then gfid, then new */
+                if (!inode)
+                        inode = inode_find (itable, stbuf.ia_gfid);
+
+                if (!inode)
+                        inode = inode_new (itable);
+
+                entry->inode = inode;
+
+                if (dict) {
+                        entry->dict =
+                                posix_entry_xattr_fill (this, entry->inode,
+                                                        fd, entry->d_name,
+                                                        dict, &stbuf);
+                        dict_ref (entry->dict);
+                }
+
+                entry->d_stat = stbuf;
+                /* keep the readdir-supplied d_ino when stat gave none */
+                if (stbuf.ia_ino)
+                        entry->d_ino = stbuf.ia_ino;
+                inode = NULL;
+        }
+
+        return 0;
+}
+
+
int32_t
posix_do_readdir (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict)
{
- struct posix_fd *pfd = NULL;
- DIR *dir = NULL;
- int ret = -1;
- int count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- gf_dirent_t entries;
- struct iatt stbuf = {0, };
- gf_dirent_t *tmp_entry = NULL;
- inode_table_t *itable = NULL;
-#ifdef IGNORE_READDIRP_ATTRS
- uuid_t gfid;
- ia_type_t entry_type = 0;
-#endif
+ struct posix_fd *pfd = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ gf_dirent_t entries;
+ int32_t skip_dirs = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -3623,9 +4937,30 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
"dir is NULL for fd=%p", fd);
op_errno = EINVAL;
goto out;
- }
+ }
- count = posix_fill_readdir (fd, dir, off, size, &entries);
+ /* When READDIR_FILTER option is set to on, we can filter out
+ * directory's entry from the entry->list.
+ */
+ ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+ LOCK (&fd->lock);
+ {
+ /* posix_fill_readdir performs multiple separate individual
+ readdir() calls to fill up the buffer.
+
+ In case of NFS where the same anonymous FD is shared between
+ different applications, reading a common directory can
+ result in the anonymous fd getting re-used unsafely between
+ the two readdir requests (in two different io-threads).
+
+ It would also help, in the future, to replace the loop
+ around readdir() with a single large getdents() call.
+ */
+ count = posix_fill_readdir (fd, dir, off, size, &entries, this,
+ skip_dirs);
+ }
+ UNLOCK (&fd->lock);
/* pick ENOENT to indicate EOF */
op_errno = errno;
@@ -3634,43 +4969,7 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
if (whichop != GF_FOP_READDIRP)
goto out;
- itable = fd->inode->table;
-
- list_for_each_entry (tmp_entry, &entries.list, list) {
-#ifdef IGNORE_READDIRP_ATTRS
- ret = inode_grep_for_gfid (fd->inode->table, fd->inode,
- tmp_entry->d_name, gfid,
- &entry_type);
- if (ret == 0) {
- memset (&stbuf, 0, sizeof (stbuf));
- uuid_copy (stbuf.ia_gfid, gfid);
- posix_fill_ino_from_gfid (this, &stbuf);
- stbuf.ia_type = entry_type;
- } else {
- posix_istat (this, fd->inode->gfid,
- tmp_entry->d_name, &stbuf);
- }
-#else
- posix_istat (this, fd->inode->gfid,
- tmp_entry->d_name, &stbuf);
-#endif
- if (stbuf.ia_ino)
- tmp_entry->d_ino = stbuf.ia_ino;
-
- if (dict) {
- tmp_entry->inode = inode_find (itable, stbuf.ia_gfid);
- if (!tmp_entry->inode)
- tmp_entry->inode = inode_new (itable);
-
- tmp_entry->dict =
- posix_entry_xattr_fill (this, tmp_entry->inode,
- fd, tmp_entry->d_name,
- dict, &stbuf);
- dict_ref (tmp_entry->dict);
- }
-
- tmp_entry->d_stat = stbuf;
- }
+ posix_readdirp_fill (this, fd, &entries, dict);
out:
STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
@@ -3694,6 +4993,32 @@ int32_t
posix_readdirp (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t off, dict_t *dict)
{
+ gf_dirent_t entries;
+ int32_t op_ret = -1, op_errno = 0;
+ gf_dirent_t *entry = NULL;
+
+
+ if ((dict != NULL) && (dict_get (dict, GET_ANCESTRY_DENTRY_KEY))) {
+ INIT_LIST_HEAD (&entries.list);
+
+ op_ret = posix_get_ancestry (this, fd->inode, &entries, NULL,
+ POSIX_ANCESTRY_DENTRY,
+ &op_errno, dict);
+ if (op_ret >= 0) {
+ op_ret = 0;
+
+ list_for_each_entry (entry, &entries.list, list) {
+ op_ret++;
+ }
+ }
+
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries,
+ NULL);
+
+ gf_dirent_free (&entries);
+ return 0;
+ }
+
posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict);
return 0;
}
@@ -3736,28 +5061,26 @@ int32_t
posix_rchecksum (call_frame_t *frame, xlator_t *this,
fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
{
- char *buf = NULL;
-
- int _fd = -1;
-
- struct posix_fd *pfd = NULL;
-
- int op_ret = -1;
- int op_errno = 0;
-
- int ret = 0;
-
- int32_t weak_checksum = 0;
- unsigned char strong_checksum[MD5_DIGEST_LENGTH];
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ int32_t weak_checksum = 0;
+ unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0};
+ struct posix_private *priv = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
+ priv = this->private;
memset (strong_checksum, 0, MD5_DIGEST_LENGTH);
- buf = GF_CALLOC (1, len, gf_posix_mt_char);
- if (!buf) {
+ alloc_buf = _page_aligned_alloc (len, &buf);
+ if (!alloc_buf) {
op_errno = ENOMEM;
goto out;
}
@@ -3772,25 +5095,36 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,
_fd = pfd->fd;
- ret = pread (_fd, buf, len, offset);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pread of %d bytes returned %d (%s)",
- len, ret, strerror (errno));
+ LOCK (&fd->lock);
+ {
+ if (priv->aio_capable && priv->aio_init_done)
+ __posix_fd_set_odirect (fd, pfd, 0, offset, len);
+
+ ret = pread (_fd, buf, len, offset);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pread of %d bytes returned %d (%s)",
+ len, ret, strerror (errno));
+
+ op_errno = errno;
+ }
- op_errno = errno;
- goto out;
}
+ UNLOCK (&fd->lock);
- weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len);
- gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum);
+ if (ret < 0)
+ goto out;
- GF_FREE (buf);
+ weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) ret);
+ gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) ret, (unsigned char *) strong_checksum);
op_ret = 0;
out:
STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
weak_checksum, strong_checksum, NULL);
+
+ GF_FREE (alloc_buf);
+
return 0;
}
@@ -3838,6 +5172,148 @@ mem_acct_init (xlator_t *this)
return ret;
}
+/*
+ * posix_set_owner - make the brick's base path owned by uid/gid.
+ *
+ * An id of -1 is treated as "already satisfied"; chown is issued only when
+ * at least one requested id differs from what lstat reports.
+ *
+ * Returns 0 when nothing had to change or chown succeeded, otherwise the
+ * non-zero result of the failing sys_lstat()/sys_chown() call.
+ */
+static int
+posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
+{
+        struct posix_private *conf       = this->private;
+        struct stat           brick_stat = {0,};
+        int                   rc         = -1;
+        int                   owner_ok   = 0;
+        int                   group_ok   = 0;
+
+        rc = sys_lstat (conf->base_path, &brick_stat);
+        if (rc) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to stat "
+                        "brick path %s (%s)",
+                        conf->base_path, strerror (errno));
+                return rc;
+        }
+
+        owner_ok = (uid == -1 || brick_stat.st_uid == uid);
+        group_ok = (gid == -1 || brick_stat.st_gid == gid);
+        if (owner_ok && group_ok)
+                return 0;
+
+        rc = sys_chown (conf->base_path, uid, gid);
+        if (rc)
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+                        "uid/gid for brick path %s, %s",
+                        conf->base_path, strerror (errno));
+
+        return rc;
+}
+
+
+/*
+ * set_batch_fsync_mode - translate a "batch-fsync-mode" option string into
+ * priv->batch_fsync_mode.
+ *
+ * Returns 0 when str names a known mode, -1 (leaving priv untouched)
+ * otherwise. str must not be NULL.
+ */
+static int
+set_batch_fsync_mode (struct posix_private *priv, const char *str)
+{
+        static const struct {
+                const char *name;
+                int         mode;
+        } known_modes[] = {
+                { "none",                 BATCH_NONE },
+                { "syncfs",               BATCH_SYNCFS },
+                { "syncfs-single-fsync",  BATCH_SYNCFS_SINGLE_FSYNC },
+                { "syncfs-reverse-fsync", BATCH_SYNCFS_REVERSE_FSYNC },
+                { "reverse-fsync",        BATCH_REVERSE_FSYNC },
+        };
+        size_t idx = 0;
+
+        for (idx = 0; idx < sizeof (known_modes) / sizeof (known_modes[0]);
+             idx++) {
+                if (strcmp (str, known_modes[idx].name) != 0)
+                        continue;
+
+                priv->batch_fsync_mode = known_modes[idx].mode;
+                return 0;
+        }
+
+        return -1;
+}
+
+#ifdef GF_DARWIN_HOST_OS
+/*
+ * set_xattr_user_namespace_mode - translate an "xattr-user-namespace-mode"
+ * option string into priv->xattr_user_namespace.
+ *
+ * Returns 0 for "none", "strip", "append" or "both"; -1 for anything else,
+ * in which case priv is left unchanged.
+ */
+static int
+set_xattr_user_namespace_mode (struct posix_private *priv, const char *str)
+{
+        if (!strcmp (str, "none")) {
+                priv->xattr_user_namespace = XATTR_NONE;
+                return 0;
+        }
+
+        if (!strcmp (str, "strip")) {
+                priv->xattr_user_namespace = XATTR_STRIP;
+                return 0;
+        }
+
+        if (!strcmp (str, "append")) {
+                priv->xattr_user_namespace = XATTR_APPEND;
+                return 0;
+        }
+
+        if (!strcmp (str, "both")) {
+                priv->xattr_user_namespace = XATTR_BOTH;
+                return 0;
+        }
+
+        return -1;
+}
+#endif
+
+/*
+ * reconfigure - apply changed volume options to a running posix xlator.
+ *
+ * Reads each reconfigurable option from @options into this->private (or a
+ * local) and acts on the new value. Returns 0 only when the end of the
+ * function is reached; ret starts at -1, so every "goto out" (an unknown
+ * mode string, or presumably a GF_OPTION_RECONF failure, since the macro
+ * is handed the "out" label -- confirm against its definition) returns -1.
+ * Options processed before such a jump have already been applied.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+ int32_t uid = -1;
+ int32_t gid = -1;
+ char *batch_fsync_mode_str = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF ("brick-uid", uid, options, int32, out);
+ GF_OPTION_RECONF ("brick-gid", gid, options, int32, out);
+ /* -1 for both means no ownership was requested; the result of
+  * posix_set_owner() is not checked here. */
+ if (uid != -1 || gid != -1)
+ posix_set_owner (this, uid, gid);
+
+ GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str,
+ options, str, out);
+
+ /* An unrecognised mode string aborts the reconfigure with -1. */
+ if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
+#ifdef GF_DARWIN_HOST_OS
+
+ char *xattr_user_namespace_mode_str = NULL;
+
+ GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str,
+ options, str, out);
+
+ if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown xattr user namespace mode string: %s",
+ xattr_user_namespace_mode_str);
+ goto out;
+ }
+
+#endif
+
+ GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
+ options, bool, out);
+
+ /* Results of posix_aio_on()/posix_aio_off() are not checked here,
+  * whereas init() treats a posix_aio_on() result of -1 as fatal. */
+ if (priv->aio_configured)
+ posix_aio_on (this);
+ else
+ posix_aio_off (this);
+
+ GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo,
+ options, bool, out);
+
+ /* Informational only: the option is still accepted when no glusterd
+  * uuid is available. */
+ if (priv->node_uuid_pathinfo &&
+ (uuid_is_null (priv->glusterd_uuid))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
+ options, uint32, out);
+ /* Called unconditionally, unlike init() which only calls it for a
+  * non-zero interval.
+  * NOTE(review): presumably posix_spawn_health_check_thread() itself
+  * handles an interval of 0 -- confirm. */
+ posix_spawn_health_check_thread (this);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
/**
* init -
*/
@@ -3852,12 +5328,16 @@ init (xlator_t *this)
int dict_ret = 0;
int ret = 0;
int op_ret = -1;
+ ssize_t size = -1;
int32_t janitor_sleep = 0;
uuid_t old_uuid = {0,};
uuid_t dict_uuid = {0,};
uuid_t gfid = {0,};
uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
char *guuid = NULL;
+ int32_t uid = -1;
+ int32_t gid = -1;
+ char *batch_fsync_mode_str;
dir_data = dict_get (this->options, "directory");
@@ -3938,9 +5418,9 @@ init (xlator_t *this)
ret = -1;
goto out;
}
- op_ret = sys_lgetxattr (dir_data->data,
- "trusted.glusterfs.volume-id", old_uuid, 16);
- if (op_ret == 16) {
+ size = sys_lgetxattr (dir_data->data,
+ "trusted.glusterfs.volume-id", old_uuid, 16);
+ if (size == 16) {
if (uuid_compare (old_uuid, dict_uuid)) {
gf_log (this->name, GF_LOG_ERROR,
"mismatching volume-id (%s) received. "
@@ -3949,22 +5429,20 @@ init (xlator_t *this)
ret = -1;
goto out;
}
- } else if ((op_ret == -1) && (errno == ENODATA)) {
- /* Using the export for first time */
- op_ret = sys_lsetxattr (dir_data->data,
- "trusted.glusterfs.volume-id",
- dict_uuid, 16, 0);
- if (op_ret == -1) {
+ } else if ((size == -1) && (errno == ENODATA)) {
+
gf_log (this->name, GF_LOG_ERROR,
- "failed to set volume id on export");
+ "Extended attribute trusted.glusterfs."
+ "volume-id is absent");
ret = -1;
goto out;
- }
- } else if ((op_ret == -1) && (errno != ENODATA)) {
+
+ } else if ((size == -1) && (errno != ENODATA)) {
/* Wrong 'volume-id' is set, it should be error */
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to fetch volume-id (%s)",
dir_data->data, strerror (errno));
+ ret = -1;
goto out;
} else {
ret = -1;
@@ -3976,8 +5454,8 @@ init (xlator_t *this)
/* Now check if the export directory has some other 'gfid',
other than that of root '/' */
- ret = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16);
- if (ret == 16) {
+ size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16);
+ if (size == 16) {
if (!__is_root_gfid (gfid)) {
gf_log (this->name, GF_LOG_WARNING,
"%s: gfid (%s) is not that of glusterfs '/' ",
@@ -3985,34 +5463,37 @@ init (xlator_t *this)
ret = -1;
goto out;
}
- } else if (ret != -1) {
+ } else if (size != -1) {
/* Wrong 'gfid' is set, it should be error */
gf_log (this->name, GF_LOG_WARNING,
"%s: wrong value set as gfid",
dir_data->data);
ret = -1;
goto out;
- } else if ((ret == -1) && (errno != ENODATA)) {
+ } else if ((size == -1) && (errno != ENODATA) &&
+ (errno != ENOATTR)) {
/* Wrong 'gfid' is set, it should be error */
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to fetch gfid (%s)",
dir_data->data, strerror (errno));
+ ret = -1;
goto out;
} else {
/* First time volume, set the GFID */
- ret = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid,
+ size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid,
16, XATTR_CREATE);
- if (ret) {
+ if (size) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set gfid (%s)",
dir_data->data, strerror (errno));
+ ret = -1;
goto out;
}
}
- op_ret = sys_lgetxattr (dir_data->data, "system.posix_acl_access",
- NULL, 0);
- if ((op_ret < 0) && (errno == ENOTSUP))
+ size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR,
+ NULL, 0);
+ if ((size < 0) && (errno == ENOTSUP))
gf_log (this->name, GF_LOG_WARNING,
"Posix access control list is not supported.");
@@ -4027,20 +5508,6 @@ init (xlator_t *this)
_private->base_path = gf_strdup (dir_data->data);
_private->base_path_length = strlen (_private->base_path);
- _private->trash_path = GF_CALLOC (1, _private->base_path_length
- + strlen ("/")
- + strlen (GF_REPLICATE_TRASH_DIR)
- + 1,
- gf_posix_mt_trash_path);
-
- if (!_private->trash_path) {
- ret = -1;
- goto out;
- }
-
- strncpy (_private->trash_path, _private->base_path, _private->base_path_length);
- strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR);
-
LOCK_INIT (&_private->lock);
ret = dict_get_str (this->options, "hostname", &_private->hostname);
@@ -4105,6 +5572,24 @@ init (xlator_t *this)
"for every open)");
}
+ tmp_data = dict_get (this->options, "update-link-count-parent");
+ if (tmp_data) {
+ if (gf_string2boolean (tmp_data->data,
+ &_private->update_pgfid_nlinks) == -1) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong value provided for "
+ "'update-link-count-parent'");
+ goto out;
+ }
+ if (_private->update_pgfid_nlinks)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "update-link-count-parent is enabled. Thus for each "
+ "file an extended attribute representing the "
+ "number of hardlinks for that file within the "
+ "same parent directory is set.");
+ }
+
ret = dict_get_str (this->options, "glusterd-uuid", &guuid);
if (!ret) {
if (uuid_parse (guuid, _private->glusterd_uuid))
@@ -4176,11 +5661,93 @@ init (xlator_t *this)
goto out;
}
+ op_ret = posix_handle_trash_init (this);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Posix landfill setup failed");
+ ret = -1;
+ goto out;
+ }
+
+ _private->aio_init_done = _gf_false;
+ _private->aio_capable = _gf_false;
+
+ GF_OPTION_INIT ("brick-uid", uid, int32, out);
+ GF_OPTION_INIT ("brick-gid", gid, int32, out);
+ if (uid != -1 || gid != -1)
+ posix_set_owner (this, uid, gid);
+
+ GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);
+
+ if (_private->aio_configured) {
+ op_ret = posix_aio_on (this);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Posix AIO init failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ GF_OPTION_INIT ("node-uuid-pathinfo",
+ _private->node_uuid_pathinfo, bool, out);
+ if (_private->node_uuid_pathinfo &&
+ (uuid_is_null (_private->glusterd_uuid))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ _private->health_check_active = _gf_false;
+ GF_OPTION_INIT ("health-check-interval",
+ _private->health_check_interval, uint32, out);
+ if (_private->health_check_interval)
+ posix_spawn_health_check_thread (this);
+
pthread_mutex_init (&_private->janitor_lock, NULL);
pthread_cond_init (&_private->janitor_cond, NULL);
INIT_LIST_HEAD (&_private->janitor_fds);
posix_spawn_janitor_thread (this);
+
+ pthread_mutex_init (&_private->fsync_mutex, NULL);
+ pthread_cond_init (&_private->fsync_cond, NULL);
+ INIT_LIST_HEAD (&_private->fsyncs);
+
+ ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fsyncer thread"
+ " creation failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out);
+
+ if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
+#ifdef GF_DARWIN_HOST_OS
+
+ char *xattr_user_namespace_mode_str = NULL;
+
+ GF_OPTION_INIT ("xattr-user-namespace-mode",
+ xattr_user_namespace_mode_str, str, out);
+
+ if (set_xattr_user_namespace_mode (_private,
+ xattr_user_namespace_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown xattr user namespace mode string: %s",
+ xattr_user_namespace_mode_str);
+ goto out;
+ }
+#endif
+
+ GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
+ uint32, out);
out:
return ret;
}
@@ -4246,6 +5813,10 @@ struct xlator_fops fops = {
.fxattrop = posix_fxattrop,
.setattr = posix_setattr,
.fsetattr = posix_fsetattr,
+ .fallocate = _posix_fallocate,
+ .discard = posix_discard,
+ .zerofill = posix_zerofill,
+ .ipc = posix_ipc,
};
struct xlator_cbks cbks = {
@@ -4273,5 +5844,77 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_ANY },
{ .key = {"glusterd-uuid"},
.type = GF_OPTION_TYPE_STR },
+ {
+ .key = {"linux-aio"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Support for native Linux AIO"
+ },
+ {
+ .key = {"brick-uid"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = -1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .default_value = "-1",
+ .description = "Support for setting uid of brick's owner"
+ },
+ {
+ .key = {"brick-gid"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = -1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .default_value = "-1",
+ .description = "Support for setting gid of brick's owner"
+ },
+ { .key = {"node-uuid-pathinfo"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "return glusterd's node-uuid in pathinfo xattr"
+ " string instead of hostname"
+ },
+ {
+ .key = {"health-check-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "30",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Interval in seconds for a filesystem health check, "
+ "set to 0 to disable"
+ },
+        /* User-visible help text for "batch-fsync-mode".
+         * NOTE(review): set_batch_fsync_mode() also accepts "none", which
+         * is not described here -- confirm whether it should be listed. */
+        { .key = {"batch-fsync-mode"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "reverse-fsync",
+          .description = "Possible values:\n"
+          "\t- syncfs: Perform one syncfs() on behalf of a batch"
+          " of fsyncs.\n"
+          "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
+          " of fsyncs and one fsync() per batch.\n"
+          "\t- syncfs-reverse-fsync: Perform one syncfs() on behalf of a batch"
+          " of fsyncs and fsync() each file in the batch in reverse order.\n"
+          "\t- reverse-fsync: Perform fsync() of each file in the batch in"
+          " reverse order."
+        },
+ { .key = {"batch-fsync-delay-usec"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description = "Num of usecs to wait for aggregating fsync"
+ " requests",
+ },
+ { .key = {"update-link-count-parent"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable placeholders for gfid to path conversion"
+ },
+#if GF_DARWIN_HOST_OS
+ { .key = {"xattr-user-namespace-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "none",
+ .description = "Option to control XATTR user namespace on the raw filesystem: "
+ "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n"
+ " The raw filesystem will not be compatible with OS X Finder.\n"
+ "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n"
+ },
+#endif
{ .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 7c2b47bb0..c9bfc984d 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _POSIX_H
#define _POSIX_H
@@ -53,6 +43,20 @@
#include "timer.h"
#include "posix-mem-types.h"
#include "posix-handle.h"
+#include "call-stub.h"
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#include "posix-aio.h"
+#endif
+
+#define VECTOR_SIZE 64 * 1024 /* vector size 64KB*/
+#define MAX_NO_VECT 1024
+
+#define POSIX_GFID_HANDLE_SIZE(base_path_len) (base_path_len + SLEN("/") \
+ + SLEN(GF_HIDDEN_PATH) + SLEN("/") \
+ + SLEN("00/") \
+ + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */;
/**
* posix_fd - internal structure common to file and directory fd's
@@ -62,9 +66,7 @@ struct posix_fd {
int fd; /* fd returned by the kernel */
int32_t flags; /* flags for open/creat */
DIR * dir; /* handle returned by the kernel */
- int flushwrites;
int odirect;
- int op_performed;
struct list_head list; /* to add to the janitor list */
};
@@ -124,8 +126,63 @@ struct posix_private {
/* uuid of glusterd that swapned the brick process */
uuid_t glusterd_uuid;
+ gf_boolean_t aio_configured;
+ gf_boolean_t aio_init_done;
+ gf_boolean_t aio_capable;
+#ifdef HAVE_LIBAIO
+ io_context_t ctxp;
+ pthread_t aiothread;
+#endif
+
+ /* node-uuid in pathinfo xattr */
+ gf_boolean_t node_uuid_pathinfo;
+
+ pthread_t fsyncer;
+ struct list_head fsyncs;
+ pthread_mutex_t fsync_mutex;
+ pthread_cond_t fsync_cond;
+ int fsync_queue_count;
+
+ enum {
+ BATCH_NONE = 0,
+ BATCH_SYNCFS,
+ BATCH_SYNCFS_SINGLE_FSYNC,
+ BATCH_REVERSE_FSYNC,
+ BATCH_SYNCFS_REVERSE_FSYNC
+ } batch_fsync_mode;
+
+ uint32_t batch_fsync_delay_usec;
+ gf_boolean_t update_pgfid_nlinks;
+
+ /* seconds to sleep between health checks */
+ uint32_t health_check_interval;
+ pthread_t health_check;
+ gf_boolean_t health_check_active;
+
+#ifdef GF_DARWIN_HOST_OS
+ enum {
+ XATTR_NONE = 0,
+ XATTR_STRIP,
+ XATTR_APPEND,
+ XATTR_BOTH,
+ } xattr_user_namespace;
+#endif
+
};
+/*
+ * posix_xattr_filler_t - bundle of per-request state for xattr handling.
+ *
+ * NOTE(review): presumably this is the type of the "filler" object whose
+ * address is passed as the data argument to dict_foreach() callbacks such
+ * as _posix_handle_xattr_keyvalue_pair() -- confirm at the call sites.
+ * Which members are set depends on the caller.
+ */
+typedef struct {
+ xlator_t *this;
+ const char *real_path;
+ dict_t *xattr;
+ struct iatt *stbuf;
+ loc_t *loc;
+ inode_t *inode; /* for all do_xattrop() key handling */
+ int fd;
+ int flags;
+ /* NOTE(review): looks like an errno-style out-value for the callback
+  * to report failures through -- confirm against the callbacks. */
+ int32_t op_errno;
+} posix_xattr_filler_t;
+
+
#define POSIX_BASE_PATH(this) (((struct posix_private *)this->private)->base_path)
#define POSIX_BASE_PATH_LEN(this) (((struct posix_private *)this->private)->base_path_length)
@@ -140,23 +197,33 @@ int posix_pstat (xlator_t *this, uuid_t gfid, const char *real_path,
struct iatt *iatt);
dict_t *posix_lookup_xattr_fill (xlator_t *this, const char *path,
loc_t *loc, dict_t *xattr, struct iatt *buf);
-int posix_handle_pair (xlator_t *this, const char *real_path,
- data_pair_t *trav, int flags);
-int posix_fhandle_pair (xlator_t *this, int fd, data_pair_t *trav, int flags);
+int posix_handle_pair (xlator_t *this, const char *real_path, char *key,
+ data_t *value, int flags);
+int posix_fhandle_pair (xlator_t *this, int fd, char *key, data_t *value,
+ int flags);
void posix_spawn_janitor_thread (xlator_t *this);
int posix_get_file_contents (xlator_t *this, uuid_t pargfid,
const char *name, char **contents);
-int posix_set_file_contents (xlator_t *this, const char *path,
- data_pair_t *trav, int flags);
+int posix_set_file_contents (xlator_t *this, const char *path, char *key,
+ data_t *value, int flags);
int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req);
-int posix_gfid_heal (xlator_t *this, const char *path, dict_t *xattr_req);
+int posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req);
int posix_entry_create_xattr_set (xlator_t *this, const char *path,
dict_t *dict);
int posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd);
-int posix_fd_ctx_get_off (fd_t *fd, xlator_t *this, struct posix_fd **pfd,
- off_t off);
void posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf);
gf_boolean_t posix_special_xattr (char **pattern, char *key);
+
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size);
+void posix_spawn_health_check_thread (xlator_t *this);
+
+void *posix_fsyncer (void *);
+int
+posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type, int32_t *op_errno,
+ dict_t *xdata);
#endif /* _POSIX_H */