summaryrefslogtreecommitdiffstats
path: root/xlators/storage
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage')
-rw-r--r--xlators/storage/Makefile.am4
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c528
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c1021
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h27
-rw-r--r--xlators/storage/bd/src/bd.c2450
-rw-r--r--xlators/storage/bd/src/bd.h173
-rw-r--r--xlators/storage/posix/src/Makefile.am22
-rw-r--r--xlators/storage/posix/src/posix-aio.c957
-rw-r--r--xlators/storage/posix/src/posix-aio.h27
-rw-r--r--xlators/storage/posix/src/posix-common.c1524
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c2496
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c243
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h28
-rw-r--r--xlators/storage/posix/src/posix-handle.c1532
-rw-r--r--xlators/storage/posix/src/posix-handle.h400
-rw-r--r--xlators/storage/posix/src/posix-helpers.c4483
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c6004
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h118
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h20
-rw-r--r--xlators/storage/posix/src/posix-messages.h74
-rw-r--r--xlators/storage/posix/src/posix-metadata-disk.h31
-rw-r--r--xlators/storage/posix/src/posix-metadata.c916
-rw-r--r--xlators/storage/posix/src/posix-metadata.h71
-rw-r--r--xlators/storage/posix/src/posix.c5928
-rw-r--r--xlators/storage/posix/src/posix.h746
28 files changed, 16971 insertions, 12916 deletions
diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am
index c08e8e41bca..5e3ed0eb93b 100644
--- a/xlators/storage/Makefile.am
+++ b/xlators/storage/Makefile.am
@@ -1,7 +1,3 @@
SUBDIRS = posix
-if ENABLE_BD_XLATOR
-SUBDIRS += bd
-endif
-
CLEANFILES =
diff --git a/xlators/storage/bd/Makefile.am b/xlators/storage/bd/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/storage/bd/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/storage/bd/src/Makefile.am b/xlators/storage/bd/src/Makefile.am
deleted file mode 100644
index 60ceff31b20..00000000000
--- a/xlators/storage/bd/src/Makefile.am
+++ /dev/null
@@ -1,20 +0,0 @@
-if ENABLE_BD_XLATOR
-xlator_LTLIBRARIES = bd.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
-
-bd_la_LDFLAGS = -module -avoid-version
-LIBBD = -llvm2app -lrt
-bd_la_SOURCES = bd.c bd-helper.c bd-aio.c
-bd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD) $(LIBAIO)
-
-noinst_HEADERS = bd.h bd-aio.h bd-mem-types.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src
-
-AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
-endif
diff --git a/xlators/storage/bd/src/bd-aio.c b/xlators/storage/bd/src/bd-aio.c
deleted file mode 100644
index 9dc13b3ec60..00000000000
--- a/xlators/storage/bd/src/bd-aio.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/*
- Copyright IBM, Corp. 2013
-
- This file is part of GlusterFS.
-
- Author: M. Mohan Kumar <mohan@in.ibm.com>
-
- Based on posix-aio.c
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <lvm2app.h>
-#include <sys/uio.h>
-
-#include "xlator.h"
-#include "glusterfs.h"
-#include "defaults.h"
-#include "bd.h"
-#include "bd-aio.h"
-
-#ifdef HAVE_LIBAIO
-#include <libaio.h>
-#include "bd-mem-types.h"
-
-struct bd_aio_cb { /* state carried by one libaio request from submit to completion */
- struct iocb iocb; /* control block handed to io_submit() */
- call_frame_t *frame; /* frame that is unwound when the request completes */
- struct iobuf *iobuf; /* destination buffer of a read (set by bd_aio_readv) */
- struct iobref *iobref; /* reference taken on the write payload (set by bd_aio_writev) */
- struct iatt prebuf; /* attributes copied from the inode ctx before a write is submitted */
- int op; /* GF_FOP_READ or GF_FOP_WRITE; selects the completion handler */
- off_t offset; /* file offset of the request */
- fd_t *fd; /* fd the request was issued on */
-};
-
-/*
- * Switch O_DIRECT on or off on the descriptor behind @bd_fd so that it suits
- * the request that is about to be issued.
- *
- * O_DIRECT is forced when either the fd's open flags or the per-operation
- * @opflags request it; otherwise it is enabled only when the low 12 bits of
- * both @offset and @size are clear. bd_fd->odirect caches the current mode
- * and is now updated only after fcntl() succeeded, so a failed switch is
- * retried on the next request instead of being silently recorded as done.
- *
- * Both AIO submit paths in this file call this with fd->lock held.
- */
-void
-__bd_fd_set_odirect(fd_t *fd, bd_fd_t *bd_fd, int opflags, off_t offset,
-                    size_t size)
-{
-    int want = 0;
-    int flags = 0;
-
-    if ((fd->flags | opflags) & O_DIRECT)
-        want = 1; /* if instructed, use O_DIRECT always */
-    else
-        want = ((offset | size) & 0xfff) ? 0 : 1; /* else only when feasible */
-
-    if (!want == !bd_fd->odirect)
-        return; /* descriptor is already in the wanted mode */
-
-    flags = fcntl(bd_fd->fd, F_GETFL);
-    if (flags == -1 ||
-        fcntl(bd_fd->fd, F_SETFL,
-              want ? (flags | O_DIRECT) : (flags & (~O_DIRECT))) == -1) {
-        gf_log(THIS->name, GF_LOG_WARNING,
-               "fcntl() failed (%s). fd=%d flags=%d pfd->odirect=%d",
-               strerror(errno), bd_fd->fd, flags, bd_fd->odirect);
-        return;
-    }
-
-    bd_fd->odirect = want;
-}
-
-/*
- * Completion handler for an asynchronous read submitted by bd_aio_readv().
- *
- * @paiocb: request state; its iobuf reference and the structure itself are
- *          released here.
- * @res:    result reported by the AIO event: bytes read, or a negated errno.
- * @res2:   secondary event result, unused.
- *
- * Unwinds the readv frame with the data (or the error) and always returns 0.
- */
-int
-bd_aio_readv_complete(struct bd_aio_cb *paiocb, int res, int res2)
-{
-    call_frame_t *frame = paiocb->frame;
-    xlator_t *this = frame->this;
-    struct iobuf *iobuf = paiocb->iobuf;
-    off_t offset = paiocb->offset;
-    struct iatt postbuf = {0, };
-    struct iovec iov = {0, }; /* zeroed: it is passed to the unwind on error too */
-    struct iobref *iobref = NULL;
-    bd_attr_t *bdatt = NULL;
-    int op_ret = -1;
-    int op_errno = 0;
-
-    if (res < 0) {
-        op_errno = -res;
-        gf_log(this->name, GF_LOG_ERROR,
-               "readv(async) failed fd=%p,size=%lu,offset=%llu (%d/%s)",
-               paiocb->fd, paiocb->iocb.u.c.nbytes,
-               (unsigned long long)paiocb->offset, res, strerror(op_errno));
-        goto out;
-    }
-
-    /* Without an inode context there are no attributes to report. */
-    if (bd_inode_ctx_get(paiocb->fd->inode, this, &bdatt) || !bdatt) {
-        op_errno = EINVAL;
-        goto out;
-    }
-    memcpy(&postbuf, &bdatt->iatt, sizeof(struct iatt));
-
-    iobref = iobref_new();
-    if (!iobref) {
-        op_errno = ENOMEM;
-        goto out;
-    }
-    iobref_add(iobref, iobuf);
-
-    op_ret = res;
-    iov.iov_base = iobuf_ptr(iobuf);
-    iov.iov_len = op_ret;
-
-    /* Hack to notify higher layers of EOF. */
-    if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
-        op_errno = ENOENT;
-
-out:
-    STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf,
-                        iobref, NULL);
-    if (iobuf)
-        iobuf_unref(iobuf);
-    if (iobref)
-        iobref_unref(iobref);
-
-    GF_FREE(paiocb);
-
-    return 0;
-}
-
-/*
- * readv fop backed by Linux AIO.
- *
- * When @fd has no bd fd context the call is wound to the first child.
- * Otherwise an iobuf of @size bytes is allocated, an IO_CMD_PREAD request is
- * queued on priv->ctxp and the frame is unwound later from
- * bd_aio_readv_complete(). A zero @size is rejected with EINVAL.
- *
- * Always returns 0; failures are reported through the unwind.
- */
-int
-bd_aio_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
-             off_t offset, uint32_t flags, dict_t *xdata)
-{
-    int32_t op_errno = EINVAL;
-    int ret = -1;
-    struct iobuf *iobuf = NULL;
-    bd_fd_t *bd_fd = NULL;
-    struct bd_aio_cb *paiocb = NULL;
-    bd_priv_t *priv = NULL;
-    struct iocb *iocb = NULL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-
-    priv = this->private;
-
-    ret = bd_fd_ctx_get(this, fd, &bd_fd);
-    if (ret < 0 || !bd_fd) {
-        STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
-                   FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
-                   xdata);
-        return 0;
-    }
-
-    if (!size) {
-        op_errno = EINVAL;
-        gf_log(this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
-        goto err;
-    }
-
-    iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
-    if (!iobuf) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-
-    paiocb = GF_CALLOC(1, sizeof(*paiocb), gf_bd_aio_cb);
-    if (!paiocb) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-
-    paiocb->frame = frame;
-    paiocb->iobuf = iobuf;
-    paiocb->offset = offset;
-    paiocb->op = GF_FOP_READ;
-    paiocb->fd = fd;
-
-    paiocb->iocb.data = paiocb;
-    paiocb->iocb.aio_fildes = bd_fd->fd;
-    paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
-    paiocb->iocb.aio_reqprio = 0;
-    paiocb->iocb.u.c.buf = iobuf_ptr(iobuf);
-    paiocb->iocb.u.c.nbytes = size;
-    paiocb->iocb.u.c.offset = offset;
-
-    iocb = &paiocb->iocb;
-
-    /* The O_DIRECT mode and the submission must not interleave with
-     * another request on the same fd. */
-    LOCK(&fd->lock);
-    {
-        __bd_fd_set_odirect(fd, bd_fd, flags, offset, size);
-
-        ret = io_submit(priv->ctxp, 1, &iocb);
-    }
-    UNLOCK(&fd->lock);
-
-    if (ret != 1) {
-        gf_log(this->name, GF_LOG_ERROR, "io_submit() returned %d", ret);
-        /* A negative return is a negated errno; anything else means the
-         * request was not queued, which must not unwind with errno 0. */
-        op_errno = (ret < 0) ? -ret : EIO;
-        goto err;
-    }
-
-    return 0;
-err:
-    STACK_UNWIND_STRICT(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
-    if (iobuf)
-        iobuf_unref(iobuf);
-
-    GF_FREE(paiocb);
-
-    return 0;
-}
-
-/*
- * Completion handler for an asynchronous write submitted by bd_aio_writev().
- *
- * @paiocb: request state; its iobref reference and the structure itself are
- *          released here.
- * @res:    result reported by the AIO event: bytes written, or a negated
- *          errno.
- * @res2:   secondary event result, unused.
- *
- * On success the cached mtime in the inode context is refreshed and returned
- * as the post-op attributes. Always returns 0.
- */
-int
-bd_aio_writev_complete(struct bd_aio_cb *paiocb, int res, int res2)
-{
-    call_frame_t *frame = paiocb->frame;
-    xlator_t *this = frame->this;
-    struct iatt prebuf = paiocb->prebuf;
-    struct iatt postbuf = {0, };
-    bd_attr_t *bdatt = NULL;
-    int op_ret = -1;
-    int op_errno = 0;
-
-    if (res < 0) {
-        op_errno = -res;
-        gf_log(this->name, GF_LOG_ERROR,
-               "writev(async) failed fd=%p,offset=%llu (%d/%s)", paiocb->fd,
-               (unsigned long long)paiocb->offset, res, strerror(op_errno));
-        goto out;
-    }
-
-    /* Without an inode context there are no attributes to update. */
-    if (bd_inode_ctx_get(paiocb->fd->inode, this, &bdatt) || !bdatt) {
-        op_errno = EINVAL;
-        goto out;
-    }
-
-    bd_update_amtime(&bdatt->iatt, GF_SET_ATTR_MTIME);
-    memcpy(&postbuf, &bdatt->iatt, sizeof(struct iatt));
-
-    op_ret = res;
-
-out:
-    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &prebuf, &postbuf,
-                        NULL);
-
-    if (paiocb->iobref)
-        iobref_unref(paiocb->iobref);
-    GF_FREE(paiocb);
-
-    return 0;
-}
-
-/*
- * writev fop backed by Linux AIO.
- *
- * When @fd has no bd fd context the call is wound to the first child.
- * Otherwise an IO_CMD_PWRITEV request covering @iov/@count is queued on
- * priv->ctxp, with a reference held on @iobref until completion, and the
- * frame is unwound later from bd_aio_writev_complete().
- *
- * Always returns 0; failures are reported through the unwind.
- */
-int
-bd_aio_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
-              struct iovec *iov, int count, off_t offset, uint32_t flags,
-              struct iobref *iobref, dict_t *xdata)
-{
-    int32_t op_errno = EINVAL;
-    int ret = -1;
-    bd_fd_t *bd_fd = NULL;
-    struct bd_aio_cb *paiocb = NULL;
-    bd_priv_t *priv = NULL;
-    struct iocb *iocb = NULL;
-    bd_attr_t *bdatt = NULL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-
-    priv = this->private;
-
-    ret = bd_fd_ctx_get(this, fd, &bd_fd);
-    if (ret < 0 || !bd_fd) {
-        STACK_WIND(frame, default_writev_cbk, FIRST_CHILD(this),
-                   FIRST_CHILD(this)->fops->writev, fd, iov, count, offset,
-                   flags, iobref, xdata);
-        return 0;
-    }
-
-    /* The pre-op attributes are copied from the inode context below, so a
-     * missing context has to be rejected instead of dereferenced. */
-    if (bd_inode_ctx_get(fd->inode, this, &bdatt) || !bdatt) {
-        op_errno = EINVAL;
-        goto err;
-    }
-
-    paiocb = GF_CALLOC(1, sizeof(*paiocb), gf_bd_aio_cb);
-    if (!paiocb) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-
-    paiocb->frame = frame;
-    paiocb->offset = offset;
-    paiocb->op = GF_FOP_WRITE;
-    paiocb->fd = fd;
-    paiocb->iobref = iobref_ref(iobref);
-
-    paiocb->iocb.data = paiocb;
-    paiocb->iocb.aio_fildes = bd_fd->fd;
-    paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
-    paiocb->iocb.aio_reqprio = 0;
-    paiocb->iocb.u.v.vec = iov;
-    paiocb->iocb.u.v.nr = count;
-    paiocb->iocb.u.v.offset = offset;
-
-    iocb = &paiocb->iocb;
-
-    memcpy(&paiocb->prebuf, &bdatt->iatt, sizeof(struct iatt));
-
-    LOCK(&fd->lock);
-    {
-        __bd_fd_set_odirect(fd, bd_fd, flags, offset,
-                            iov_length(iov, count));
-
-        ret = io_submit(priv->ctxp, 1, &iocb);
-    }
-    UNLOCK(&fd->lock);
-
-    if (ret != 1) {
-        gf_log(this->name, GF_LOG_ERROR, "io_submit() returned %d", ret);
-        /* A negative return is a negated errno; anything else means the
-         * request was not queued, which must not unwind with errno 0. */
-        op_errno = (ret < 0) ? -ret : EIO;
-        goto err;
-    }
-
-    return 0;
-err:
-    STACK_UNWIND_STRICT(writev, frame, -1, op_errno, 0, 0, 0);
-
-    if (paiocb) {
-        if (paiocb->iobref)
-            iobref_unref(paiocb->iobref);
-        GF_FREE(paiocb);
-    }
-
-    return 0;
-}
-
-/*
- * Body of the AIO reaper thread started by bd_aio_init().
- *
- * @data is the xlator. The thread repeatedly collects up to
- * BD_AIO_MAX_NR_GETEVENTS completed requests (waiting at most the 5 second
- * timeout per call) and dispatches each one to the read or write completion
- * handler according to the op recorded in its bd_aio_cb. It leaves the loop,
- * and returns NULL, on the first io_getevents() error other than -EINTR.
- */
-void *
-bd_aio_thread(void *data)
-{
-    xlator_t *this = data;
-    bd_priv_t *priv = NULL;
-    struct io_event events[BD_AIO_MAX_NR_GETEVENTS];
-    struct timespec ts = {0, };
-    struct bd_aio_cb *paiocb = NULL;
-    struct io_event *event = NULL;
-    int nr = 0;
-    int idx = 0;
-
-    THIS = this;
-    priv = this->private;
-    ts.tv_sec = 5;
-
-    while (1) {
-        memset(&events[0], 0, sizeof(events));
-        nr = io_getevents(priv->ctxp, 1, BD_AIO_MAX_NR_GETEVENTS,
-                          &events[0], &ts);
-        if (nr == -EINTR)
-            continue;
-        if (nr < 0) {
-            gf_log(this->name, GF_LOG_ERROR,
-                   "io_getevents() returned %d, exiting", nr);
-            break;
-        }
-
-        for (idx = 0; idx < nr; idx++) {
-            event = &events[idx];
-            paiocb = event->data;
-
-            if (paiocb->op == GF_FOP_READ) {
-                bd_aio_readv_complete(paiocb, event->res, event->res2);
-            } else if (paiocb->op == GF_FOP_WRITE) {
-                bd_aio_writev_complete(paiocb, event->res, event->res2);
-            } else {
-                gf_log(this->name, GF_LOG_ERROR,
-                       "unknown op %d found in piocb", paiocb->op);
-            }
-        }
-    }
-
-    return NULL;
-}
-
-/*
- * Create the AIO context and the reaper thread, then route readv/writev
- * through the AIO implementations.
- *
- * Returns 0 on success and also when io_setup() reports ENOSYS (in that
- * case the fops are left untouched); otherwise the failing call's return
- * value.
- */
-int
-bd_aio_init(xlator_t *this)
-{
-    bd_priv_t *priv = this->private;
-    int rc = 0;
-
-    rc = io_setup(BD_AIO_MAX_NR_EVENTS, &priv->ctxp);
-    if (rc == -ENOSYS || (rc == -1 && errno == ENOSYS)) {
-        gf_log(this->name, GF_LOG_WARNING,
-               "Linux AIO not available at run-time."
-               " Continuing with synchronous IO");
-        return 0;
-    }
-
-    if (rc < 0) {
-        gf_log(this->name, GF_LOG_WARNING,
-               "io_setup() failed. ret=%d, errno=%d", rc, errno);
-        return rc;
-    }
-
-    rc = pthread_create(&priv->aiothread, NULL, bd_aio_thread, this);
-    if (rc != 0) {
-        /* no reaper thread, so the context is of no use */
-        io_destroy(priv->ctxp);
-        return rc;
-    }
-
-    this->fops->readv = bd_aio_readv;
-    this->fops->writev = bd_aio_writev;
-
-    return rc;
-}
-
-
-/*
- * Enable AIO for this xlator. bd_aio_init() is attempted only on the first
- * call and its outcome is remembered in priv->aio_capable; whenever AIO is
- * capable the readv/writev fops are pointed at the AIO versions.
- *
- * Returns the result of bd_aio_init() on the first call, 0 afterwards.
- */
-int
-bd_aio_on(xlator_t *this)
-{
-    bd_priv_t *priv = this->private;
-    int rc = 0;
-
-    if (!priv->aio_init_done) {
-        rc = bd_aio_init(this);
-        priv->aio_capable = (rc == 0) ? _gf_true : _gf_false;
-        priv->aio_init_done = _gf_true;
-    }
-
-    if (!priv->aio_capable)
-        return rc;
-
-    this->fops->readv = bd_aio_readv;
-    this->fops->writev = bd_aio_writev;
-
-    return rc;
-}
-
-/* Route readv/writev back to the synchronous implementations. Returns 0. */
-int
-bd_aio_off(xlator_t *this)
-{
-    struct xlator_fops *ops = this->fops;
-
-    ops->writev = bd_writev;
-    ops->readv = bd_readv;
-
-    return 0;
-}
-
-#else
-
-/* Build without libaio: enabling AIO only logs a notice and returns 0. */
-int
-bd_aio_on(xlator_t *this)
-{
-    const char *name = this->name;
-
-    gf_log(name, GF_LOG_INFO,
-           "Linux AIO not available at build-time."
-           " Continuing with synchronous IO");
-
-    return 0;
-}
-
-/* Build without libaio: disabling AIO only logs a notice and returns 0. */
-int
-bd_aio_off(xlator_t *this)
-{
-    const char *name = this->name;
-
-    gf_log(name, GF_LOG_INFO,
-           "Linux AIO not available at build-time."
-           " Continuing with synchronous IO");
-
-    return 0;
-}
-
-/*
- * Build without libaio: the O_DIRECT switch is a no-op that only logs a
- * notice against the current xlator (THIS); all arguments are ignored.
- */
-void
-__bd_fd_set_odirect(fd_t *fd, struct bd_fd *pfd, int opflags, off_t offset,
-                    size_t size)
-{
-    xlator_t *cur = THIS;
-
-    gf_log(cur->name, GF_LOG_INFO,
-           "Linux AIO not available at build-time."
-           " Continuing with synchronous IO");
-}
-#endif
diff --git a/xlators/storage/bd/src/bd-aio.h b/xlators/storage/bd/src/bd-aio.h
deleted file mode 100644
index 16f686a4caa..00000000000
--- a/xlators/storage/bd/src/bd-aio.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- Copyright IBM, Corp. 2013
-
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef _BD_AIO_H
-#define _BD_AIO_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "glusterfs.h"
-
-/*
- * Maximum number of concurrently submitted IO events. The heaviest load
- * GlusterFS has been able to handle had 60-80 concurrent calls
- */
-#define BD_AIO_MAX_NR_EVENTS 256
-
-/* Maximum number of completed IO operations to reap per getevents syscall */
-#define BD_AIO_MAX_NR_GETEVENTS 16
-
-int bd_aio_on (xlator_t *this);
-int bd_aio_off (xlator_t *this);
-
-int bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
-
-int bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata);
-
-#endif /* !_BD_AIO_H */
diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c
deleted file mode 100644
index d598e5755c6..00000000000
--- a/xlators/storage/bd/src/bd-helper.c
+++ /dev/null
@@ -1,1021 +0,0 @@
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <lvm2app.h>
-#ifdef HAVE_LIBAIO
-#include <libaio.h>
-#endif
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#include "bd.h"
-#include "bd-mem-types.h"
-#include "run.h"
-#include "lvm-defaults.h"
-
-/*
- * Store @ctx as this xlator's context value on @inode.
- * Returns the result of inode_ctx_set(), or -1 when @inode or @ctx is NULL.
- */
-int
-bd_inode_ctx_set(inode_t *inode, xlator_t *this, bd_attr_t *ctx)
-{
-    uint64_t value = 0;
-    int rc = -1;
-
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
-    GF_VALIDATE_OR_GOTO(this->name, ctx, out);
-
-    /* the context slot holds an integer, so the pointer is stored as one */
-    value = (long)ctx;
-    rc = inode_ctx_set(inode, this, &value);
-out:
-    return rc;
-}
-
-/*
- * Fetch this xlator's context value from @inode.
- * On success (0) the stored pointer is written to *@ctx when @ctx is
- * non-NULL. Returns -1 for a NULL @inode, otherwise the result of
- * inode_ctx_get().
- */
-int
-bd_inode_ctx_get(inode_t *inode, xlator_t *this, bd_attr_t **ctx)
-{
-    uint64_t value = 0;
-    int rc = -1;
-
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
-
-    rc = inode_ctx_get(inode, this, &value);
-    if (rc == 0 && ctx)
-        *ctx = (bd_attr_t *)value;
-out:
-    return rc;
-}
-
-/*
- * Release everything a bd_local_t holds and return it to its memory pool.
- * A NULL @local is ignored. The loc is wiped only when no fd is attached.
- */
-void
-bd_local_free(xlator_t *this, bd_local_t *local)
-{
-    if (local == NULL)
-        return;
-
-    if (local->fd) {
-        fd_unref(local->fd);
-    } else if (local->loc.path) {
-        loc_wipe(&local->loc);
-    }
-
-    if (local->dict)
-        dict_unref(local->dict);
-
-    if (local->inode)
-        inode_unref(local->inode);
-
-    if (local->bdatt) {
-        GF_FREE(local->bdatt->type);
-        GF_FREE(local->bdatt);
-    }
-
-    mem_put(local);
-}
-
-/*
- * Attach a zeroed bd_local_t from this->local_pool to @frame.
- * Returns the new local, or NULL when the pool allocation fails.
- */
-bd_local_t *
-bd_local_init(call_frame_t *frame, xlator_t *this)
-{
-    bd_local_t *local = mem_get0(this->local_pool);
-
-    frame->local = local;
-
-    return local;
-}
-
-/*
- * VGs are tagged in GF_XATTR_VOL_ID_KEY:<uuid> format.
- * This function validates that tag against the "volume-id" option and also
- * walks the LV list to find out whether a thin pool is configured.
- *
- * Returns ENOENT when the VG cannot be opened, 0 when the volume-id matches
- * (or no "volume-id" option is set, in which case nothing can be validated)
- * and -1 on any validation failure. When a thin pool was found its name is
- * stored in priv->pool and BD_CAPS_THIN is added to priv->caps.
- */
-int bd_scan_vg(xlator_t *this, bd_priv_t *priv)
-{
-    vg_t brick = NULL;
-    data_t *tmp_data = NULL;
-    struct dm_list *tags = NULL;
-    int op_ret = -1;
-    uuid_t dict_uuid = {0, };
-    uuid_t vg_uuid = {0, };
-    bd_gfid_t vg_uuid_str = {0, };
-    gf_boolean_t uuid = _gf_false;
-    lvm_str_list_t *strl = NULL;
-    struct dm_list *lv_dm_list = NULL;
-    lv_list_t *lv_list = NULL;
-    struct dm_list *dm_seglist = NULL;
-    lvseg_list_t *seglist = NULL;
-    lvm_property_value_t prop = {0, };
-    gf_boolean_t thin = _gf_false;
-    const char *lv_name = NULL;
-    size_t keylen = strlen(GF_XATTR_VOL_ID_KEY);
-
-    brick = lvm_vg_open(priv->handle, priv->vg, "w", 0);
-    if (!brick) {
-        gf_log(this->name, GF_LOG_CRITICAL, "VG %s is not found", priv->vg);
-        return ENOENT;
-    }
-
-    lv_dm_list = lvm_vg_list_lvs(brick);
-    if (!lv_dm_list)
-        goto check;
-
-    /* NOTE(review): the break below only leaves the segment loop, so with
-     * several thin pools in the VG the last one wins and the earlier
-     * priv->pool strings look leaked -- confirm the intended policy. */
-    dm_list_iterate_items (lv_list, lv_dm_list) {
-        dm_seglist = lvm_lv_list_lvsegs(lv_list->lv);
-        if (!dm_seglist)
-            continue;
-        dm_list_iterate_items (seglist, dm_seglist) {
-            prop = lvm_lvseg_get_property(seglist->lvseg, "segtype");
-            if (!prop.is_valid || !prop.value.string)
-                continue;
-            if (!strcmp(prop.value.string, "thin-pool")) {
-                thin = _gf_true;
-                lv_name = lvm_lv_get_name(lv_list->lv);
-                priv->pool = gf_strdup(lv_name);
-                gf_log(THIS->name, GF_LOG_INFO, "Thin Pool "
-                       "\"%s\" will be used for thin LVs", lv_name);
-                break;
-            }
-        }
-    }
-
-check:
-    /* If there is no volume-id set in dict, we can't validate */
-    tmp_data = dict_get(this->options, "volume-id");
-    if (!tmp_data) {
-        op_ret = 0;
-        goto out;
-    }
-
-    op_ret = uuid_parse(tmp_data->data, dict_uuid);
-    if (op_ret < 0) {
-        gf_log(this->name, GF_LOG_ERROR,
-               "wrong volume-id (%s) set in volume file", tmp_data->data);
-        op_ret = -1;
-        goto out;
-    }
-
-    tags = lvm_vg_get_tags(brick);
-    if (!tags) { /* no tags in the VG */
-        gf_log(this->name, GF_LOG_ERROR,
-               "Extended attribute trusted.glusterfs."
-               "volume-id is absent");
-        op_ret = -1;
-        goto out;
-    }
-    dm_list_iterate_items (strl, tags) {
-        if (!strncmp(strl->str, GF_XATTR_VOL_ID_KEY, keylen)) {
-            uuid = _gf_true;
-            break;
-        }
-    }
-    /* UUID tag is not set in VG */
-    if (!uuid) {
-        gf_log(this->name, GF_LOG_ERROR,
-               "Extended attribute trusted.glusterfs."
-               "volume-id is absent");
-        op_ret = -1;
-        goto out;
-    }
-
-    /* The uuid starts one character after the key prefix. A tag that ends
-     * right at the prefix has nothing there, so do not step past its
-     * terminator. */
-    if (strl->str[keylen] == '\0' ||
-        uuid_parse(strl->str + keylen + 1, vg_uuid) < 0) {
-        gf_log(this->name, GF_LOG_ERROR,
-               "wrong volume-id (%s) set in VG", strl->str);
-        op_ret = -1;
-        goto out;
-    }
-    if (uuid_compare(dict_uuid, vg_uuid)) {
-        /* uuid_t is binary; it has to be rendered before it meets "%s" */
-        uuid_utoa_r(vg_uuid, vg_uuid_str);
-        gf_log(this->name, GF_LOG_ERROR,
-               "mismatching volume-id (%s) received. "
-               "already is a part of volume %s ",
-               tmp_data->data, vg_uuid_str);
-        op_ret = -1;
-        goto out;
-    }
-
-    op_ret = 0;
-
-out:
-    lvm_vg_close(brick);
-
-    if (!thin)
-        gf_log(THIS->name, GF_LOG_WARNING, "No thin pool found in "
-               "VG %s\n", priv->vg);
-    else
-        priv->caps |= BD_CAPS_THIN;
-
-    return op_ret;
-}
-
-/* FIXME: Move this code to common place, so posix and bd xlator can use */
-/*
- * Allocate a zeroed buffer that can hold @size bytes starting at an
- * ALIGN_SIZE-aligned address.
- *
- * Returns the pointer that must later be passed to GF_FREE() and stores the
- * aligned start inside it in *@aligned_buf. On allocation failure NULL is
- * returned and *@aligned_buf is set to NULL as well, so callers that test
- * the aligned pointer never see a value left over from an earlier call.
- */
-char *
-page_aligned_alloc(size_t size, char **aligned_buf)
-{
-    char *alloc_buf = NULL;
-
-    *aligned_buf = NULL;
-
-    alloc_buf = GF_CALLOC(1, (size + ALIGN_SIZE), gf_common_mt_char);
-    if (!alloc_buf)
-        return NULL;
-
-    /* page aligned buffer */
-    *aligned_buf = GF_ALIGN_BUF(alloc_buf, ALIGN_SIZE);
-
-    return alloc_buf;
-}
-
-/*
- * Look up, or lazily create, the bd fd context of @fd. The only caller in
- * this file, bd_fd_ctx_get(), invokes it with fd->lock held.
- *
- * Returns 0 without touching *@bdfd_p when the inode is not a regular file
- * or carries no bd inode context ("not a bd file"). Returns 0 with *@bdfd_p
- * set when a context already exists or /dev/<vg>/<gfid> could be opened and
- * registered. Any other return value is a failure; the descriptor is closed
- * only when it was actually opened.
- */
-static int
-__bd_fd_ctx_get(xlator_t *this, fd_t *fd, bd_fd_t **bdfd_p)
-{
-    int ret = -1;
-    int _fd = -1;
-    char *devpath = NULL;
-    bd_fd_t *bdfd = NULL;
-    uint64_t tmp_bdfd = 0;
-    bd_priv_t *priv = this->private;
-    bd_gfid_t gfid = {0, };
-    bd_attr_t *bdatt = NULL;
-
-    /* not bd file */
-    if (fd->inode->ia_type != IA_IFREG ||
-        bd_inode_ctx_get(fd->inode, this, &bdatt))
-        return 0;
-
-    ret = __fd_ctx_get(fd, this, &tmp_bdfd);
-    if (ret == 0) {
-        bdfd = (void *)(long)tmp_bdfd;
-        *bdfd_p = bdfd;
-        return 0;
-    }
-
-    /* From here on ret keeps the non-zero lookup result until success. */
-    uuid_utoa_r(fd->inode->gfid, gfid);
-    gf_asprintf(&devpath, "/dev/%s/%s", priv->vg, gfid);
-    if (!devpath)
-        goto out;
-
-    _fd = open(devpath, O_RDWR | O_LARGEFILE, 0);
-    if (_fd < 0) {
-        ret = errno;
-        gf_log(this->name, GF_LOG_ERROR, "open on %s: %s", devpath,
-               strerror(ret));
-        goto out;
-    }
-    bdfd = GF_CALLOC(1, sizeof(bd_fd_t), gf_bd_fd);
-    BD_VALIDATE_MEM_ALLOC(bdfd, ret, out);
-
-    bdfd->fd = _fd;
-    bdfd->flag = O_RDWR | O_LARGEFILE;
-    if (__fd_ctx_set(fd, this, (uint64_t)(long)bdfd) < 0) {
-        gf_log(this->name, GF_LOG_WARNING,
-               "failed to set the fd context fd=%p", fd);
-        goto out;
-    }
-
-    *bdfd_p = bdfd;
-
-    ret = 0;
-out:
-    GF_FREE(devpath);
-    if (ret) {
-        if (_fd >= 0)
-            close(_fd);
-        GF_FREE(bdfd);
-    }
-    return ret;
-}
-
-/*
- * Locked wrapper around __bd_fd_ctx_get(): takes fd->lock for the duration
- * of the lookup and returns its result unchanged.
- */
-int
-bd_fd_ctx_get(xlator_t *this, fd_t *fd, bd_fd_t **bdfd)
-{
-    int rc = 0;
-
-    /* FIXME: Is it ok to fd->lock here ? */
-    LOCK(&fd->lock);
-    rc = __bd_fd_ctx_get(this, fd, bdfd);
-    UNLOCK(&fd->lock);
-
-    return rc;
-}
-
-/*
- * Validates whether an LV exists for the given gfid.
- *
- * @bd is the xattr value in "<type>[:<size>]" form; it is modified in place
- * (the last ':' is replaced by a terminator). A copy of the type is stored
- * in *@type.
- *
- * Returns 0 if the LV exists and its size matches.
- * Returns -1 if the xattr type is invalid or the LV does not exist.
- * Returns 1 if the LV size mismatches; *@lv_size then holds the actual size.
- */
-int
-bd_validate_bd_xattr(xlator_t *this, char *bd, char **type,
-                     uint64_t *lv_size, uuid_t uuid)
-{
-    char *path = NULL;
-    int ret = -1;
-    bd_gfid_t gfid = {0, };
-    bd_priv_t *priv = this->private;
-    struct stat stbuf = {0, };
-    uint64_t size = 0;
-    vg_t vg = NULL;
-    lv_t lv = NULL;
-    char *bytes = NULL;
-
-    bytes = strrchr(bd, ':');
-    if (bytes) {
-        *bytes = '\0';
-        bytes++;
-        gf_string2bytesize(bytes, &size);
-    }
-
-    if (strcmp(bd, BD_LV) && strcmp(bd, BD_THIN)) {
-        gf_log(this->name, GF_LOG_WARNING, "invalid xattr %s", bd);
-        return -1;
-    }
-    *type = gf_strdup(bd);
-
-    /*
-     * Check if LV really exist, there could be a failure
-     * after setxattr and successful LV creation
-     */
-    uuid_utoa_r(uuid, gfid);
-    gf_asprintf(&path, "/dev/%s/%s", priv->vg, gfid);
-    if (!path) {
-        gf_log(this->name, GF_LOG_WARNING, "insufficient memory");
-        /* NOTE(review): 0 means "LV exists and matches" to callers;
-         * kept as-is, but this looks unintended for an OOM path. */
-        return 0;
-    }
-
-    /* Destination file does not exist */
-    if (stat(path, &stbuf)) {
-        gf_log(this->name, GF_LOG_WARNING, "stat failed for path %s", path);
-        /* ret is still -1; leave through out so that path is freed */
-        goto out;
-    }
-
-    vg = lvm_vg_open(priv->handle, priv->vg, "r", 0);
-    if (!vg) {
-        gf_log(this->name, GF_LOG_WARNING, "VG %s does not exist?",
-               priv->vg);
-        ret = -1;
-        goto out;
-    }
-
-    lv = lvm_lv_from_name(vg, gfid);
-    if (!lv) {
-        gf_log(this->name, GF_LOG_WARNING, "LV %s does not exist", gfid);
-        ret = -1;
-        goto out;
-    }
-
-    *lv_size = lvm_lv_get_size(lv);
-    ret = (size == *lv_size) ? 0 : 1;
-
-out:
-    if (vg)
-        lvm_vg_close(vg);
-
-    GF_FREE(path);
-    return ret;
-}
-
-/*
- * Create the thin LV @lv of virtual size @extent bytes in pool @vg/@pool by
- * running the LVM_CREATE command.
- *
- * The command's own status is not inspected; success is judged by whether
- * /dev/<vg>/<lv> exists afterwards. Returns 0 on success, EAGAIN when the
- * device node is missing and ENOMEM when the path cannot be built.
- */
-static int
-create_thin_lv(char *vg, char *pool, char *lv, uint64_t extent)
-{
-    int ret = -1;
-    runner_t runner = {0, };
-    char *path = NULL;
-    struct stat stbuf = {0, };
-
-    runinit(&runner);
-    runner_add_args(&runner, LVM_CREATE, NULL);
-    runner_add_args(&runner, "--thin", NULL);
-    runner_argprintf(&runner, "%s/%s", vg, pool);
-    runner_add_args(&runner, "--name", NULL);
-    runner_argprintf(&runner, "%s", lv);
-    runner_add_args(&runner, "--virtualsize", NULL);
-    /* extent is 64-bit unsigned; "%ld" truncates it where long is 32-bit */
-    runner_argprintf(&runner, "%lluB", (unsigned long long)extent);
-    runner_start(&runner);
-    runner_end(&runner);
-
-    gf_asprintf(&path, "/dev/%s/%s", vg, lv);
-    if (!path) {
-        ret = ENOMEM;
-        goto out;
-    }
-    if (lstat(path, &stbuf) < 0)
-        ret = EAGAIN;
-    else
-        ret = 0;
-out:
-    GF_FREE(path);
-    return ret;
-}
-
-/*
- * Create the LV named after @uuid with @size bytes.
- *
- * A BD_THIN @type is delegated to create_thin_lv(); any other type creates
- * a linear LV through lvm2app. Returns 0 on success, ENOENT when the VG
- * cannot be opened, otherwise an errno value describing the failure.
- */
-int
-bd_create(uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv)
-{
-    int ret = 0;
-    vg_t vg = NULL;
-    bd_gfid_t gfid = {0, };
-
-    uuid_utoa_r(uuid, gfid);
-
-    if (!strcmp(type, BD_THIN))
-        return create_thin_lv(priv->vg, priv->pool, gfid, size);
-
-    vg = lvm_vg_open(priv->handle, priv->vg, "w", 0);
-    if (!vg) {
-        gf_log(THIS->name, GF_LOG_WARNING, "opening VG %s failed", priv->vg);
-        return ENOENT;
-    }
-
-    if (!lvm_vg_create_lv_linear(vg, gfid, size)) {
-        /* Read errno before logging can disturb it, and never let a
-         * failed creation be reported as 0. */
-        ret = errno ? errno : EIO;
-        gf_log(THIS->name, GF_LOG_WARNING, "lvm_vg_create_lv_linear "
-               "failed");
-    }
-
-    lvm_vg_close(vg);
-
-    return ret;
-}
-
-/*
- * Resize the LV named after @uuid to @size bytes by running LVM_RESIZE,
- * then re-read the LV size to verify the outcome.
- *
- * Returns 0 when the LV now has exactly @size bytes, EAGAIN when the VG
- * cannot be opened and EIO when the LV is missing or has another size.
- */
-int32_t
-bd_resize(bd_priv_t *priv, uuid_t uuid, off_t size)
-{
-    uint64_t new_size = 0;
-    runner_t runner = {0, };
-    bd_gfid_t gfid = {0, };
-    int ret = 0;
-    vg_t vg = NULL;
-    lv_t lv = NULL;
-
-    uuid_utoa_r(uuid, gfid);
-
-    runinit(&runner);
-
-    runner_add_args(&runner, LVM_RESIZE, NULL);
-    runner_argprintf(&runner, "%s/%s", priv->vg, gfid);
-    /* off_t need not be long; print it through an explicit 64-bit type */
-    runner_argprintf(&runner, "-L%lldb", (long long)size);
-    runner_add_args(&runner, "-f", NULL);
-
-    runner_start(&runner);
-    runner_end(&runner);
-
-    vg = lvm_vg_open(priv->handle, priv->vg, "w", 0);
-    if (!vg) {
-        gf_log(THIS->name, GF_LOG_WARNING, "opening VG %s failed", priv->vg);
-        return EAGAIN;
-    }
-
-    lv = lvm_lv_from_name(vg, gfid);
-    if (!lv) {
-        gf_log(THIS->name, GF_LOG_WARNING, "LV %s not found", gfid);
-        ret = EIO;
-        goto out;
-    }
-    new_size = lvm_lv_get_size(lv);
-
-    /* a negative request can never match the unsigned LV size */
-    if (size < 0 || new_size != (uint64_t)size) {
-        gf_log(THIS->name, GF_LOG_WARNING, "resized LV size %llu does "
-               "not match requested size %lld",
-               (unsigned long long)new_size, (long long)size);
-        ret = EIO;
-    }
-
-out:
-    lvm_vg_close(vg);
-    return ret;
-}
-
-/*
- * Return the extent size reported for priv->vg, or 0 when the VG cannot be
- * opened.
- */
-uint64_t
-bd_get_default_extent(bd_priv_t *priv)
-{
-    uint64_t extent = 0;
-    vg_t vg = lvm_vg_open(priv->handle, priv->vg, "w", 0);
-
-    if (vg == NULL) {
-        gf_log(THIS->name, GF_LOG_WARNING, "opening VG %s failed", priv->vg);
-        return 0;
-    }
-
-    extent = lvm_vg_get_extent_size(vg);
-    lvm_vg_close(vg);
-
-    return extent;
-}
-
-/*
- * Round the user-specified @size up to a whole number of VG extents.
- * Returns 0 when the extent size cannot be determined.
- */
-uint64_t
-bd_adjust_size(bd_priv_t *priv, uint64_t size)
-{
-    uint64_t extent = bd_get_default_extent(priv);
-    uint64_t count = 0;
-
-    if (extent == 0)
-        return 0;
-
-    /* one more extent whenever size is not an exact multiple */
-    count = size / extent + ((size % extent) ? 1 : 0);
-
-    return extent * count;
-}
-
-/*
- * Remove the LV @lv_name from priv->vg.
- *
- * Returns the result of lvm_vg_remove_lv() on the normal path and -1 when
- * the VG cannot be opened or the LV is not found. *@op_errno is 0 on
- * success, ENOENT for a missing VG/LV, otherwise the errno observed right
- * after the failed removal.
- */
-int
-bd_delete_lv(bd_priv_t *priv, const char *lv_name, int *op_errno)
-{
-    vg_t vg = NULL;
-    lv_t lv = NULL;
-    int ret = -1;
-
-    *op_errno = 0;
-    vg = lvm_vg_open(priv->handle, priv->vg, "w", 0);
-    if (!vg) {
-        gf_log(THIS->name, GF_LOG_WARNING, "opening VG %s failed", priv->vg);
-        *op_errno = ENOENT;
-        return -1;
-    }
-    lv = lvm_lv_from_name(vg, lv_name);
-    if (!lv) {
-        gf_log(THIS->name, GF_LOG_WARNING, "No such LV %s", lv_name);
-        *op_errno = ENOENT;
-        goto out;
-    }
-    ret = lvm_vg_remove_lv(lv);
-    if (ret < 0) {
-        /* read errno before the logging call can disturb it */
-        *op_errno = errno;
-        gf_log(THIS->name, GF_LOG_WARNING, "removing LV %s failed", lv_name);
-        goto out;
-    }
-out:
-    lvm_vg_close(vg);
-
-    return ret;
-}
-
-/*
- * Stamp the current CLOCK_REALTIME time into the atime and/or mtime fields
- * of @iatt, as selected by GF_SET_ATTR_ATIME / GF_SET_ATTR_MTIME in @flag.
- */
-inline void
-bd_update_amtime(struct iatt *iatt, int flag)
-{
-    struct timespec now = {0, };
-
-    clock_gettime(CLOCK_REALTIME, &now);
-
-    if (flag & GF_SET_ATTR_MTIME) {
-        iatt->ia_mtime = now.tv_sec;
-        iatt->ia_mtime_nsec = now.tv_nsec;
-    }
-
-    if (flag & GF_SET_ATTR_ATIME) {
-        iatt->ia_atime = now.tv_sec;
-        iatt->ia_atime_nsec = now.tv_nsec;
-    }
-}
-
-/*
- * Create a snapshot LV named after local->dloc's gfid from the LV named
- * after local->loc's gfid, by running LVM_CREATE --snapshot. A size
- * argument is passed only when the origin is not of type BD_THIN.
- *
- * Returns 0 when /dev/<vg>/<dest> exists afterwards, EIO when it does not
- * and ENOMEM when the path cannot be built.
- */
-int
-bd_snapshot_create(bd_local_t *local, bd_priv_t *priv)
-{
-    bd_gfid_t origin = {0, };
-    bd_gfid_t dest = {0, };
-    runner_t runner = {0, };
-    struct stat stbuf = {0, };
-    char *lv_path = NULL;
-    int rc = 0;
-
-    uuid_utoa_r(local->dloc->gfid, dest);
-    uuid_utoa_r(local->loc.gfid, origin);
-
-    gf_asprintf(&lv_path, "/dev/%s/%s", priv->vg, dest);
-    if (lv_path == NULL) {
-        gf_log(THIS->name, GF_LOG_WARNING, "Insufficient memory");
-        return ENOMEM;
-    }
-
-    runinit(&runner);
-    runner_add_args(&runner, LVM_CREATE, NULL);
-    runner_add_args(&runner, "--snapshot", NULL);
-    runner_argprintf(&runner, "/dev/%s/%s", priv->vg, origin);
-    runner_add_args(&runner, "--name", NULL);
-    runner_argprintf(&runner, "%s", dest);
-    if (strcmp(local->bdatt->type, BD_THIN) != 0)
-        runner_argprintf(&runner, "-L%ldB", local->size);
-    runner_start(&runner);
-    runner_end(&runner);
-
-    /* the command status is not checked; the device node is the verdict */
-    if (lstat(lv_path, &stbuf) < 0)
-        rc = EIO;
-
-    GF_FREE(lv_path);
-    return rc;
-}
-
-/*
- * Create a full copy of the LV named after local->loc's gfid as a new LV
- * named after local->dloc's gfid.
- *
- * The destination is created with bd_create() and the data is copied with
- * readv()/writev() on O_DIRECT descriptors through IOV_NR aligned buffers
- * of IOV_SIZE bytes each. Only the bytes actually read are written, so a
- * short final read does not push stale buffer content to the destination.
- *
- * Returns 0 on success, ENOMEM on allocation failure, EIO on a short write,
- * otherwise the errno (or bd_create() result) of the failing step.
- */
-int
-bd_clone(bd_local_t *local, bd_priv_t *priv)
-{
-    int ret = ENOMEM;
-    int fd1 = -1;
-    int fd2 = -1;
-    int i = 0;
-    int nvec = 0;
-    char *buff = NULL;
-    ssize_t bytes = 0;
-    ssize_t written = 0;
-    size_t left = 0;
-    char *spath = NULL;
-    char *dpath = NULL;
-    struct iovec *vec = NULL;
-    bd_gfid_t source = {0, };
-    bd_gfid_t dest = {0, };
-    void *bufp[IOV_NR] = {0, };
-
-    vec = GF_CALLOC(IOV_NR, sizeof(struct iovec), gf_common_mt_iovec);
-    if (!vec)
-        return ENOMEM;
-
-    for (i = 0; i < IOV_NR; i++) {
-        bufp[i] = page_aligned_alloc(IOV_SIZE, &buff);
-        /* test the allocation itself: buff may still hold the previous
-         * iteration's pointer when this allocation failed */
-        if (!bufp[i])
-            goto out;
-        vec[i].iov_base = buff;
-        vec[i].iov_len = IOV_SIZE;
-    }
-
-    uuid_utoa_r(local->loc.gfid, source);
-    uuid_utoa_r(local->dloc->gfid, dest);
-
-    gf_asprintf(&spath, "/dev/%s/%s", priv->vg, source);
-    gf_asprintf(&dpath, "/dev/%s/%s", priv->vg, dest);
-    if (!spath || !dpath)
-        goto out;
-
-    ret = bd_create(local->dloc->gfid, local->size, local->bdatt->type,
-                    priv);
-    if (ret)
-        goto out;
-
-    fd1 = open(spath, O_RDONLY | O_DIRECT);
-    if (fd1 < 0) {
-        ret = errno;
-        goto out;
-    }
-    fd2 = open(dpath, O_WRONLY | O_DIRECT);
-    if (fd2 < 0) {
-        ret = errno;
-        goto out;
-    }
-
-    while (1) {
-        bytes = readv(fd1, vec, IOV_NR);
-        if (bytes < 0) {
-            ret = errno;
-            gf_log(THIS->name, GF_LOG_WARNING, "read failed: %s",
-                   strerror(ret));
-            goto out;
-        }
-        if (!bytes)
-            break;
-
-        /* Trim the vector to exactly the bytes that were read. */
-        left = (size_t)bytes;
-        for (nvec = 0; nvec < IOV_NR && left > 0; nvec++) {
-            if (left < vec[nvec].iov_len)
-                vec[nvec].iov_len = left;
-            left -= vec[nvec].iov_len;
-        }
-
-        written = writev(fd2, vec, nvec);
-        if (written < 0)
-            ret = errno;
-
-        /* only the last used entry can have been shortened */
-        vec[nvec - 1].iov_len = IOV_SIZE;
-
-        if (written < 0) {
-            gf_log(THIS->name, GF_LOG_WARNING, "write failed: %s",
-                   strerror(ret));
-            goto out;
-        }
-        if (written != bytes) {
-            ret = EIO;
-            gf_log(THIS->name, GF_LOG_WARNING,
-                   "short write: %lld of %lld bytes", (long long)written,
-                   (long long)bytes);
-            goto out;
-        }
-    }
-    ret = 0;
-
-out:
-    for (i = 0; i < IOV_NR; i++)
-        GF_FREE(bufp[i]);
-    GF_FREE(vec);
-
-    if (fd1 != -1)
-        close(fd1);
-    if (fd2 != -1)
-        close(fd2);
-
-    GF_FREE(spath);
-    GF_FREE(dpath);
-
-    return ret;
-}
-
-/*
- * Merges the snapshot LV named after @gfid into its origin LV by running
- * LVM_CONVERT --merge.
- *
- * Returns 0 when /dev/<vg>/<gfid> no longer exists afterwards, EIO when it
- * still does, and ENOMEM when the device path cannot be built.
- */
-int
-bd_merge(bd_priv_t *priv, uuid_t gfid)
-{
-    bd_gfid_t dest = {0, };
-    char *path = NULL;
-    struct stat stbuf = {0, };
-    runner_t runner = {0, };
-    int ret = 0;
-
-    uuid_utoa_r(gfid, dest);
-    gf_asprintf(&path, "/dev/%s/%s", priv->vg, dest);
-    if (!path) {
-        /* a NULL path must reach neither the command line nor lstat() */
-        gf_log(THIS->name, GF_LOG_WARNING, "Insufficient memory");
-        return ENOMEM;
-    }
-
-    runinit(&runner);
-    runner_add_args(&runner, LVM_CONVERT, NULL);
-    runner_add_args(&runner, "--merge", NULL);
-    runner_argprintf(&runner, "%s", path);
-    runner_start(&runner);
-    runner_end(&runner);
-
-    /* the snapshot's device node still being present means no merge */
-    if (!lstat(path, &stbuf))
-        ret = EIO;
-
-    GF_FREE(path);
-
-    return ret;
-}
-
-/*
- * Look up the "origin" property of the LV backing an inode and store a copy
- * of it in @dict under BD_ORIGIN.
- *
- * The inode is taken from @fd when one is given, otherwise from @loc.
- * Returns the result of dict_set_dynstr() on the normal path, ENOENT when
- * the VG or LV is missing, ENODATA when the LV has no origin, and ENOMEM
- * when the origin string cannot be duplicated.
- */
-int
-bd_get_origin(bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict)
-{
-    vg_t brick = NULL;
-    lvm_property_value_t prop = {0, };
-    lv_t lv = NULL;
-    int ret = -1;
-    bd_gfid_t gfid = {0, };
-    inode_t *inode = NULL;
-    char *origin = NULL;
-
-    brick = lvm_vg_open(priv->handle, priv->vg, "w", 0);
-    if (!brick) {
-        gf_log(THIS->name, GF_LOG_CRITICAL, "VG %s is not found", priv->vg);
-        return ENOENT;
-    }
-
-    inode = fd ? fd->inode : loc->inode;
-
-    uuid_utoa_r(inode->gfid, gfid);
-    lv = lvm_lv_from_name(brick, gfid);
-    if (!lv) {
-        gf_log(THIS->name, GF_LOG_CRITICAL, "LV %s not found", gfid);
-        ret = ENOENT;
-        goto out;
-    }
-
-    prop = lvm_lv_get_property(lv, "origin");
-    if (!prop.is_valid || !prop.value.string) {
-        ret = ENODATA;
-        goto out;
-    }
-
-    origin = gf_strdup(prop.value.string);
-    if (!origin) {
-        ret = ENOMEM;
-        goto out;
-    }
-    ret = dict_set_dynstr(dict, BD_ORIGIN, origin);
-
-out:
-    lvm_vg_close(brick);
-    return ret;
-}
-
-#ifndef BLKZEROOUT
-
-int
-bd_do_manual_zerofill (int fd, off_t offset, off_t len, int o_direct)
-{
- off_t num_vect = 0;
- off_t num_loop = 1;
- int idx = 0;
- int op_ret = -1;
- int vect_size = IOV_SIZE;
- off_t remain = 0;
- off_t extra = 0;
- struct iovec *vector = NULL;
- char *iov_base = NULL;
- char *alloc_buf = NULL;
-
- if (len == 0)
- return 0;
-
- if (len < IOV_SIZE)
- vect_size = len;
-
- num_vect = len / (vect_size);
- remain = len % vect_size ;
-
- if (num_vect > MAX_NO_VECT) {
- extra = num_vect % MAX_NO_VECT;
- num_loop = num_vect / MAX_NO_VECT;
- num_vect = MAX_NO_VECT;
- }
-
- vector = GF_CALLOC (num_vect, sizeof(struct iovec),
- gf_common_mt_iovec);
- if (!vector)
- return -1;
-
- if (o_direct) {
- alloc_buf = page_aligned_alloc (vect_size, &iov_base);
- if (!alloc_buf) {
- gf_log ("bd_do_manual_zerofill", GF_LOG_DEBUG,
- "memory alloc failed, vect_size %d: %s",
- vect_size, strerror (errno));
- GF_FREE (vector);
- return -1;
- }
- } else {
- iov_base = GF_CALLOC (vect_size, sizeof(char),
- gf_common_mt_char);
- if (!iov_base) {
- GF_FREE (vector);
- return -1;
- }
- }
-
- for (idx = 0; idx < num_vect; idx++) {
- vector[idx].iov_base = iov_base;
- vector[idx].iov_len = vect_size;
- }
-
- if (lseek (fd, offset, SEEK_SET) < 0) {
- op_ret = -1;
- goto err;
- }
-
- for (idx = 0; idx < num_loop; idx++) {
- op_ret = writev (fd, vector, num_vect);
- if (op_ret < 0)
- goto err;
- }
- if (extra) {
- op_ret = writev (fd, vector, extra);
- if (op_ret < 0)
- goto err;
- }
- if (remain) {
- vector[0].iov_len = remain;
- op_ret = writev (fd, vector , 1);
- if (op_ret < 0)
- goto err;
- }
- op_ret = 0;
-err:
- if (o_direct)
- GF_FREE (alloc_buf);
- else
- GF_FREE (iov_base);
- GF_FREE (vector);
- return op_ret;
-}
-
-#else
-
-/*
- * Issue Linux ZEROOUT ioctl to write '0' to a scsi device at given offset
- * and number of bytes. Each SCSI device's maximum write same bytes are exported
- * in sysfs file. Sending ioctl request greater than this bytes results in slow
- * performance. Read this file to get the maximum bytes and break down single
- * ZEROOUT request into multiple ZEROOUT request not exceeding maximum bytes.
- * From VG & LV name of device mapper identified and sysfs file read.
- * /sys/block/<block-device>/queue/write_same_max_bytes
- */
-int
-bd_do_ioctl_zerofill (bd_priv_t *priv, bd_attr_t *bdatt, int fd, char *vg,
- off_t offset, off_t len)
-{
- char *dm = NULL;
- char dmname[4096] = {0, };
- char lvname[4096] = {0, };
- char sysfs[4096] = {0, };
- bd_gfid_t uuid = {0, };
- char *p = NULL;
- off_t max_bytes = 0;
- int sysfd = -1;
- uint64_t param[2] = {0, 0};
- off_t nr_loop = 0;
- char buff[16] = {0, };
-
- uuid_utoa_r (bdatt->iatt.ia_gfid, uuid);
- sprintf (lvname, "/dev/%s/%s", vg, uuid);
-
- readlink (lvname, dmname, sizeof (dmname));
-
- p = strrchr (dmname, '/');
- if (p)
- dm = p + 1;
- else
- dm = dmname;
-
- sprintf(sysfs, "/sys/block/%s/queue/write_same_max_bytes", dm);
- sysfd = open (sysfs, O_RDONLY);
- if (sysfd < 0) {
- gf_log ("bd_do_ioctl_zerofill", GF_LOG_DEBUG,
- "sysfs file %s does not exist", lvname);
- goto skip;
- }
-
- read (sysfd, buff, sizeof (buff));
- close (sysfd);
-
- max_bytes = atoll (buff);
-
-skip:
- /*
- * If requested len is less than write_same_max_bytes,
- * issue single ioctl to zeroout. Otherwise split the ioctls
- */
- if (!max_bytes || len <= max_bytes) {
- param[0] = offset;
- param[1] = len;
-
- if (ioctl (fd, BLKZEROOUT, param) < 0)
- return errno;
- return 0;
- }
-
- /* Split ioctls to max write_same_max_bytes */
- nr_loop = len / max_bytes;
- for (; nr_loop; nr_loop--) {
- param[0] = offset;
- param[1] = max_bytes;
-
- if (ioctl (fd, BLKZEROOUT, param) < 0)
- return errno;
-
- offset += max_bytes;
- }
-
- if (!(len % max_bytes))
- return 0;
-
- param[0] = offset;
- param[1] = len % max_bytes;
-
- if (ioctl (fd, BLKZEROOUT, param) < 0)
- return errno;
-
- return 0;
-}
-#endif
-
-int
-bd_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, off_t len, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = -1;
- bd_fd_t *bd_fd = NULL;
- bd_priv_t *priv = this->private;
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (priv, out);
-
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "bd_fd is NULL from fd=%p", fd);
- goto out;
- }
-
- bd_inode_ctx_get (fd->inode, this, &bdatt);
-#ifndef BLKZEROOUT
- ret = bd_do_manual_zerofill(bd_fd->fd, offset, len,
- bd_fd->flag & O_DIRECT);
-#else
- ret = bd_do_ioctl_zerofill(priv, bdatt, bd_fd->fd, priv->vg, offset,
- len);
-#endif
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "zerofill failed on fd %d length %ld %s",
- bd_fd->fd, len, strerror (ret));
- goto out;
- }
-
- if (bd_fd->flag & (O_SYNC|O_DSYNC)) {
- ret = fsync (bd_fd->fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "fsync() in writev on fd %d failed: %s",
- bd_fd->fd, strerror (errno));
- return errno;
- }
- }
-
- memcpy (&prebuf, &bdatt->iatt, sizeof (prebuf));
- bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
- memcpy (&postbuf, &bdatt->iatt, sizeof (postbuf));
-
-out:
-
- return ret;
-}
diff --git a/xlators/storage/bd/src/bd-mem-types.h b/xlators/storage/bd/src/bd-mem-types.h
deleted file mode 100644
index 58b44834247..00000000000
--- a/xlators/storage/bd/src/bd-mem-types.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef __BD_MEM_TYPES_H__
-#define __BD_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_bd_mem_types_ {
- gf_bd_private = gf_common_mt_end + 1,
- gf_bd_attr,
- gf_bd_fd,
- gf_bd_loc_t,
- gf_bd_int32_t,
- gf_bd_aio_cb,
- gf_bd_mt_end
-};
-
-#endif
diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c
deleted file mode 100644
index 750b009430a..00000000000
--- a/xlators/storage/bd/src/bd.c
+++ /dev/null
@@ -1,2450 +0,0 @@
-/*
- BD translator V2 - Exports Block devices on server side as regular
- files to client
-
- Now only exporting Logical volumes supported.
-
- Copyright IBM, Corp. 2013
-
- This file is part of GlusterFS.
-
- Author:
- M. Mohan Kumar <mohan@in.ibm.com>
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <lvm2app.h>
-#include <openssl/md5.h>
-#include <time.h>
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifdef HAVE_LIBAIO
-#include <libaio.h>
-#endif
-
-#include "bd.h"
-#include "bd-aio.h"
-#include "bd-mem-types.h"
-#include "defaults.h"
-#include "glusterfs3-xdr.h"
-#include "run.h"
-#include "protocol-common.h"
-#include "checksum.h"
-#include "syscall.h"
-#include "lvm-defaults.h"
-
-/*
- * Call back function for setxattr and removexattr.
- * does not do anything. FIXME: How to handle remove/setxattr failure
- */
-int
-bd_null_rmsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-/*
- * returns 0 if a file is mapped to BD or not.
- */
-int
-bd_get_bd_info (call_frame_t *frame, xlator_t *this, dict_t *xattr, uuid_t gfid,
- char **type, uint64_t *size)
-{
- char *bd_xattr = NULL;
- char *bd = NULL;
- int ret = -1;
- loc_t loc = {0, };
- dict_t *dict = NULL;
- char *p = NULL;
- call_frame_t *bd_frame = NULL;
-
- if (!xattr)
- return 1;
-
- if (dict_get_str (xattr, BD_XATTR, &p))
- return 1;
-
- bd_xattr = gf_strdup (p);
-
- memcpy (loc.gfid, gfid, sizeof (uuid_t));
-
- bd_frame = copy_frame (frame);
- BD_VALIDATE_MEM_ALLOC (bd_frame, ret, out);
-
- ret = bd_validate_bd_xattr (this, bd_xattr, type, size, gfid);
- if (ret < 0) {/* LV does not exist */
- STACK_WIND (bd_frame, bd_null_rmsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->removexattr, &loc,
- BD_XATTR, NULL);
-
- gf_log (this->name, GF_LOG_WARNING,
- "Mapped LV not available for posix file <gfid:%s>, "
- "deleting mapping", uuid_utoa (gfid));
- } else if (ret == 1) {
- /* BD_XATTR size and LV size mismatch. Update BD_XATTR */
- gf_asprintf (&bd, "%s:%ld", *type, *size);
-
- dict = dict_new ();
- BD_VALIDATE_MEM_ALLOC (dict, ret, out);
-
- ret = dict_set_dynstr (dict, BD_XATTR, bd);
- if (ret)
- goto out;
-
- STACK_WIND (bd_frame, bd_null_rmsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setxattr, &loc, dict, 0,
- NULL);
- }
-
-out:
- dict_del (xattr, BD_XATTR);
- GF_FREE (bd_xattr);
- GF_FREE (bd);
- return ret;
-}
-
-/*
- * bd_lookup_cbk: Call back from posix_lookup.
- */
-int32_t
-bd_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int ret = -1;
- bd_attr_t *bdatt = NULL;
- uint64_t size = 0;
- char *type = BD_TYPE_NONE;
-
- /* only regular files are part of BD object */
- if (op_ret < 0 || buf->ia_type != IA_IFREG)
- goto out;
-
- /* iatt already cached */
- if (!bd_inode_ctx_get (inode, this, &bdatt))
- goto next;
-
- if (bd_get_bd_info (frame, this, xattr, buf->ia_gfid, &type, &size))
- goto out;
-
- /* BD file, update buf */
- bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
- if (!bdatt) {
- op_errno = ENOMEM;
- goto out;
- }
- memcpy (&bdatt->iatt, buf, sizeof (struct iatt));
- bdatt->type = type;
-
- /* Cache LV size in inode_ctx */
- ret = bd_inode_ctx_set (inode, this, bdatt);
- if (ret < 0) {
- GF_FREE (bdatt);
- op_errno = EINVAL;
- goto out;
- }
-
- bdatt->iatt.ia_size = size;
- bdatt->iatt.ia_blocks = size / 512;
-
-next:
- dict_del (xattr, GF_CONTENT_KEY);
- memcpy (buf, &bdatt->iatt, sizeof (struct iatt));
-
-out:
- BD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
- xattr, postparent);
- return 0;
-}
-
-/*
- * bd_lookup: Issues posix_lookup to find out if file is mapped to BD
- * bd_lookup -> posix_lookup -> bd_lookup_cbk
-*/
-int32_t
-bd_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- dict_t *bd_xattr = NULL;
- bd_attr_t *bdatt = NULL;
- int op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- if (bd_inode_ctx_get (loc->inode, this, &bdatt) < 0) {
- if (!xattr_req) {
- bd_xattr = dict_new ();
- BD_VALIDATE_MEM_ALLOC (bd_xattr, op_errno, out);
- xattr_req = bd_xattr;
- }
- if (dict_set_int8 (xattr_req, BD_XATTR, 1) < 0)
- goto out;
- }
-
- STACK_WIND (frame, bd_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
-
- if (bd_xattr)
- dict_unref (bd_xattr);
- return 0;
-out:
- BD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
- return 0;
-}
-
-int
-bd_forget (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- uint64_t ctx = 0;
- bd_attr_t *bdatt = NULL;
-
- ret = bd_inode_ctx_get (inode, this, &bdatt);
- if (!ret) {
- inode_ctx_del (inode, this, &ctx);
- GF_FREE (bdatt);
- }
- return 0;
-}
-
-int
-bd_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *entries, dict_t *xdata)
-{
- gf_dirent_t *entry = NULL;
- uint64_t size = 0;
- char *type = NULL;
-
- if (op_ret < 0)
- goto out;
-
- list_for_each_entry (entry, &entries->list, list) {
- if (entry->d_type != DT_REG)
- continue;
- if (!bd_get_bd_info (frame, this, entry->dict,
- entry->d_stat.ia_gfid, &type, &size)) {
- entry->d_stat.ia_size = size;
- entry->d_stat.ia_blocks = size / 512;
- GF_FREE (type);
- }
- }
-
-out:
- BD_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-/*
- * bd_readdirp: In bd_readdirp_cbk if the file and BD_XATTR_SIZE is set
- * ia_size is updated with the LV(BD_XATTR_SIZE) size
- */
-int32_t
-bd_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *dict)
-{
- int op_errno = EINVAL;
- bd_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- if (!dict) {
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
- local->dict = dict_new ();
- BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
- dict = local->dict;
- }
-
- if (dict_set_int8 (dict, BD_XATTR, 0)) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set key %s", BD_XATTR);
- goto out;
- }
-
- STACK_WIND (frame, bd_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict);
-
- return 0;
-out:
- BD_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, dict);
- return 0;
-}
-
-int
-bd_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *buf, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
- bd_attr_t *bdatt = NULL;
-
- /* only regular files are part of BD object */
- if (op_ret < 0 || buf->ia_type != IA_IFREG)
- goto out;
-
- BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out);
-
- /* update buf with LV size */
- if (!bd_inode_ctx_get (local->inode, this, &bdatt))
- memcpy (buf, bdatt, sizeof (struct iatt));
-
-out:
- BD_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int
-bd_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int op_errno = EINVAL;
- bd_local_t *local = NULL;
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- if (!bd_inode_ctx_get (loc->inode, this, &bdatt)) {
- BD_STACK_UNWIND (stat, frame, 0, 0, &bdatt->iatt, xdata);
- return 0;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
- local->inode = inode_ref (loc->inode);
-
- STACK_WIND(frame, bd_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-out:
- BD_STACK_UNWIND (stat, frame, -1, op_errno, NULL, xdata);
- return 0;
-}
-
-int
-bd_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct statvfs *buff, dict_t *xdata)
-{
- uint64_t size = 0;
- uint64_t fr_size = 0;
- bd_priv_t *priv = NULL;
- vg_t vg = NULL;
-
- if (op_ret < 0)
- goto out;
-
- priv = this->private;
-
- vg = lvm_vg_open (priv->handle, priv->vg, "r", 0);
- if (!vg) {
- gf_log (this->name, GF_LOG_WARNING, "opening VG %s failed",
- priv->vg);
- op_ret = -1;
- op_errno = EAGAIN;
- goto out;
- }
- size = lvm_vg_get_size (vg);
- fr_size = lvm_vg_get_free_size (vg);
- lvm_vg_close (vg);
-
- buff->f_blocks += size / buff->f_frsize;
- buff->f_bfree += fr_size / buff->f_frsize;
- buff->f_bavail += fr_size / buff->f_frsize;
-
-out:
- BD_STACK_UNWIND (statfs, frame, op_ret, op_errno, buff, xdata);
- return 0;
-}
-
-/*
- * bd_statfs: Mimics statfs by returning used/free extents in the VG
- */
-int
-bd_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- STACK_WIND (frame, bd_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-out:
- BD_STACK_UNWIND (statfs, frame, -1, EINVAL, NULL, NULL);
- return 0;
-}
-
-int
-bd_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *buf, dict_t *xdata)
-{
- bd_attr_t *bdatt = NULL;
- bd_local_t *local = frame->local;
-
- /* only regular files are part of BD object */
- if (op_ret < 0 || buf->ia_type != IA_IFREG)
- goto out;
-
- BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out);
-
- /* update buf with LV size */
- if (!bd_inode_ctx_get (local->inode, this, &bdatt))
- memcpy (buf, &bdatt->iatt, sizeof (struct iatt));
-
-out:
- BD_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int
-bd_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- int op_errno = EINVAL;
- bd_local_t *local = NULL;
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- /* if its already cached return it */
- if (!bd_inode_ctx_get (fd->inode, this, &bdatt)) {
- BD_STACK_UNWIND (fstat, frame, 0, 0, &bdatt->iatt, xdata);
- return 0;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->inode = inode_ref (fd->inode);
-
- STACK_WIND (frame, bd_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
-
- return 0;
-out:
- BD_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, xdata);
- return 0;
-}
-
-/*
- * bd_readv: If posix file, invokes posix_readv otherwise reads from the BD
- * file
- */
-int
-bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int ret = -1;
- int _fd = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- bd_fd_t *bd_fd = NULL;
- struct iovec vec = {0, };
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- uint64_t bd_size = 0;
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0 || !bd_fd) {
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
- }
- if (!size) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
- goto out;
- }
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (!iobuf) {
- op_errno = ENOMEM;
- goto out;
- }
- _fd = bd_fd->fd;
- op_ret = pread (_fd, iobuf->ptr, size, offset);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "read failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- vec.iov_base = iobuf->ptr;
- vec.iov_len = op_ret;
-
- iobref = iobref_new ();
- iobref_add (iobref, iobuf);
-
- if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
- op_errno = EINVAL;
- op_ret = -1;
- goto out;
- }
- bd_size = bdatt->iatt.ia_size;
- if (!bd_size || (offset + vec.iov_len) >= bd_size)
- op_errno = ENOENT;
-
- op_ret = vec.iov_len;
- bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_ATIME);
-
-out:
- BD_STACK_UNWIND (readv, frame, op_ret, op_errno,
- &vec, 1, &bdatt->iatt, iobref, NULL);
-
- if (iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-#ifdef BLKDISCARD
-/*
- * bd_discard: Sends BLKDISCARD ioctl to the block device
- */
-int
-bd_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- int ret = -1;
- int op_errno = EINVAL;
- bd_fd_t *bd_fd = NULL;
- uint64_t param[2] = {0, };
- bd_attr_t *bdatt = NULL;
- struct iatt prebuf = {0, };
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- /* posix */
- if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
- STACK_WIND (frame, default_discard_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->discard,
- fd, offset, len, xdata);
- return 0;
- }
-
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0 || !bd_fd) {
- op_errno = EINVAL;
- goto out;
- }
-
- param[0] = offset;
- param[1] = len;
- ret = ioctl (bd_fd->fd, BLKDISCARD, param);
- if (ret < 0) {
- if (errno == ENOTTY)
- op_errno = ENOSYS;
- else
- op_errno = errno;
- goto out;
- }
- memcpy (&prebuf, &bdatt->iatt, sizeof (prebuf));
- bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
-
- BD_STACK_UNWIND (discard, frame, ret, op_errno, &prebuf,
- &bdatt->iatt, xdata);
- return 0;
-
-out:
- BD_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-#else
-
-int
-bd_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- BD_STACK_UNWIND (discard, frame, -1, ENOSYS, NULL, NULL, NULL);
- return 0;
-}
-#endif
-
-/*
- * Call back from posix_open for opening the backing posix file
- * If it failed, close BD fd
- */
-int
-bd_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
-{
- bd_fd_t *bd_fd = NULL;
- bd_attr_t *bdatt = NULL;
-
- if (!op_ret)
- goto out;
-
- bd_inode_ctx_get (fd->inode, this, &bdatt);
- if (!bdatt) /* posix file */
- goto out;
-
- /* posix open failed */
- if (bd_fd_ctx_get (this, fd, &bd_fd) < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "bd_fd is NULL from fd=%p", fd);
- goto out;
- }
- close (bd_fd->fd);
- GF_FREE (bd_fd);
-
-out:
- BD_STACK_UNWIND (open, frame, op_ret, op_errno, fd, NULL);
-
- return 0;
-}
-
-/*
- * bd_open: Opens BD file if given posix file is mapped to BD. Also opens
- * posix file.
- * fd contains both posix and BD fd
- */
-int32_t
-bd_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- int32_t ret = EINVAL;
- bd_fd_t *bd_fd = NULL;
- bd_attr_t *bdatt = NULL;
- bd_gfid_t gfid = {0, };
- char *devpath = NULL;
- bd_priv_t *priv = this->private;
- int _fd = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (fd, out);
-
- /* not bd file */
- if (fd->inode->ia_type != IA_IFREG ||
- bd_inode_ctx_get (fd->inode, this, &bdatt))
- goto posix;
-
- uuid_utoa_r (fd->inode->gfid, gfid);
- gf_asprintf (&devpath, "/dev/%s/%s", priv->vg, gfid);
- BD_VALIDATE_MEM_ALLOC (devpath, ret, out);
-
- _fd = open (devpath, flags | O_LARGEFILE, 0);
- if (_fd < 0) {
- ret = errno;
- gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", devpath,
- strerror (ret));
- goto out;
- }
- bd_fd = GF_CALLOC (1, sizeof(bd_fd_t), gf_bd_fd);
- BD_VALIDATE_MEM_ALLOC (bd_fd, ret, out);
-
- bd_fd->fd = _fd;
- bd_fd->flag = flags | O_LARGEFILE;
-
- if (fd_ctx_set (fd, this, (uint64_t)(long)bd_fd) < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set the fd context fd=%p", fd);
- goto out;
- }
-
- ret = 0;
-
-posix:
-
- /* open posix equivalant of this file, fd needed for fd related
- operations like fsetxattr, ftruncate etc */
- STACK_WIND (frame, bd_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
-
- return 0;
-out:
- BD_STACK_UNWIND (open, frame, -1, ret, fd, NULL);
-
- GF_FREE (devpath);
- if (ret) {
- close (_fd);
- GF_FREE (bd_fd);
- }
-
- return 0;
-}
-
-/*
- * call back from posix_setattr after updating iatt to posix file.
- */
-int
-bd_fsync_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *pre,
- struct iatt *post, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
- bd_attr_t *bdatt = local->bdatt;
-
- BD_STACK_UNWIND (fsync, frame, op_ret, op_errno, &bdatt->iatt,
- &bdatt->iatt, NULL);
- return 0;
-}
-
-int
-bd_do_fsync (int fd, int datasync)
-{
- int op_errno = 0;
-
- if (datasync) {
- if (sys_fdatasync (fd)) {
- op_errno = errno;
- gf_log (THIS->name, GF_LOG_ERROR,
- "fdatasync on fd=%d failed: %s",
- fd, strerror (errno));
- }
-
- } else
-
- {
- if (sys_fsync (fd)) {
- op_errno = errno;
- gf_log (THIS->name, GF_LOG_ERROR,
- "fsync on fd=%d failed: %s",
- fd, strerror (op_errno));
- }
- }
-
- return op_errno;
-}
-
-/*
- * bd_fsync: Syncs if BD fd, forwards the request to posix
- * fsync -> posix_setattr -> posix_fsync
-*/
-int32_t
-bd_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync, dict_t *xdata)
-{
- int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- bd_fd_t *bd_fd = NULL;
- bd_priv_t *priv = NULL;
- bd_attr_t *bdatt = NULL;
- bd_local_t *local = NULL;
- int valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- struct iatt prebuf = {0, };
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ret = bd_inode_ctx_get (fd->inode, this, &bdatt);
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0 || !bd_fd || !bdatt) {
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync, fd, datasync,
- xdata);
- return 0;
- }
-
- memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
-
- op_errno = bd_do_fsync (bd_fd->fd, datasync);
- if (op_errno)
- goto out;
-
- /* For BD, Update the a|mtime during full fsync only */
- if (!datasync) {
- local = bd_local_init (frame, this);
- /* In case of mem failure, should posix flush called ? */
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
- BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
-
- local->bdatt->type = gf_strdup (bdatt->type);
- memcpy (&local->bdatt->iatt, &bdatt->iatt, sizeof (struct iatt));
- bd_update_amtime (&local->bdatt->iatt, valid);
- uuid_copy (local->loc.gfid, fd->inode->gfid);
- STACK_WIND (frame, bd_fsync_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, &local->loc,
- &local->bdatt->iatt,
- valid, NULL);
- return 0;
- }
-
-out:
- BD_STACK_UNWIND (fsync, frame, op_ret, op_errno, &prebuf,
- &bdatt->iatt, NULL);
- return 0;
-}
-
-int
-bd_flush_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *pre,
- struct iatt *post, dict_t *xdata)
-{
- BD_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-bd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- int ret = -1;
- bd_fd_t *bd_fd = NULL;
- bd_priv_t *priv = NULL;
- bd_attr_t *bdatt = NULL;
- int valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- bd_local_t *local = NULL;
- int op_errno = EINVAL;
- loc_t loc = {0, };
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ret = bd_inode_ctx_get (fd->inode, this, &bdatt);
- if (!bdatt)
- goto out;
-
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0 || !bd_fd || !bdatt) {
- gf_log (this->name, GF_LOG_WARNING,
- "bdfd/bdatt is NULL from fd=%p", fd);
- goto out;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->fd = fd_ref (fd);
- uuid_copy (loc.gfid, bdatt->iatt.ia_gfid);
-
- /* Update the a|mtime during flush */
- STACK_WIND (frame, bd_flush_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, &loc, &bdatt->iatt,
- valid, NULL);
-
- return 0;
-
-out:
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd, xdata);
-
- return 0;
-}
-
-int32_t
-bd_release (xlator_t *this, fd_t *fd)
-{
- int ret = -1;
- bd_fd_t *bd_fd = NULL;
- uint64_t tmp_bfd = 0;
- bd_attr_t *bdatt = NULL;
- bd_priv_t *priv = this->private;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (priv, out);
-
- ret = bd_inode_ctx_get (fd->inode, this, &bdatt);
- if (ret || !bdatt) /* posix file */
- goto out;
-
- /* FIXME: Update amtime during release */
-
- ret = fd_ctx_del (fd, this, &tmp_bfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "bfd is NULL from fd=%p", fd);
- goto out;
- }
- bd_fd = (bd_fd_t *)(long)tmp_bfd;
-
- close (bd_fd->fd);
- GF_FREE (bd_fd);
-out:
- return 0;
-}
-
-/*
- * Call back for removexattr after removing BD_XATTR incase of
- * bd create failure
- */
-int
-bd_setx_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
-
- if (local->fd)
- BD_STACK_UNWIND (setxattr, frame, -1, EIO, xdata);
- else
- BD_STACK_UNWIND (setxattr, frame, -1, EIO, xdata);
- return 0;
-
-}
-
-/*
- * Call back after setting BD_XATTR. Creates BD. If BD creation is a failure
- * invokes posix_removexattr to remove created BD_XATTR
- */
-int
-bd_setx_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
- bd_attr_t *bdatt = NULL;
-
- if (op_ret < 0)
- goto next;
-
- /* Create LV */
- op_errno = bd_create (local->inode->gfid, local->bdatt->iatt.ia_size,
- local->bdatt->type, this->private);
- if (!op_errno)
- goto out;
-
- /* LV creation failed, remove BD_XATTR */
- if (local->fd)
- STACK_WIND (frame, bd_setx_rm_xattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr,
- local->fd, BD_XATTR, NULL);
- else
- STACK_WIND (frame, bd_setx_rm_xattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- &local->loc, BD_XATTR, NULL);
-
- return 0;
-out:
-
- bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
- if (!bdatt) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto next;
- }
-
- memcpy (&bdatt->iatt, &local->bdatt->iatt, sizeof (struct iatt));
- bdatt->type = gf_strdup (local->bdatt->type);
-
- bd_inode_ctx_set (local->inode, THIS, bdatt);
-
-next:
- if (local->fd)
- BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
- else
- BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-
-}
-
-/*
- * Call back from posix_stat
- */
-int
-bd_setx_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *iatt,
- dict_t *xdata)
-{
- char *param = NULL;
- char *type = NULL;
- char *s_size = NULL;
- char *p = NULL;
- char *copy = NULL;
- bd_local_t *local = frame->local;
- bd_priv_t *priv = this->private;
- char *bd = NULL;
- uint64_t size = 0;
-
- if (op_ret < 0)
- goto out;
-
- if (!IA_ISREG (iatt->ia_type)) {
- op_errno = EOPNOTSUPP;
- goto out;
- }
-
- param = copy = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char);
- BD_VALIDATE_MEM_ALLOC (param, op_errno, out);
-
- strncpy (param, local->data->data, local->data->len);
-
- type = strtok_r (param, ":", &p);
- if (!type) {
- op_errno = EINVAL;
- goto out;
- }
-
- if (strcmp (type, BD_LV) && strcmp (type, BD_THIN)) {
- gf_log (this->name, GF_LOG_WARNING, "Invalid bd type %s given",
- type);
- op_errno = EINVAL;
- goto out;
- }
-
- if (!strcmp (type, BD_THIN) && !(priv->caps & BD_CAPS_THIN)) {
- gf_log (this->name, GF_LOG_WARNING, "THIN lv not supported by "
- "this volume");
- op_errno = EOPNOTSUPP;
- goto out;
- }
-
- s_size = strtok_r (NULL, ":", &p);
-
- /* If size not specified get default size */
- if (!s_size)
- size = bd_get_default_extent (priv);
- else
- gf_string2bytesize (s_size, &size);
-
- gf_asprintf (&bd, "%s:%ld", type, size);
- BD_VALIDATE_MEM_ALLOC (bd, op_errno, out);
-
- local->dict = dict_new ();
- BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
-
- local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
- BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
-
- if (dict_set_dynstr (local->dict, BD_XATTR, bd) < 0) {
- op_errno = EINVAL;
- goto out;
- }
-
- local->bdatt->type = gf_strdup (type);
- memcpy (&local->bdatt->iatt, iatt, sizeof (struct iatt));
- local->bdatt->iatt.ia_size = size;
-
- if (local->fd)
- STACK_WIND (frame, bd_setx_setx_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- local->fd, local->dict, 0, NULL);
- else
- STACK_WIND (frame, bd_setx_setx_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->loc, local->dict, 0, NULL);
-
- return 0;
-
-out:
- if (local->fd)
- BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, xdata);
- else
- BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata);
-
- GF_FREE (bd);
- GF_FREE (copy);
- return 0;
-}
-
-int
-bd_offload_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
-
- if (local->fd)
- BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, NULL);
- else
- BD_STACK_UNWIND (setxattr, frame, -1, EIO, NULL);
-
- return 0;
-}
-
-int
-bd_offload_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto out;
-
- if (local->offload == BD_OF_SNAPSHOT)
- op_ret = bd_snapshot_create (frame->local, this->private);
- else
- op_ret = bd_clone (frame->local, this->private);
-
- if (op_ret) {
- STACK_WIND (frame, bd_offload_rm_xattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- local->dloc, BD_XATTR, NULL);
- return 0;
- }
-
-out:
- if (local->fd)
- BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
- else
- BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-int
-bd_offload_getx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- char *bd = NULL;
- bd_local_t *local = frame->local;
- char *type = NULL;
- char *p = NULL;
-
- if (op_ret < 0)
- goto out;
-
- if (dict_get_str (xattr, BD_XATTR, &p)) {
- op_errno = EINVAL;
- goto out;
- }
-
- type = gf_strdup (p);
- BD_VALIDATE_MEM_ALLOC (type, op_errno, out);
-
- p = strrchr (type, ':');
- if (!p) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING,
- "source file xattr %s corrupted?", type);
- goto out;
- }
-
- *p='\0';
-
- /* For clone size is taken from source LV */
- if (!local->size) {
- p++;
- gf_string2bytesize (p, &local->size);
- }
- gf_asprintf (&bd, "%s:%ld", type, local->size);
- local->bdatt->type = gf_strdup (type);
- dict_del (local->dict, BD_XATTR);
- dict_del (local->dict, LINKTO);
- if (dict_set_dynstr (local->dict, BD_XATTR, bd)) {
- op_errno = EINVAL;
- goto out;
- }
-
- STACK_WIND (frame, bd_offload_setx_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- local->dloc, local->dict, 0, NULL);
-
- return 0;
-
-out:
- if (local->fd)
- BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- else
- BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
-
- GF_FREE (type);
- GF_FREE (bd);
-
- return 0;
-}
-
-int
-bd_offload_dest_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *iatt,
- dict_t *xattr, struct iatt *postparent)
-{
- bd_local_t *local = frame->local;
- char *bd = NULL;
- int ret = -1;
- char *linkto = NULL;
-
- if (op_ret < 0 && op_errno != ENODATA) {
- op_errno = EINVAL;
- goto out;
- }
-
- if (!IA_ISREG (iatt->ia_type)) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING, "destination gfid is not a "
- "regular file");
- goto out;
- }
-
- ret = dict_get_str (xattr, LINKTO, &linkto);
- if (linkto) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING, "destination file not "
- "present in same brick");
- goto out;
- }
-
- ret = dict_get_str (xattr, BD_XATTR, &bd);
- if (bd) {
- op_errno = EEXIST;
- goto out;
- }
-
- local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
- BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
-
- STACK_WIND (frame, bd_offload_getx_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- &local->loc, BD_XATTR, NULL);
-
- return 0;
-out:
- if (local->fd)
- BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- else
- BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-int
-bd_merge_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- /* FIXME: if delete failed, remove xattr */
-
- BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-int
-bd_do_merge(call_frame_t *frame, xlator_t *this)
-{
- bd_local_t *local = frame->local;
- inode_t *parent = NULL;
- char *p = NULL;
- int op_errno = 0;
-
- op_errno = bd_merge (this->private, local->inode->gfid);
- if (op_errno)
- goto out;
-
- /*
- * posix_unlink needs loc->pargfid to be valid, but setxattr FOP does
- * not have loc->pargfid set. Get parent's gfid by getting parents inode
- */
- parent = inode_parent (local->inode, NULL, NULL);
- if (!parent) {
- /*
- * FIXME: Snapshot LV already deleted.
- * remove xattr, instead of returning failure
- */
- op_errno = EINVAL;
- goto out;
- }
- uuid_copy (local->loc.pargfid, parent->gfid);
-
- p = strrchr (local->loc.path, '/');
- if (p)
- p++;
- local->loc.name = p;
-
- STACK_WIND (frame, bd_merge_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- &local->loc, 0, NULL);
-
- return 0;
-out:
- BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
-
- return op_errno;
-}
-
-int
-bd_offload (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd, bd_offload_t offload)
-{
- char *param = NULL;
- char *param_copy = NULL;
- char *p = NULL;
- char *size = NULL;
- char *gfid = NULL;
- int op_errno = 0;
- bd_local_t *local = frame->local;
-
- param = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char);
- BD_VALIDATE_MEM_ALLOC (param, op_errno, out);
- param_copy = param;
-
- local->dict = dict_new ();
- BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
-
- local->dloc = GF_CALLOC (1, sizeof (loc_t), gf_bd_loc_t);
- BD_VALIDATE_MEM_ALLOC (local->dloc, op_errno, out);
-
- strncpy (param, local->data->data, local->data->len);
-
- gfid = strtok_r (param, ":", &p);
- size = strtok_r (NULL, ":", &p);
- if (size)
- gf_string2bytesize (size, &local->size);
- else if (offload != BD_OF_CLONE)
- local->size = bd_get_default_extent (this->private);
-
- if (dict_set_int8 (local->dict, BD_XATTR, 1) < 0) {
- op_errno = EINVAL;
- goto out;
- }
- if (dict_set_int8 (local->dict, LINKTO, 1) < 0) {
- op_errno = EINVAL;
- goto out;
- }
-
- uuid_parse (gfid, local->dloc->gfid);
- local->offload = offload;
-
- STACK_WIND (frame, bd_offload_dest_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, local->dloc,
- local->dict);
-
- return 0;
-
-out:
- if (fd)
- BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- else
- BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
-
- GF_FREE (param_copy);
- return 0;
-}
-
-/*
- * bd_setxattr: Used to create & map an LV to a posix file using
- * BD_XATTR xattr
- * bd_setxattr -> posix_stat -> bd_setx_stat_cbk -> posix_setxattr ->
- * bd_setx_setx_cbk -> create_lv
- * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk
- */
-int
-bd_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int op_errno = 0;
- data_t *data = NULL;
- bd_local_t *local = NULL;
- bd_attr_t *bdatt = NULL;
- bd_offload_t cl_type = BD_OF_NONE;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
-
- if ((data = dict_get (dict, BD_XATTR)))
- cl_type = BD_OF_NONE;
- else if ((data = dict_get (dict, BD_CLONE)))
- cl_type = BD_OF_CLONE;
- else if ((data = dict_get (dict, BD_SNAPSHOT)))
- cl_type = BD_OF_SNAPSHOT;
- else if ((data = dict_get (dict, BD_MERGE)))
- cl_type = BD_OF_MERGE;
-
- bd_inode_ctx_get (loc->inode, this, &bdatt);
- if (!cl_type && !data) {
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setxattr, loc, dict,
- flags, xdata);
- return 0;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->data = data;
- loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
-
- if (cl_type) {
- /* For cloning/snapshot, source file must be mapped to LV */
- if (!bdatt) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s not mapped to BD", loc->path);
- op_errno = EINVAL;
- goto out;
- }
- if (cl_type == BD_OF_MERGE)
- bd_do_merge (frame, this);
- else
- bd_offload (frame, this, loc, NULL, cl_type);
- } else if (data) {
- if (bdatt) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s already mapped to BD", loc->path);
- op_errno = EEXIST;
- goto out;
- }
- STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc, xdata);
- }
-
- return 0;
-out:
- if (op_errno)
- STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
-
- return 0;
-}
-
-/*
- * bd_fsetxattr: Used to create/map an LV to a posix file using
- * BD_XATTR xattr
- * bd_fsetxattr -> posix_fstat -> bd_setx_stat_cbk -> posix_fsetxattr ->
- * bd_setx_setx_cbk -> create_lv
- * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk
- * -> bd_fsetxattr_cbk
- */
-int32_t
-bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int op_errno = 0;
- data_t *data = NULL;
- bd_attr_t *bdatt = NULL;
- bd_local_t *local = NULL;
- bd_offload_t cl_type = BD_OF_NONE;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- bd_inode_ctx_get (fd->inode, this, &bdatt);
-
- data = dict_get (dict, BD_XATTR);
- if ((data = dict_get (dict, BD_XATTR)))
- cl_type = BD_OF_NONE;
- else if ((data = dict_get (dict, BD_CLONE)))
- cl_type = BD_OF_CLONE;
- else if ((data = dict_get (dict, BD_SNAPSHOT)))
- cl_type = BD_OF_SNAPSHOT;
- else if ((data = dict_get (dict, BD_MERGE))) {
- /*
- * bd_merge is not supported for fsetxattr, because snapshot LV
- * is opened and it causes problem in snapshot merge
- */
- op_errno = EOPNOTSUPP;
- goto out;
- }
-
- bd_inode_ctx_get (fd->inode, this, &bdatt);
-
- if (!cl_type && !data) {
- /* non bd file object */
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
- return 0;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->inode = inode_ref (fd->inode);
- local->fd = fd_ref (fd);
- local->data = data;
-
- if (cl_type) {
- /* For cloning/snapshot, source file must be mapped to LV */
- if (!bdatt) {
- gf_log (this->name, GF_LOG_WARNING,
- "fd %p not mapped to BD", fd);
- op_errno = EINVAL;
- goto out;
-
- }
- bd_offload (frame, this, NULL, fd, cl_type);
- } else if (data) {
- if (bdatt) {
- gf_log (this->name, GF_LOG_WARNING,
- "fd %p already mapped to BD", fd);
- op_errno = EEXIST;
- goto out;
- }
- STACK_WIND(frame, bd_setx_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- }
-
- return 0;
-out:
-
- BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-int32_t
-bd_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- if (!strcmp (name, BD_XATTR))
- goto out;
-
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-out:
- BD_STACK_UNWIND (removexattr, frame, -1, ENODATA, NULL);
- return 0;
-}
-
-int32_t
-bd_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- if (!strcmp (name, BD_XATTR))
- goto out;
-
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
-
- return 0;
-out:
- BD_STACK_UNWIND (fremovexattr, frame, -1, ENODATA, NULL);
- return 0;
-}
-
-int
-bd_trunc_setxattr_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
-
- if (local->fd)
- BD_STACK_UNWIND (ftruncate, frame, -1, EIO, NULL, NULL, NULL);
- else
- BD_STACK_UNWIND (truncate, frame, -1, EIO, NULL, NULL, NULL);
-
- return 0;
-}
-
-/*
- * Call back for setxattr after setting BD_XATTR_SIZE.
- */
-int
-bd_trunc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- bd_local_t *local = frame->local;
- bd_attr_t *bdatt = NULL;
- struct iatt prebuf = {0, };
- char *bd = NULL;
-
- if (op_ret < 0)
- goto out;
-
- bd_inode_ctx_get (local->inode, this, &bdatt);
- if (!bdatt)
- goto revert_xattr;
-
- op_errno = bd_resize (this->private, local->inode->gfid,
- local->bdatt->iatt.ia_size);
- if (op_errno)
- goto revert_xattr;
-
- memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
- /* LV resized, update new size in the cache */
- bdatt->iatt.ia_size = local->bdatt->iatt.ia_size;
-
- if (local->fd)
- BD_STACK_UNWIND (ftruncate, frame, 0, 0, &prebuf, &bdatt->iatt,
- NULL);
- else
- BD_STACK_UNWIND (truncate, frame, 0, 0, &prebuf, &bdatt->iatt,
- NULL);
-
- return 0;
-
-revert_xattr:
- /* revert setxattr */
- op_ret = dict_get_str (local->dict, BD_XATTR, &bd);
- GF_FREE (bd);
- gf_asprintf (&bd, "%s:%ld", bdatt->type, bdatt->iatt.ia_size);
-
- if (local->fd)
- STACK_WIND (frame, bd_trunc_setxattr_setx_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- local->fd, local->dict, 0, NULL);
- else
- STACK_WIND (frame, bd_trunc_setxattr_setx_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->loc, local->dict, 0, NULL);
-
- return 0;
-out:
- if (local->fd)
- BD_STACK_UNWIND (ftruncate, frame, -1, EIO, NULL, NULL, NULL);
- else
- BD_STACK_UNWIND (truncate, frame, -1, EIO, NULL, NULL, NULL);
-
- return 0;
-}
-
-/*
- * call back from posix_[f]truncate_stat
- * If offset > LV size, it resizes the LV and calls posix_setxattr
- * to update new LV size in xattr else calls posix_setattr for updating
- * the posix file so that truncate fop behaves properly
- */
-int
-bd_trunc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *buf, dict_t *xdata)
-{
- char *bd = NULL;
- bd_local_t *local = frame->local;
- bd_attr_t *bdatt = NULL;
-
- if (op_ret < 0)
- goto out;
-
- local->dict = dict_new ();
- BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
-
- bd_inode_ctx_get (local->inode, this, &bdatt);
- if (!bdatt) {
- op_errno = EINVAL;
- goto out;
- }
-
- gf_asprintf (&bd, "%s:%ld", bdatt->type, local->bdatt->iatt.ia_size);
- if (dict_set_dynstr (local->dict, BD_XATTR, bd)) {
- op_errno = EINVAL;
- goto out;
- }
-
- if (local->fd)
- STACK_WIND (frame, bd_trunc_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- local->fd, local->dict, 0, NULL);
- else
- STACK_WIND (frame, bd_trunc_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->loc, local->dict, 0, NULL);
-
- return 0;
-out:
- if (local->fd)
- BD_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL,
- NULL);
- else
- BD_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL,
- NULL);
- GF_FREE (bd);
- return 0;
-}
-
-void
-bd_do_trunc (call_frame_t *frame, xlator_t *this, fd_t *fd, loc_t *loc,
- off_t offset, bd_attr_t *bdatt)
-{
- bd_local_t *local = NULL;
- struct iatt prebuf = {0, };
- int op_errno = 0;
- int op_ret = -1;
-
- /* If requested size is less than LV size, return success */
- if (offset <= bdatt->iatt.ia_size) {
- memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
- bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
- op_ret = 0;
- goto out;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr);
- BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
-
- if (fd) {
- local->inode = inode_ref (fd->inode);
- local->fd = fd_ref (fd);
- } else {
- local->inode = inode_ref (loc->inode);
- loc_copy (&local->loc, loc);
- }
-
- local->bdatt->iatt.ia_size =
- bd_adjust_size (this->private, offset);
-
- STACK_WIND (frame, bd_trunc_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
-
- return;
-
-out:
- if (fd)
- BD_STACK_UNWIND (ftruncate, frame, op_ret, op_errno,
- &prebuf, &bdatt->iatt, NULL);
- else
- BD_STACK_UNWIND (truncate, frame, op_ret, op_errno,
- &prebuf, &bdatt->iatt, NULL);
- return;
-}
-
-/*
- * bd_ftruncate: Resizes a LV if fd belongs to BD.
- */
-int32_t
-bd_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- int op_errno = 0;
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
- STACK_WIND (frame, default_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd,
- offset, xdata);
- return 0;
- }
-
- bd_do_trunc (frame, this, fd, NULL, offset, bdatt);
- return 0;
-out:
- BD_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-/*
- * bd_truncate: Resizes a LV if file maps to LV.
- */
-int32_t
-bd_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- int op_errno = 0;
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- if (bd_inode_ctx_get (loc->inode, this, &bdatt)) {
- STACK_WIND (frame, default_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc,
- offset, xdata);
- return 0;
- }
-
- bd_do_trunc (frame, this, NULL, loc, offset, bdatt);
- return 0;
-
-out:
- BD_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-__bd_pwritev (int fd, struct iovec *vector, int count, off_t offset,
- uint64_t bd_size)
-{
- int index = 0;
- int retval = 0;
- off_t internal_offset = 0;
-
- if (!vector)
- return -EFAULT;
-
- retval = pwritev (fd, vector, count, offset);
- if (retval == -1) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "base %p, length %ld, offset %ld, message %s",
- vector[index].iov_base, vector[index].iov_len,
- internal_offset, strerror (errno));
- retval = -errno;
- goto err;
- }
-/*
-
-
- internal_offset = offset;
- for (index = 0; index < count; index++) {
- if (internal_offset > bd_size) {
- op_ret = -ENOSPC;
- goto err;
- }
- if (internal_offset + vector[index].iov_len > bd_size) {
- vector[index].iov_len = bd_size - internal_offset;
- no_space = 1;
- }
- retval = pwritev (fd, vector[index].iov_base,
- vector[index].iov_len, internal_offset);
- if (retval == -1) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "base %p, length %ld, offset %ld, message %s",
- vector[index].iov_base, vector[index].iov_len,
- internal_offset, strerror (errno));
- op_ret = -errno;
- goto err;
- }
- op_ret += retval;
- internal_offset += retval;
- if (no_space)
- break;
- }
-*/
-err:
- return retval;
-}
-
-/*
- * bd_writev: Writes to LV if its BD file or forwards the request to posix_write
- * bd_writev -> posix_writev -> bd_writev_cbk
- */
-int
-bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdict)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int _fd = -1;
- bd_fd_t *bd_fd = NULL;
- int ret = -1;
- uint64_t size = 0;
- struct iatt prebuf = {0, };
- bd_attr_t *bdatt = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (vector, out);
-
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0 || !bd_fd) { /* posix fd */
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count,
- offset, flags, iobref, xdict);
- return 0;
- }
-
- _fd = bd_fd->fd;
-
- if (bd_inode_ctx_get (fd->inode, this, &bdatt)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- size = bdatt->iatt.ia_size;
-
- op_ret = __bd_pwritev (_fd, vector, count, offset, size);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64
- ", %s", offset, strerror (op_errno));
- goto out;
- }
-
- memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt));
- bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME);
-out:
-
- BD_STACK_UNWIND (writev, frame, op_ret, op_errno, &prebuf,
- &bdatt->iatt, NULL);
- return 0;
-}
-
-int
-bd_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- bd_attr_t *bdatt = NULL;
- int *valid = cookie;
- bd_local_t *local = frame->local;
-
- if (op_ret < 0 || !valid || !local)
- goto out;
-
- if (bd_inode_ctx_get (local->inode, this, &bdatt))
- goto out;
-
- if (*valid & GF_SET_ATTR_UID)
- bdatt->iatt.ia_uid = postbuf->ia_uid;
- else if (*valid & GF_SET_ATTR_GID)
- bdatt->iatt.ia_gid = postbuf->ia_gid;
- else if (*valid & GF_SET_ATTR_MODE) {
- bdatt->iatt.ia_type = postbuf->ia_type;
- bdatt->iatt.ia_prot = postbuf->ia_prot;
- } else if (*valid & GF_SET_ATTR_ATIME) {
- bdatt->iatt.ia_atime = postbuf->ia_atime;
- bdatt->iatt.ia_atime_nsec = postbuf->ia_atime_nsec;
- } else if (*valid & GF_SET_ATTR_MTIME) {
- bdatt->iatt.ia_mtime = postbuf->ia_mtime;
- bdatt->iatt.ia_mtime_nsec = postbuf->ia_mtime_nsec;
- }
-
- bdatt->iatt.ia_ctime = postbuf->ia_ctime;
- bdatt->iatt.ia_ctime_nsec = postbuf->ia_ctime_nsec;
-
- memcpy (postbuf, &bdatt->iatt, sizeof (struct iatt));
-out:
- GF_FREE (valid);
- BD_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
-}
-
-int
-bd_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- bd_local_t *local = NULL;
- bd_attr_t *bdatt = NULL;
- int *ck_valid = NULL;
- int op_errno = 0;
-
- if (bd_inode_ctx_get (loc->inode, this, &bdatt)) {
- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- ck_valid = GF_CALLOC (1, sizeof (valid), gf_bd_int32_t);
- BD_VALIDATE_MEM_ALLOC (ck_valid, op_errno, out);
-
- local->inode = inode_ref (loc->inode);
- *ck_valid = valid;
-
- STACK_WIND_COOKIE (frame, bd_setattr_cbk, ck_valid, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid, xdata);
-
- return 0;
-out:
- BD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, xdata);
- return 0;
-}
-
-int
-bd_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- bd_attr_t *bdatt = NULL;
-
- if (op_ret < 0)
- goto out;
-
- if (bd_inode_ctx_get (inode, this, &bdatt))
- goto out;
-
- bdatt->iatt.ia_ctime = buf->ia_ctime;
- bdatt->iatt.ia_ctime_nsec = buf->ia_ctime_nsec;
- bdatt->iatt.ia_nlink = buf->ia_nlink;
- memcpy (buf, &bdatt->iatt, sizeof (struct iatt));
-
-out:
- BD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, NULL);
- return 0;
-}
-
-int
-bd_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, bd_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-int
-bd_handle_special_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- dict_t *xattr = NULL;
- int op_ret = -1;
- int op_errno = ENOMEM;;
- bd_priv_t *priv = this->private;
-
- xattr = dict_new ();
- if (!xattr)
- goto out;
-
- if (!strcmp (name, VOL_TYPE))
- op_ret = dict_set_int64 (xattr, (char *)name, 1);
- else if (!strcmp (name, VOL_CAPS))
- op_ret = dict_set_int64 (xattr, (char *)name, priv->caps);
- else
- op_ret = bd_get_origin (this->private, loc, fd, xattr);
-
-out:
- if (loc)
- BD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr,
- xdata);
- else
- BD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
- xdata);
-
- op_ret = dict_reset (xattr);
- dict_unref (xattr);
-
- return 0;
-}
-
-int
-bd_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS)
- || !strcmp (name, BD_ORIGIN)))
- bd_handle_special_xattrs (frame, this, NULL, fd, name, xdata);
- else
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd, name, xdata);
- return 0;
-}
-
-int
-bd_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS)
- || !strcmp (name, BD_ORIGIN)))
- bd_handle_special_xattrs (frame, this, loc, NULL, name, xdata);
- else
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc, name, xdata);
-
- return 0;
-}
-
-int
-bd_unlink_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- bd_gfid_t gfid = {0, };
- bd_local_t *local = frame->local;
-
- if (buf->ia_nlink > 1)
- goto posix;
-
- BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out);
-
- uuid_utoa_r (inode->gfid, gfid);
- if (bd_delete_lv (this->private, gfid, &op_errno) < 0) {
- if (op_errno != ENOENT)
- goto out;
- }
-
-posix:
- /* remove posix */
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- &local->loc, 0, NULL);
-
- return 0;
-out:
- BD_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int
-bd_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
-{
- int op_errno = 0;
- bd_attr_t *bdatt = NULL;
- bd_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- if (bd_inode_ctx_get (loc->inode, this, &bdatt)) {
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
- }
-
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- loc_copy (&local->loc, loc);
-
- STACK_WIND (frame, bd_unlink_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, NULL);
- return 0;
-out:
- BD_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-bd_priv (xlator_t *this)
-{
- return 0;
-}
-
-int32_t
-bd_inode (xlator_t *this)
-{
- return 0;
-}
-
-int32_t
-bd_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
-{
- int op_ret = -1;
- int op_errno = 0;
- int ret = 0;
- int _fd = -1;
- char *alloc_buf = NULL;
- char *buf = NULL;
- int32_t weak_checksum = 0;
- bd_fd_t *bd_fd = NULL;
- unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0};
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = bd_fd_ctx_get (this, fd, &bd_fd);
- if (ret < 0 || !bd_fd) {
- STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rchecksum, fd, offset,
- len, xdata);
- return 0;
- }
-
- memset (strong_checksum, 0, MD5_DIGEST_LENGTH);
-
- alloc_buf = page_aligned_alloc (len, &buf);
- if (!alloc_buf) {
- op_errno = ENOMEM;
- goto out;
- }
-
- _fd = bd_fd->fd;
-
- LOCK (&fd->lock);
- {
- ret = pread (_fd, buf, len, offset);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pread of %d bytes returned %d (%s)",
- len, ret, strerror (errno));
- op_errno = errno;
- }
- }
- UNLOCK (&fd->lock);
-
- if (ret < 0)
- goto out;
-
- weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf,
- (size_t) len);
- gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len,
- (unsigned char *) strong_checksum);
-
- op_ret = 0;
-out:
- BD_STACK_UNWIND (rchecksum, frame, op_ret, op_errno,
- weak_checksum, strong_checksum, NULL);
-
- GF_FREE (alloc_buf);
-
- return 0;
-}
-
-static int
-bd_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
-{
- int32_t ret = 0;
- struct iatt statpre = {0,};
- struct iatt statpost = {0,};
- bd_attr_t *bdatt = NULL;
-
- /* iatt already cached */
- if (bd_inode_ctx_get (fd->inode, this, &bdatt) < 0) {
- STACK_WIND (frame, default_zerofill_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->zerofill,
- fd, offset, len, xdata);
- return 0;
- }
-
- ret = bd_do_zerofill(frame, this, fd, offset, len,
- &statpre, &statpost);
- if (ret)
- goto err;
-
- STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
- return 0;
-
-err:
- STACK_UNWIND_STRICT(zerofill, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-/**
- * notify - when parent sends PARENT_UP, send CHILD_UP event from here
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- switch (event)
- {
- case GF_EVENT_PARENT_UP:
- {
- /* Tell the parent that bd xlator is up */
- default_notify (this, GF_EVENT_CHILD_UP, data);
- }
- break;
- default:
- break;
- }
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_bd_mt_end + 1);
-
- if (ret != 0)
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
-
- return ret;
-}
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- int ret = -1;
- bd_priv_t *priv = this->private;
-
- GF_OPTION_RECONF ("bd-aio", priv->aio_configured, options,
- bool, out);
-
- if (priv->aio_configured)
- bd_aio_on (this);
- else
- bd_aio_off (this);
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * bd xlator init - Validate configured VG
- */
-int
-init (xlator_t *this)
-{
- int ret = 0;
- char *vg_data = NULL;
- char *device = NULL;
- bd_priv_t *_private = NULL;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: storage/bd needs posix as subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. Please check the volume file.");
- }
-
- GF_OPTION_INIT ("export", vg_data, str, error);
- GF_OPTION_INIT ("device", device, str, error);
-
- /* Now we support only LV device */
- if (strcasecmp (device, BACKEND_VG)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: unknown %s backend %s", BD_XLATOR, device);
- return -1;
- }
-
- this->local_pool = mem_pool_new (bd_local_t, 64);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: Failed to create bd memory pool");
- return -1;
- }
-
- ret = 0;
- _private = GF_CALLOC (1, sizeof (*_private), gf_bd_private);
- if (!_private)
- goto error;
-
- this->private = _private;
- _private->vg = gf_strdup (vg_data);
- if (!_private->vg)
- goto error;
-
- _private->handle = lvm_init (NULL);
- if (!_private->handle) {
- gf_log (this->name, GF_LOG_CRITICAL, "lvm_init failed");
- goto error;
- }
- _private->caps = BD_CAPS_BD;
- if (bd_scan_vg (this, _private))
- goto error;
-
- _private->aio_init_done = _gf_false;
- _private->aio_capable = _gf_false;
-
- GF_OPTION_INIT ("bd-aio", _private->aio_configured, bool, error);
- if (_private->aio_configured) {
- if (bd_aio_on (this)) {
- gf_log (this->name, GF_LOG_ERROR,
- "BD AIO init failed");
- ret = -1;
- goto error;
- }
- }
-
- _private->caps |= BD_CAPS_OFFLOAD_COPY | BD_CAPS_OFFLOAD_SNAPSHOT |
- BD_CAPS_OFFLOAD_ZERO;
-
- return 0;
-error:
- if (_private) {
- GF_FREE (_private->vg);
- if (_private->handle)
- lvm_quit (_private->handle);
- GF_FREE (_private);
- }
-
- mem_pool_destroy (this->local_pool);
-
- return -1;
-}
-
-void
-fini (xlator_t *this)
-{
- bd_priv_t *priv = this->private;
- mem_pool_destroy (this->local_pool);
- this->local_pool = NULL;
- if (!priv)
- return;
- lvm_quit (priv->handle);
- GF_FREE (priv->vg);
- this->private = NULL;
- GF_FREE (priv);
- return;
-}
-
-struct xlator_dumpops dumpops = {
- .priv = bd_priv,
- .inode = bd_inode,
-};
-
-struct xlator_fops fops = {
- .readdirp = bd_readdirp,
- .lookup = bd_lookup,
- .stat = bd_stat,
- .statfs = bd_statfs,
- .open = bd_open,
- .fstat = bd_fstat,
- .rchecksum = bd_rchecksum,
- .readv = bd_readv,
- .fsync = bd_fsync,
- .setxattr = bd_setxattr,
- .fsetxattr = bd_fsetxattr,
- .removexattr = bd_removexattr,
- .fremovexattr=bd_fremovexattr,
- .truncate = bd_truncate,
- .ftruncate = bd_ftruncate,
- .writev = bd_writev,
- .getxattr = bd_getxattr,
- .fgetxattr = bd_fgetxattr,
- .unlink = bd_unlink,
- .link = bd_link,
- .flush = bd_flush,
- .setattr = bd_setattr,
- .discard = bd_discard,
- .zerofill = bd_zerofill,
-};
-
-struct xlator_cbks cbks = {
- .release = bd_release,
- .forget = bd_forget,
-};
-
-struct volume_options options[] = {
- { .key = {"export"},
- .type = GF_OPTION_TYPE_STR},
- { .key = {"device"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = BACKEND_VG},
- {
- .key = {"bd-aio"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Support for native Linux AIO"
- },
-
- { .key = {NULL} }
-};
diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h
deleted file mode 100644
index 62add16cdf4..00000000000
--- a/xlators/storage/bd/src/bd.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- BD translator - Exports Block devices on server side as regular
- files to client
-
- Copyright IBM, Corp. 2012
-
- This file is part of GlusterFS.
-
- Author:
- M. Mohan Kumar <mohan@in.ibm.com>
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _BD_H
-#define _BD_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifdef HAVE_LIBAIO
-#include <libaio.h>
-#endif
-
-#include "xlator.h"
-#include "mem-types.h"
-
-#define BD_XLATOR "block device mapper xlator"
-#define BACKEND_VG "vg"
-#define GF_XATTR "user.glusterfs"
-#define BD_XATTR GF_XATTR ".bd"
-
-#define BD_LV "lv"
-#define BD_THIN "thin"
-
-#define VOL_TYPE "volume.type"
-#define VOL_CAPS "volume.caps"
-
-#define ALIGN_SIZE 4096
-
-#define BD_CAPS_BD 0x01
-#define BD_CAPS_THIN 0x02
-#define BD_CAPS_OFFLOAD_COPY 0x04
-#define BD_CAPS_OFFLOAD_SNAPSHOT 0x08
-#define BD_CAPS_OFFLOAD_ZERO 0x20
-
-#define BD_CLONE "clone"
-#define BD_SNAPSHOT "snapshot"
-#define BD_MERGE "merge"
-#define BD_ORIGIN "list-origin"
-
-#define IOV_NR 4
-#define IOV_SIZE (64 * 1024)
-
-#define ALIGN_SIZE 4096
-#define LINKTO "trusted.glusterfs.dht.linkto"
-
-#define MAX_NO_VECT 1024
-
-
-#define BD_VALIDATE_MEM_ALLOC(buff, op_errno, label) \
- if (!buff) { \
- op_errno = ENOMEM; \
- gf_log (this->name, GF_LOG_ERROR, "out of memory"); \
- goto label; \
- }
-
-#define BD_VALIDATE_LOCAL_OR_GOTO(local, op_errno, label) \
- if (!local) { \
- op_errno = EINVAL; \
- goto label; \
- }
-
-#define BD_STACK_UNWIND(typ, frame, args ...) do { \
- bd_local_t *__local = frame->local; \
- xlator_t *__this = frame->this; \
- \
- frame->local = NULL; \
- STACK_UNWIND_STRICT (typ, frame, args); \
- if (__local) \
- bd_local_free (__this, __local); \
- } while (0)
-
-typedef char bd_gfid_t[GF_UUID_BUF_SIZE];
-
-/**
- * bd_fd - internal structure
- */
-typedef struct bd_fd {
- int fd;
- int32_t flag;
- int odirect;
-} bd_fd_t;
-
-typedef struct bd_priv {
- lvm_t handle;
- char *vg;
- char *pool;
- int caps;
- gf_boolean_t aio_init_done;
- gf_boolean_t aio_capable;
- gf_boolean_t aio_configured;
-#ifdef HAVE_LIBAIO
- io_context_t ctxp;
- pthread_t aiothread;
-#endif
-} bd_priv_t;
-
-
-typedef enum bd_type {
- BD_TYPE_NONE,
- BD_TYPE_LV,
-} bd_type_t;
-
-typedef struct {
- struct iatt iatt;
- char *type;
-} bd_attr_t;
-
-typedef enum {
- BD_OF_NONE,
- BD_OF_CLONE,
- BD_OF_SNAPSHOT,
- BD_OF_MERGE,
-} bd_offload_t;
-
-typedef struct {
- dict_t *dict;
- bd_attr_t *bdatt;
- inode_t *inode;
- loc_t loc;
- fd_t *fd;
- data_t *data; /* for setxattr */
- bd_offload_t offload;
- uint64_t size;
- loc_t *dloc;
-} bd_local_t;
-
-/* Prototypes */
-int bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx);
-int bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx);
-int bd_scan_vg (xlator_t *this, bd_priv_t *priv);
-bd_local_t *bd_local_init (call_frame_t *frame, xlator_t *this);
-void bd_local_free (xlator_t *this, bd_local_t *local);
-int bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd);
-char *page_aligned_alloc (size_t size, char **aligned_buf);
-int bd_validate_bd_xattr (xlator_t *this, char *bd, char **type,
- uint64_t *lv_size, uuid_t uuid);
-uint64_t bd_get_default_extent (bd_priv_t *priv);
-uint64_t bd_adjust_size (bd_priv_t *priv, uint64_t size);
-int bd_create (uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv);
-int bd_resize (bd_priv_t *priv, uuid_t uuid, off_t size);
-int bd_delete_lv (bd_priv_t *priv, const char *lv_name, int *op_errno);
-int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv);
-int bd_clone (bd_local_t *local, bd_priv_t *priv);
-
-int bd_merge (bd_priv_t *priv, uuid_t gfid);
-int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict);
-void bd_update_amtime(struct iatt *iatt, int flag);
-int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv);
-int bd_clone (bd_local_t *local, bd_priv_t *priv);
-int bd_merge (bd_priv_t *priv, uuid_t gfid);
-int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict);
-int bd_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, off_t len, struct iatt *prebuf,
- struct iatt *postbuf);
-
-#endif
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index 88efcc784db..c080a229ff3 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -1,19 +1,25 @@
-
+if WITH_SERVER
xlator_LTLIBRARIES = posix.la
+endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
-posix_la_LDFLAGS = -module -avoid-version
+posix_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c
-posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO)
+posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c \
+ posix-gfid-path.c posix-entry-ops.c posix-inode-fd-ops.c \
+ posix-common.c posix-metadata.c
+posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO) \
+ $(ACL_LIBS)
-noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h
+noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \
+ posix-messages.h posix-gfid-path.h posix-inode-handle.h \
+ posix-metadata.h posix-metadata-disk.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/timer-wheel
-AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) -I$(top_srcdir)/glusterfsd/src
CLEANFILES =
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
index c3bbddd6737..d0cb0002bbf 100644
--- a/xlators/storage/posix/src/posix-aio.c
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -7,563 +7,550 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "glusterfs.h"
#include "posix.h"
#include <sys/uio.h>
+#include "posix-messages.h"
#ifdef HAVE_LIBAIO
#include <libaio.h>
-
void
-__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
- off_t offset, size_t size)
+__posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size)
{
- int odirect = 0;
- int flags = 0;
- int ret = 0;
-
- odirect = pfd->odirect;
-
- if ((fd->flags|opflags) & O_DIRECT) {
- /* if instructed, use O_DIRECT always */
- odirect = 1;
- } else {
- /* else use O_DIRECT when feasible */
- if ((offset|size) & 0xfff)
- odirect = 0;
- else
- odirect = 1;
- }
-
- if (!odirect && pfd->odirect) {
- flags = fcntl (pfd->fd, F_GETFL);
- ret = fcntl (pfd->fd, F_SETFL, (flags & (~O_DIRECT)));
- pfd->odirect = 0;
- }
-
- if (odirect && !pfd->odirect) {
- flags = fcntl (pfd->fd, F_GETFL);
- ret = fcntl (pfd->fd, F_SETFL, (flags | O_DIRECT));
- pfd->odirect = 1;
- }
-
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "fcntl() failed (%s). fd=%d flags=%d pfd->odirect=%d",
- strerror (errno), pfd->fd, flags, pfd->odirect);
- }
+ int odirect = 0;
+ int flags = 0;
+ int ret = 0;
+
+ odirect = pfd->odirect;
+
+ if ((fd->flags | opflags) & O_DIRECT) {
+ /* if instructed, use O_DIRECT always */
+ odirect = 1;
+ } else {
+ /* else use O_DIRECT when feasible */
+ if ((offset | size) & 0xfff)
+ odirect = 0;
+ else
+ odirect = 1;
+ }
+
+ if (!odirect && pfd->odirect) {
+ flags = fcntl(pfd->fd, F_GETFL);
+ ret = fcntl(pfd->fd, F_SETFL, (flags & (~O_DIRECT)));
+ pfd->odirect = 0;
+ }
+
+ if (odirect && !pfd->odirect) {
+ flags = fcntl(pfd->fd, F_GETFL);
+ ret = fcntl(pfd->fd, F_SETFL, (flags | O_DIRECT));
+ pfd->odirect = 1;
+ }
+
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_FCNTL_FAILED,
+ "fcntl() failed. fd=%d flags=%d pfd->odirect=%d", pfd->fd, flags,
+ pfd->odirect);
+ }
}
-
struct posix_aio_cb {
- struct iocb iocb;
- call_frame_t *frame;
- struct iobuf *iobuf;
- struct iobref *iobref;
- struct iatt prebuf;
- int fd;
- int op;
- off_t offset;
+ struct iocb iocb;
+ call_frame_t *frame;
+ struct iobuf *iobuf;
+ struct iobref *iobref;
+ struct iatt prebuf;
+ int _fd;
+ fd_t *fd;
+ int op;
+ off_t offset;
};
-
int
-posix_aio_readv_complete (struct posix_aio_cb *paiocb, int res, int res2)
+posix_aio_readv_complete(struct posix_aio_cb *paiocb, int res, int res2)
{
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- struct iobuf *iobuf = NULL;
- struct iatt postbuf = {0,};
- int _fd = -1;
- int op_ret = -1;
- int op_errno = 0;
- struct iovec iov;
- struct iobref *iobref = NULL;
- int ret = 0;
- off_t offset = 0;
- struct posix_private * priv = NULL;
-
-
- frame = paiocb->frame;
- this = frame->this;
- priv = this->private;
- iobuf = paiocb->iobuf;
- _fd = paiocb->fd;
- offset = paiocb->offset;
-
- if (res < 0) {
- op_ret = -1;
- op_errno = -res;
- gf_log (this->name, GF_LOG_ERROR,
- "readv(async) failed fd=%d,size=%lu,offset=%llu (%d/%s)",
- _fd, paiocb->iocb.u.c.nbytes,
- (unsigned long long) paiocb->offset,
- res, strerror (op_errno));
- goto out;
- }
-
- ret = posix_fdstat (this, _fd, &postbuf);
- if (ret != 0) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat failed on fd=%d: %s", _fd,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = res;
- op_errno = 0;
-
- iobref = iobref_new ();
- if (!iobref) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- iobref_add (iobref, iobuf);
-
- iov.iov_base = iobuf_ptr (iobuf);
- iov.iov_len = op_ret;
-
-
- /* Hack to notify higher layers of EOF. */
- if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
- op_errno = ENOENT;
-
- LOCK (&priv->lock);
- {
- priv->read_value += op_ret;
- }
- UNLOCK (&priv->lock);
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iatt postbuf = {
+ 0,
+ };
+ int _fd = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+ struct iovec iov;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ off_t offset = 0;
+ struct posix_private *priv = NULL;
+ fd_t *fd = NULL;
+
+ frame = paiocb->frame;
+ this = frame->this;
+ priv = this->private;
+ iobuf = paiocb->iobuf;
+ fd = paiocb->fd;
+ _fd = paiocb->_fd;
+ offset = paiocb->offset;
+
+ if (res < 0) {
+ op_ret = -1;
+ op_errno = -res;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_READV_FAILED,
+ "readv(async) failed fd=%d,size=%lu,offset=%llu (%d)", _fd,
+ paiocb->iocb.u.c.nbytes, (unsigned long long)paiocb->offset,
+ res);
+ goto out;
+ }
+
+ ret = posix_fdstat(this, fd->inode, _fd, &postbuf);
+ if (ret != 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%d", _fd);
+ goto out;
+ }
+
+ op_ret = res;
+ op_errno = 0;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf_ptr(iobuf);
+ iov.iov_len = op_ret;
+
+ /* Hack to notify higher layers of EOF. */
+ if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
+ op_errno = ENOENT;
+
+ GF_ATOMIC_ADD(priv->read_value, op_ret);
out:
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1,
- &postbuf, iobref, NULL);
- if (iobuf)
- iobuf_unref (iobuf);
- if (iobref)
- iobref_unref (iobref);
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf,
+ iobref, NULL);
+ if (iobuf)
+ iobuf_unref(iobuf);
+ if (iobref)
+ iobref_unref(iobref);
- GF_FREE (paiocb);
+ if (paiocb->fd)
+ fd_unref(paiocb->fd);
- return 0;
-}
+ GF_FREE(paiocb);
+ return 0;
+}
int
-posix_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+posix_aio_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- int32_t op_errno = EINVAL;
- int _fd = -1;
- struct iobuf *iobuf = NULL;
- struct posix_fd * pfd = NULL;
- int ret = -1;
- struct posix_aio_cb *paiocb = NULL;
- struct posix_private *priv = NULL;
- struct iocb *iocb = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- priv = this->private;
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto err;
- }
- _fd = pfd->fd;
-
- if (!size) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
- goto err;
- }
-
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (!iobuf) {
- op_errno = ENOMEM;
- goto err;
- }
-
- paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
- if (!paiocb) {
- op_errno = ENOMEM;
- goto err;
- }
-
-
- paiocb->frame = frame;
- paiocb->iobuf = iobuf;
- paiocb->offset = offset;
- paiocb->fd = _fd;
- paiocb->op = GF_FOP_READ;
-
- paiocb->iocb.data = paiocb;
- paiocb->iocb.aio_fildes = _fd;
- paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
- paiocb->iocb.aio_reqprio = 0;
- paiocb->iocb.u.c.buf = iobuf_ptr (iobuf);
- paiocb->iocb.u.c.nbytes = size;
- paiocb->iocb.u.c.offset = offset;
-
- iocb = &paiocb->iocb;
-
- LOCK (&fd->lock);
- {
- __posix_fd_set_odirect (fd, pfd, flags, offset, size);
-
- ret = io_submit (priv->ctxp, 1, &iocb);
- }
- UNLOCK (&fd->lock);
-
- if (ret != 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "io_submit() returned %d", ret);
- op_errno = -ret;
- goto err;
- }
-
- return 0;
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct iobuf *iobuf = NULL;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ priv = this->private;
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ if (!size) {
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_INVALID_ARGUMENT,
+ "size=%" GF_PRI_SIZET, size);
+ goto err;
+ }
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb = GF_CALLOC(1, sizeof(*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb->frame = frame;
+ paiocb->iobuf = iobuf;
+ paiocb->offset = offset;
+ paiocb->fd = fd_ref(fd);
+ paiocb->_fd = _fd;
+ paiocb->op = GF_FOP_READ;
+
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.c.buf = iobuf_ptr(iobuf);
+ paiocb->iocb.u.c.nbytes = size;
+ paiocb->iocb.u.c.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ LOCK(&fd->lock);
+ {
+ __posix_fd_set_odirect(fd, pfd, flags, offset, size);
+
+ ret = io_submit(priv->ctxp, 1, &iocb);
+ }
+ UNLOCK(&fd->lock);
+
+ if (ret != 1) {
+ op_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_IO_SUBMIT_FAILED,
+ "io_submit() returned %d", ret);
+ goto err;
+ }
+
+ return 0;
err:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
- if (iobuf)
- iobuf_unref (iobuf);
+ STACK_UNWIND_STRICT(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+ if (iobuf)
+ iobuf_unref(iobuf);
- if (paiocb)
- GF_FREE (paiocb);
+ if (paiocb) {
+ if (paiocb->fd)
+ fd_unref(paiocb->fd);
+ GF_FREE(paiocb);
+ }
- return 0;
+ return 0;
}
-
int
-posix_aio_writev_complete (struct posix_aio_cb *paiocb, int res, int res2)
+posix_aio_writev_complete(struct posix_aio_cb *paiocb, int res, int res2)
{
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- struct iatt prebuf = {0,};
- struct iatt postbuf = {0,};
- int _fd = -1;
- int op_ret = -1;
- int op_errno = 0;
- int ret = 0;
- struct posix_private * priv = NULL;
-
-
- frame = paiocb->frame;
- this = frame->this;
- priv = this->private;
- prebuf = paiocb->prebuf;
- _fd = paiocb->fd;
-
- if (res < 0) {
- op_ret = -1;
- op_errno = -res;
- gf_log (this->name, GF_LOG_ERROR,
- "writev(async) failed fd=%d,offset=%llu (%d/%s)",
- _fd, (unsigned long long) paiocb->offset, res,
- strerror (op_errno));
-
- goto out;
- }
-
- ret = posix_fdstat (this, _fd, &postbuf);
- if (ret != 0) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat failed on fd=%d: %s", _fd,
- strerror (op_errno));
- goto out;
- }
-
-
- op_ret = res;
- op_errno = 0;
-
- LOCK (&priv->lock);
- {
- priv->write_value += op_ret;
- }
- UNLOCK (&priv->lock);
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ struct iatt prebuf = {
+ 0,
+ };
+ struct iatt postbuf = {
+ 0,
+ };
+ int _fd = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ struct posix_private *priv = NULL;
+ fd_t *fd = NULL;
+
+ if (!paiocb) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = paiocb->frame;
+ this = frame->this;
+ priv = this->private;
+ prebuf = paiocb->prebuf;
+ fd = paiocb->fd;
+ _fd = paiocb->_fd;
+
+ if (res < 0) {
+ op_ret = -1;
+ op_errno = -res;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITEV_FAILED,
+ "writev(async) failed fd=%d,offset=%llu (%d)", _fd,
+ (unsigned long long)paiocb->offset, res);
+
+ goto out;
+ }
+
+ ret = posix_fdstat(this, fd->inode, _fd, &postbuf);
+ if (ret != 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%d", _fd);
+ goto out;
+ }
+
+ op_ret = res;
+ op_errno = 0;
+
+ GF_ATOMIC_ADD(priv->write_value, op_ret);
out:
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf,
- NULL);
-
- if (paiocb) {
- if (paiocb->iobref)
- iobref_unref (paiocb->iobref);
- GF_FREE (paiocb);
- }
-
- return 0;
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &prebuf, &postbuf,
+ NULL);
+
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref(paiocb->iobref);
+ if (paiocb->fd)
+ fd_unref(paiocb->fd);
+ GF_FREE(paiocb);
+ }
+
+ return 0;
}
-
int
-posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *iov, int count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+posix_aio_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *iov, int count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- int32_t op_errno = EINVAL;
- int _fd = -1;
- struct posix_fd * pfd = NULL;
- int ret = -1;
- struct posix_aio_cb *paiocb = NULL;
- struct posix_private *priv = NULL;
- struct iocb *iocb = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- priv = this->private;
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto err;
- }
- _fd = pfd->fd;
-
- paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
- if (!paiocb) {
- op_errno = ENOMEM;
- goto err;
- }
-
-
- paiocb->frame = frame;
- paiocb->offset = offset;
- paiocb->fd = _fd;
- paiocb->op = GF_FOP_WRITE;
-
- paiocb->iocb.data = paiocb;
- paiocb->iocb.aio_fildes = _fd;
- paiocb->iobref = iobref_ref (iobref);
- paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
- paiocb->iocb.aio_reqprio = 0;
- paiocb->iocb.u.v.vec = iov;
- paiocb->iocb.u.v.nr = count;
- paiocb->iocb.u.v.offset = offset;
-
- iocb = &paiocb->iocb;
-
- ret = posix_fdstat (this, _fd, &paiocb->prebuf);
- if (ret != 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto err;
- }
-
-
- LOCK (&fd->lock);
- {
- __posix_fd_set_odirect (fd, pfd, flags, offset,
- iov_length (iov, count));
-
- ret = io_submit (priv->ctxp, 1, &iocb);
- }
- UNLOCK (&fd->lock);
-
- if (ret != 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "io_submit() returned %d", ret);
- op_errno = -ret;
- goto err;
- }
-
- return 0;
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_errno, op_errno, err);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ paiocb = GF_CALLOC(1, sizeof(*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb->frame = frame;
+ paiocb->offset = offset;
+ paiocb->fd = fd_ref(fd);
+ paiocb->_fd = _fd;
+ paiocb->op = GF_FOP_WRITE;
+
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iobref = iobref_ref(iobref);
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.v.vec = iov;
+ paiocb->iocb.u.v.nr = count;
+ paiocb->iocb.u.v.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ ret = posix_fdstat(this, fd->inode, _fd, &paiocb->prebuf);
+ if (ret != 0) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
+ goto err;
+ }
+
+ LOCK(&fd->lock);
+ {
+ __posix_fd_set_odirect(fd, pfd, flags, offset, iov_length(iov, count));
+
+ ret = io_submit(priv->ctxp, 1, &iocb);
+ }
+ UNLOCK(&fd->lock);
+
+ if (ret != 1) {
+ op_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_IO_SUBMIT_FAILED,
+ "io_submit() returned %d,gfid=%s", ret,
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ return 0;
err:
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0);
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, 0, 0, 0);
- if (paiocb) {
- if (paiocb->iobref)
- iobref_unref (paiocb->iobref);
- GF_FREE (paiocb);
- }
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref(paiocb->iobref);
+ if (paiocb->fd)
+ fd_unref(paiocb->fd);
+ GF_FREE(paiocb);
+ }
- return 0;
+ return 0;
}
-
void *
-posix_aio_thread (void *data)
+posix_aio_thread(void *data)
{
- xlator_t *this = NULL;
- struct posix_private *priv = NULL;
- int ret = 0;
- int i = 0;
- struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS];
- struct io_event *event = NULL;
- struct posix_aio_cb *paiocb = NULL;
-
- this = data;
- THIS = this;
- priv = this->private;
-
- for (;;) {
- memset (&events[0], 0, sizeof (events));
- ret = io_getevents (priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS,
- &events[0], NULL);
- if (ret <= 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "io_getevents() returned %d", ret);
- if (ret == -EINTR)
- continue;
- break;
- }
-
- for (i = 0; i < ret; i++) {
- event = &events[i];
-
- paiocb = event->data;
-
- switch (paiocb->op) {
- case GF_FOP_READ:
- posix_aio_readv_complete (paiocb, event->res,
- event->res2);
- break;
- case GF_FOP_WRITE:
- posix_aio_writev_complete (paiocb, event->res,
- event->res2);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown op %d found in piocb",
- paiocb->op);
- break;
- }
- }
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ int ret = 0;
+ int i = 0;
+ struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS];
+ struct io_event *event = NULL;
+ struct posix_aio_cb *paiocb = NULL;
+
+ this = data;
+ THIS = this;
+ priv = this->private;
+
+ for (;;) {
+ memset(&events[0], 0, sizeof(events));
+ ret = io_getevents(priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS,
+ &events[0], NULL);
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_IO_GETEVENTS_FAILED,
+ "io_getevents() returned %d", ret);
+ if (ret == -EINTR)
+ continue;
+ break;
}
- return NULL;
-}
+ for (i = 0; i < ret; i++) {
+ event = &events[i];
+
+ paiocb = event->data;
+
+ switch (paiocb->op) {
+ case GF_FOP_READ:
+ posix_aio_readv_complete(paiocb, event->res, event->res2);
+ break;
+ case GF_FOP_WRITE:
+ posix_aio_writev_complete(paiocb, event->res, event->res2);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_OP,
+ "unknown op %d found in piocb", paiocb->op);
+ break;
+ }
+ }
+ }
+ return NULL;
+}
int
-posix_aio_init (xlator_t *this)
+posix_aio_init(xlator_t *this)
{
- struct posix_private *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- ret = io_setup (POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp);
- if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
- gf_log (this->name, GF_LOG_WARNING,
- "Linux AIO not available at run-time."
- " Continuing with synchronous IO");
- ret = 0;
- goto out;
- }
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "io_setup() failed. ret=%d, errno=%d",
- ret, errno);
- goto out;
- }
-
- ret = gf_thread_create (&priv->aiothread, NULL,
- posix_aio_thread, this);
- if (ret != 0) {
- io_destroy (priv->ctxp);
- goto out;
- }
-
- this->fops->readv = posix_aio_readv;
- this->fops->writev = posix_aio_writev;
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = io_setup(POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp);
+ if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_AIO_UNAVAILABLE,
+ "Linux AIO not available at run-time."
+ " Continuing with synchronous IO");
+ ret = 0;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_IO_SETUP_FAILED,
+ "io_setup() failed. ret=%d", ret);
+ goto out;
+ }
+
+ ret = gf_thread_create(&priv->aiothread, NULL, posix_aio_thread, this,
+ "posixaio");
+ if (ret != 0) {
+ io_destroy(priv->ctxp);
+ goto out;
+ }
+
+ this->fops->readv = posix_aio_readv;
+ this->fops->writev = posix_aio_writev;
out:
- return ret;
+ return ret;
}
-
int
-posix_aio_on (xlator_t *this)
+posix_aio_on(xlator_t *this)
{
- struct posix_private *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- if (!priv->aio_init_done) {
- ret = posix_aio_init (this);
- if (ret == 0)
- priv->aio_capable = _gf_true;
- else
- priv->aio_capable = _gf_false;
- priv->aio_init_done = _gf_true;
- }
-
- if (priv->aio_capable) {
- this->fops->readv = posix_aio_readv;
- this->fops->writev = posix_aio_writev;
- }
-
- return ret;
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (!priv->aio_init_done) {
+ ret = posix_aio_init(this);
+ if (ret == 0)
+ priv->aio_capable = _gf_true;
+ else
+ priv->aio_capable = _gf_false;
+ priv->aio_init_done = _gf_true;
+ }
+
+ if (priv->aio_capable) {
+ this->fops->readv = posix_aio_readv;
+ this->fops->writev = posix_aio_writev;
+ }
+
+ return ret;
}
int
-posix_aio_off (xlator_t *this)
+posix_aio_off(xlator_t *this)
{
- this->fops->readv = posix_readv;
- this->fops->writev = posix_writev;
+ this->fops->readv = posix_readv;
+ this->fops->writev = posix_writev;
- return 0;
+ return 0;
}
-
#else
-
int
-posix_aio_on (xlator_t *this)
+posix_aio_on(xlator_t *this)
{
- gf_log (this->name, GF_LOG_INFO,
- "Linux AIO not available at build-time."
- " Continuing with synchronous IO");
- return 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
}
int
-posix_aio_off (xlator_t *this)
+posix_aio_off(xlator_t *this)
{
- gf_log (this->name, GF_LOG_INFO,
- "Linux AIO not available at build-time."
- " Continuing with synchronous IO");
- return 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
}
void
-__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
- off_t offset, size_t size)
+__posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size)
{
- xlator_t *this = THIS;
- gf_log (this->name, GF_LOG_INFO,
- "Linux AIO not available at build-time."
- " Continuing with synchronous IO");
- return;
+ xlator_t *this = THIS;
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return;
}
+
#endif
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
index 5bde716019a..b316deb3229 100644
--- a/xlators/storage/posix/src/posix-aio.h
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -10,14 +10,6 @@
#ifndef _POSIX_AIO_H
#define _POSIX_AIO_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "glusterfs.h"
-
// Maximum number of concurrently submitted IO events. The heaviest load
// GlusterFS has been able to handle had 60-80 concurrent calls
#define POSIX_AIO_MAX_NR_EVENTS 256
@@ -25,15 +17,18 @@
// Maximum number of completed IO operations to reap per getevents syscall
#define POSIX_AIO_MAX_NR_GETEVENTS 16
+int
+posix_aio_on(xlator_t *this);
+int
+posix_aio_off(xlator_t *this);
-int posix_aio_on (xlator_t *this);
-int posix_aio_off (xlator_t *this);
-
-int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
+int
+posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
-int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata);
+int
+posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
#endif /* !_POSIX_AIO_H */
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
new file mode 100644
index 00000000000..f10722ec3fb
--- /dev/null
+++ b/xlators/storage/posix/src/posix-common.c
@@ -0,0 +1,1524 @@
+/*
+ Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#define __XOPEN_SOURCE 500
+
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <openssl/md5.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#ifdef HAVE_LINKAT
+#include <fcntl.h>
+#endif /* HAVE_LINKAT */
+
+#include "posix-inode-handle.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
+#include "glusterfs3-xdr.h"
+#include "posix-aio.h"
+#include <glusterfs/glusterfs-acl.h>
+#include "posix-messages.h"
+#include <glusterfs/events.h>
+#include "posix-gfid-path.h"
+#include <glusterfs/compat-uuid.h>
+#include "timer-wheel.h"
+
+extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
+
+/* HAVE_SET_FSID is undefined right before it is tested, so the #else
+ * branch below is always the one compiled in this file: the three macros
+ * expand to nothing. The setfsuid()/setfsgid() variants are kept for
+ * reference only. */
+#undef HAVE_SET_FSID
+#ifdef HAVE_SET_FSID
+
+/* Declares the locals that SET_FS_ID() fills and SET_TO_OLD_FS_ID() reads. */
+#define DECLARE_OLD_FS_ID_VAR \
+ uid_t old_fsuid; \
+ gid_t old_fsgid;
+
+/* Switch the filesystem uid/gid, remembering the previous values. */
+#define SET_FS_ID(uid, gid) \
+ do { \
+ old_fsuid = setfsuid(uid); \
+ old_fsgid = setfsgid(gid); \
+ } while (0)
+
+/* Restore the filesystem uid/gid saved by SET_FS_ID(). */
+#define SET_TO_OLD_FS_ID() \
+ do { \
+ setfsuid(old_fsuid); \
+ setfsgid(old_fsgid); \
+ } while (0)
+
+#else
+
+/* No-op variants (the ones actually in effect, see above). */
+#define DECLARE_OLD_FS_ID_VAR
+#define SET_FS_ID(uid, gid)
+#define SET_TO_OLD_FS_ID()
+
+#endif
+
+/* Store the sub-second part of a timestamp, given in nanoseconds, into `tv`.
+   The passed in `tv` can be
+       struct timespec
+   if utimensat() is available (better, because it keeps nanosecond
+   resolution) or
+       struct timeval
+   otherwise, in which case the value is scaled down to microseconds.
+   Both arguments are parenthesised so that expressions such as `a + b`
+   or `*ptr` can be passed safely. */
+#if HAVE_UTIMENSAT
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs)                        \
+    ((tv).tv_nsec = (nanosecs))
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs)                        \
+    ((tv).tv_usec = (nanosecs) / 1000)
+#endif
+
+/* Dump the private state of this xlator: a "<type>.<name>" section is
+ * opened first, then the brick path, its length and the read/write
+ * counters are written. Always returns 0; a NULL xlator or missing
+ * private data simply shortens the output. */
+int32_t
+posix_priv(xlator_t *this)
+{
+    char section[GF_DUMP_MAX_BUF_LEN];
+    struct posix_private *pp = NULL;
+
+    if (this == NULL)
+        return 0;
+
+    (void)snprintf(section, sizeof(section), "%s.%s", this->type, this->name);
+    gf_proc_dump_add_section("%s", section);
+
+    pp = this->private;
+    if (pp != NULL) {
+        gf_proc_dump_write("base_path", "%s", pp->base_path);
+        gf_proc_dump_write("base_path_length", "%d", pp->base_path_length);
+        gf_proc_dump_write("max_read", "%" PRId64,
+                           GF_ATOMIC_GET(pp->read_value));
+        gf_proc_dump_write("max_write", "%" PRId64,
+                           GF_ATOMIC_GET(pp->write_value));
+    }
+
+    return 0;
+}
+
+/* Placeholder: ignores its argument and always reports success (0). */
+int32_t
+posix_inode(xlator_t *this)
+{
+    (void)this;
+
+    return 0;
+}
+
+/**
+ * notify - event entry point of the posix xlator.
+ *
+ * GF_EVENT_PARENT_UP:   answer with GF_EVENT_CHILD_UP.
+ * GF_EVENT_PARENT_DOWN: only when the victim has cleanup_starting set:
+ *                       stop the janitor timer, wait until rel_fdcount
+ *                       drops to zero, then send GF_EVENT_CHILD_DOWN to
+ *                       the parent.
+ * Any other event is ignored. Always returns 0.
+ */
+int32_t
+posix_notify(xlator_t *this, int32_t event, void *data, ...)
+{
+    struct posix_private *priv = this->private;
+    glusterfs_ctx_t *ctx = this->ctx;
+    xlator_t *victim = data;
+    struct timespec deadline = {
+        0,
+    };
+    int timer_ret = 0;
+
+    if (event == GF_EVENT_PARENT_UP) {
+        /* Notify the parent that posix xlator is up */
+        default_notify(this, GF_EVENT_CHILD_UP, data);
+        return 0;
+    }
+
+    /* victim is only looked at for PARENT_DOWN (short-circuit). */
+    if (event != GF_EVENT_PARENT_DOWN || !victim->cleanup_starting)
+        return 0;
+
+    if (priv->janitor) {
+        pthread_mutex_lock(&priv->janitor_mutex);
+        {
+            priv->janitor_task_stop = _gf_true;
+            timer_ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
+                                        priv->janitor);
+            if (!timer_ret) {
+                /* Poll in one second slices until janitor_task_stop is
+                 * seen as false again (reset by janitor_task_done, per
+                 * the original comment). */
+                for (;;) {
+                    timespec_now_realtime(&deadline);
+                    deadline.tv_sec += 1;
+                    if (!priv->janitor_task_stop)
+                        break;
+                    (void)pthread_cond_timedwait(&priv->janitor_cond,
+                                                 &priv->janitor_mutex,
+                                                 &deadline);
+                }
+            }
+        }
+        pthread_mutex_unlock(&priv->janitor_mutex);
+        GF_FREE(priv->janitor);
+    }
+    priv->janitor = NULL;
+
+    /* Block until every pending fd release has been accounted for. */
+    pthread_mutex_lock(&ctx->fd_lock);
+    {
+        while (priv->rel_fdcount > 0) {
+            pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
+        }
+    }
+    pthread_mutex_unlock(&ctx->fd_lock);
+
+    gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+           victim->name);
+    default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
+
+    return 0;
+}
+
+/* Set up memory accounting for this xlator with gf_posix_mt_end + 1 type
+ * slots. Returns -1 for a NULL xlator, otherwise whatever
+ * xlator_mem_acct_init() returns. */
+int32_t
+mem_acct_init(xlator_t *this)
+{
+    if (this == NULL)
+        return -1;
+
+    return xlator_mem_acct_init(this, gf_posix_mt_end + 1);
+}
+
+/* Give the brick root directory the requested owner.
+ *
+ * An id of -1 is treated as "nothing requested" by the check below; when
+ * both ids are either -1 or already in place no chown is issued.
+ * Returns 0 on success (or when nothing had to change), otherwise the
+ * return value of the failing lstat/chown call. */
+static int
+posix_set_owner(xlator_t *this, uid_t uid, gid_t gid)
+{
+    struct posix_private *priv = this->private;
+    struct stat brick_st = {
+        0,
+    };
+    int uid_ok = 0;
+    int gid_ok = 0;
+    int rc = -1;
+
+    rc = sys_lstat(priv->base_path, &brick_st);
+    if (rc != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED,
+               "Failed to stat "
+               "brick path %s",
+               priv->base_path);
+        return rc;
+    }
+
+    uid_ok = (uid == -1) || (brick_st.st_uid == uid);
+    gid_ok = (gid == -1) || (brick_st.st_gid == gid);
+    if (uid_ok && gid_ok)
+        return 0;
+
+    rc = sys_chown(priv->base_path, uid, gid);
+    if (rc != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED,
+               "Failed to set uid/gid for"
+               " brick path %s",
+               priv->base_path);
+    }
+
+    return rc;
+}
+
+/* Copy the gfid2path separator into the private data.
+ *
+ * @priv: private data whose gfid2path_sep buffer receives the copy.
+ * @str:  separator string; must be 1 to 7 characters long.
+ *
+ * Returns 0 when the separator was stored, -1 when either argument is
+ * NULL or the length is out of range (nothing is written in that case).
+ * The length is kept in a size_t so that an oversized string can never
+ * wrap around into the accepted range before the copy. */
+static int
+set_gfid2path_separator(struct posix_private *priv, const char *str)
+{
+    size_t str_len = 0;
+
+    if (!priv || !str)
+        return -1;
+
+    str_len = strlen(str);
+    if (str_len > 0 && str_len < 8) {
+        strcpy(priv->gfid2path_sep, str);
+        return 0;
+    }
+
+    return -1;
+}
+
+/* Translate the textual "batch-fsync-mode" value into its BATCH_*
+ * constant and store it in priv->batch_fsync_mode.
+ * Returns 0 on a match, -1 (leaving priv untouched) for an unknown
+ * string. */
+static int
+set_batch_fsync_mode(struct posix_private *priv, const char *str)
+{
+    static const struct {
+        const char *name;
+        int mode;
+    } known_modes[] = {
+        {"none", BATCH_NONE},
+        {"syncfs", BATCH_SYNCFS},
+        {"syncfs-single-fsync", BATCH_SYNCFS_SINGLE_FSYNC},
+        {"syncfs-reverse-fsync", BATCH_SYNCFS_REVERSE_FSYNC},
+        {"reverse-fsync", BATCH_REVERSE_FSYNC},
+    };
+    size_t idx = 0;
+
+    for (idx = 0; idx < sizeof(known_modes) / sizeof(known_modes[0]); idx++) {
+        if (strcmp(str, known_modes[idx].name) == 0) {
+            priv->batch_fsync_mode = known_modes[idx].mode;
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
+#ifdef GF_DARWIN_HOST_OS
+/* Translate the textual "xattr-user-namespace-mode" value into its
+ * XATTR_* constant and store it in priv->xattr_user_namespace.
+ * Returns 0 on a match, -1 (leaving priv untouched) otherwise. */
+static int
+set_xattr_user_namespace_mode(struct posix_private *priv, const char *str)
+{
+    static const struct {
+        const char *name;
+        int mode;
+    } known_modes[] = {
+        {"none", XATTR_NONE},
+        {"strip", XATTR_STRIP},
+        {"append", XATTR_APPEND},
+        {"both", XATTR_BOTH},
+    };
+    size_t idx = 0;
+
+    for (idx = 0; idx < sizeof(known_modes) / sizeof(known_modes[0]); idx++) {
+        if (strcmp(str, known_modes[idx].name) == 0) {
+            priv->xattr_user_namespace = known_modes[idx].mode;
+            return 0;
+        }
+    }
+
+    return -1;
+}
+#endif
+
+/**
+ * posix_reconfigure - apply changed volume options to a running brick.
+ *
+ * @this:    the posix xlator; its private data is updated in place.
+ * @options: dictionary carrying the new option values.
+ *
+ * Returns 0 when every option was applied and a non-zero value on the
+ * first failure. There is no rollback: options applied before the failure
+ * stay applied.
+ *
+ * GF_OPTION_RECONF() jumps to `out` without touching `ret`, so `ret` has
+ * to be non-zero whenever such a jump is possible. It is therefore set
+ * back to -1 after every call whose result is stored in it.
+ */
+int
+posix_reconfigure(xlator_t *this, dict_t *options)
+{
+    int ret = -1;
+    struct posix_private *priv = NULL;
+    int32_t uid = -1;
+    int32_t gid = -1;
+    char *batch_fsync_mode_str = NULL;
+    char *gfid2path_sep = NULL;
+    int32_t force_create_mode = -1;
+    int32_t force_directory_mode = -1;
+    int32_t create_mask = -1;
+    int32_t create_directory_mask = -1;
+
+    priv = this->private;
+
+    GF_OPTION_RECONF("brick-uid", uid, options, int32, out);
+    GF_OPTION_RECONF("brick-gid", gid, options, int32, out);
+    if (uid != -1 || gid != -1)
+        posix_set_owner(this, uid, gid);
+
+    GF_OPTION_RECONF("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
+                     options, uint32, out);
+
+    GF_OPTION_RECONF("batch-fsync-mode", batch_fsync_mode_str, options, str,
+                     out);
+
+    if (set_batch_fsync_mode(priv, batch_fsync_mode_str) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT,
+               "Unknown mode string: %s", batch_fsync_mode_str);
+        goto out;
+    }
+
+    GF_OPTION_RECONF("gfid2path-separator", gfid2path_sep, options, str, out);
+    if (set_gfid2path_separator(priv, gfid2path_sep) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT,
+               "Length of separator exceeds 7: %s", gfid2path_sep);
+        goto out;
+    }
+
+#ifdef GF_DARWIN_HOST_OS
+
+    char *xattr_user_namespace_mode_str = NULL;
+
+    GF_OPTION_RECONF("xattr-user-namespace-mode", xattr_user_namespace_mode_str,
+                     options, str, out);
+
+    if (set_xattr_user_namespace_mode(priv, xattr_user_namespace_mode_str) !=
+        0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT,
+               "Unknown xattr user namespace mode string: %s",
+               xattr_user_namespace_mode_str);
+        goto out;
+    }
+
+#endif
+
+    GF_OPTION_RECONF("linux-aio", priv->aio_configured, options, bool, out);
+
+    if (priv->aio_configured)
+        posix_aio_on(this);
+    else
+        posix_aio_off(this);
+
+    GF_OPTION_RECONF("update-link-count-parent", priv->update_pgfid_nlinks,
+                     options, bool, out);
+
+    GF_OPTION_RECONF("gfid2path", priv->gfid2path, options, bool, out);
+
+    GF_OPTION_RECONF("node-uuid-pathinfo", priv->node_uuid_pathinfo, options,
+                     bool, out);
+
+    if (priv->node_uuid_pathinfo && (gf_uuid_is_null(priv->glusterd_uuid))) {
+        gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL,
+               "glusterd uuid is NULL, pathinfo xattr would"
+               " fallback to <hostname>:<export>");
+    }
+
+    GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size,
+                     out);
+    /* option can be any one of percent or bytes */
+    priv->disk_unit = 0;
+    if (priv->disk_reserve < 100.0)
+        priv->disk_unit = 'p';
+
+    if (priv->disk_reserve) {
+        ret = posix_spawn_disk_space_check_thread(this);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+                   "Getting disk space check from thread failed");
+            goto out;
+        }
+        /* ret is 0 here; re-arm it for the option macros that follow. */
+        ret = -1;
+    }
+
+    GF_OPTION_RECONF("health-check-interval", priv->health_check_interval,
+                     options, uint32, out);
+    GF_OPTION_RECONF("health-check-timeout", priv->health_check_timeout,
+                     options, uint32, out);
+    if (priv->health_check_interval) {
+        ret = posix_spawn_health_check_thread(this);
+        if (ret)
+            goto out;
+        /* ret is 0 here; re-arm it for the option macros that follow. */
+        ret = -1;
+    }
+
+    GF_OPTION_RECONF("shared-brick-count", priv->shared_brick_count, options,
+                     int32, out);
+
+    GF_OPTION_RECONF("disable-landfill-purge", priv->disable_landfill_purge,
+                     options, bool, out);
+    if (priv->disable_landfill_purge) {
+        gf_log(this->name, GF_LOG_WARNING,
+               "Janitor WILL NOT purge the landfill directory. "
+               "Your landfill directory"
+               " may fill up this brick.");
+    } else {
+        gf_msg_debug(this->name, 0,
+                     "Janitor will purge the landfill "
+                     "directory, which is default behavior");
+    }
+
+    GF_OPTION_RECONF("force-create-mode", force_create_mode, options, int32,
+                     out);
+    priv->force_create_mode = force_create_mode;
+
+    GF_OPTION_RECONF("force-directory-mode", force_directory_mode, options,
+                     int32, out);
+    priv->force_directory_mode = force_directory_mode;
+
+    GF_OPTION_RECONF("create-mask", create_mask, options, int32, out);
+    priv->create_mask = create_mask;
+
+    GF_OPTION_RECONF("create-directory-mask", create_directory_mask, options,
+                     int32, out);
+    priv->create_directory_mask = create_directory_mask;
+
+    GF_OPTION_RECONF("max-hardlinks", priv->max_hardlinks, options, uint32,
+                     out);
+
+    GF_OPTION_RECONF("fips-mode-rchecksum", priv->fips_mode_rchecksum, options,
+                     bool, out);
+
+    GF_OPTION_RECONF("ctime", priv->ctime, options, bool, out);
+
+    ret = 0;
+out:
+    return ret;
+}
+
+/* nftw() callback used to empty the unlink directory.
+ *
+ * Files, symlinks and un-stat-able entries are unlinked; directories are
+ * removed with rmdir unless they are the walk root (level 0), which is
+ * kept. A failure is only logged: the function always returns 0 so that
+ * the tree walk carries on with the remaining entries. */
+int32_t
+posix_delete_unlink_entry(const char *fpath, const struct stat *sb,
+                          int typeflag, struct FTW *ftwbuf)
+{
+    int ret = 0;
+
+    if (!fpath)
+        return 0;
+
+    switch (typeflag) {
+        case FTW_SL:
+        case FTW_NS:
+        case FTW_F:
+        case FTW_SLN:
+            ret = sys_unlink(fpath);
+            break;
+        case FTW_D:
+        case FTW_DP:
+        case FTW_DNR:
+            if (ftwbuf->level != 0) {
+                ret = sys_rmdir(fpath);
+            }
+            break;
+        default:
+            break;
+    }
+    if (ret) {
+        /* The two literals are concatenated by the compiler; the
+         * separator keeps them from running together in the log. */
+        gf_msg("posix_delete_unlink_entry", GF_LOG_WARNING, errno,
+               P_MSG_HANDLE_CREATE,
+               "Deletion of entries %s failed. "
+               "Please delete it manually",
+               fpath);
+    }
+
+    return 0;
+}
+
+/* Remove everything below unlink_path (the directory itself is kept by
+ * the per-entry callback). The walk is depth-first, does not follow
+ * symlinks and uses at most 2 descriptors. Returns nftw()'s result. */
+int32_t
+posix_delete_unlink(const char *unlink_path)
+{
+    const int walk_flags = FTW_DEPTH | FTW_PHYS;
+    int rc = nftw(unlink_path, posix_delete_unlink_entry, 2, walk_flags);
+
+    if (rc != 0) {
+        gf_msg("posix_delete_unlink", GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+               "Deleting files from %s failed", unlink_path);
+    }
+
+    return rc;
+}
+
+/* Make sure "<base_path>/GF_UNLINK_PATH" exists and is empty.
+ *
+ * - If the path exists and is a directory, its contents are purged
+ *   (best effort: a purge failure is logged by the helper, not returned).
+ * - If it does not exist, it is created.
+ * - Anything else (not a directory, stat error other than ENOENT, mkdir
+ *   failure) is logged and reported.
+ *
+ * Returns 0 on success, -1 on failure. */
+int32_t
+posix_create_unlink_dir(xlator_t *this)
+{
+    struct posix_private *priv = this->private;
+    struct stat stbuf = {
+        0,
+    };
+    char unlink_path[PATH_MAX] = {
+        0,
+    };
+    int ret = -1;
+
+    (void)snprintf(unlink_path, sizeof(unlink_path), "%s/%s", priv->base_path,
+                   GF_UNLINK_PATH);
+
+    ret = sys_stat(unlink_path, &stbuf);
+    if (ret == 0) {
+        if (!S_ISDIR(stbuf.st_mode)) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+                   "Not a directory: %s", unlink_path);
+            return -1;
+        }
+        /* Leftovers are cleaned up on a best-effort basis. */
+        (void)posix_delete_unlink(unlink_path);
+        return 0;
+    }
+
+    if (ret == -1 && errno != ENOENT) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+               "Checking for %s failed", unlink_path);
+        return -1;
+    }
+
+    ret = sys_mkdir(unlink_path, 0600);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+               "Creating directory %s failed", unlink_path);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Open directory `dir_name` relative to `pdirfd`, creating it (mode 0700)
+ * when it does not exist yet.
+ *
+ * Returns the open descriptor (>= 0) on success and a negative value on
+ * failure, with errno describing the failing call. A concurrent creation
+ * of the same directory (mkdirat failing with EEXIST) is not an error:
+ * the directory is simply opened afterwards. */
+int
+posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name)
+{
+    int ret = -1;
+    int saved_errno = 0;
+
+    ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0);
+    if (ret >= 0 || errno != ENOENT)
+        return ret;
+
+    ret = sys_mkdirat(pdirfd, dir_name, 0700);
+    if (ret < 0 && errno != EEXIST) {
+        saved_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_HANDLE_CREATE,
+               "Creating directory %s failed", dir_name);
+        /* Logging may clobber errno; callers still inspect it. */
+        errno = saved_errno;
+        return ret;
+    }
+
+    ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0);
+    if (ret < 0) {
+        saved_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_HANDLE_CREATE,
+               "Opening directory %s failed", dir_name);
+        errno = saved_errno;
+    }
+
+    return ret;
+}
+
+/**
+ * init - bring up the posix xlator for one brick.
+ *
+ * Validates the export directory (existence, xattr support, volume-id,
+ * root gfid), allocates and fills the private data, opens the handle
+ * directories, starts the helper threads and reads all options.
+ *
+ * Returns 0 on success. On failure a non-zero value is returned, the
+ * descriptors and strings owned by the private data are released and
+ * this->private is reset to NULL.
+ *
+ * GF_OPTION_INIT() jumps to `out` without touching `ret`, so `ret` is kept
+ * at -1 ("armed") wherever such a jump is possible and only becomes 0 once
+ * the last option has been read.
+ *
+ * NOTE(review): the failure path frees the private data even when helper
+ * threads were already started earlier in this function - confirm that
+ * the process is torn down right after a failed init.
+ */
+int
+posix_init(xlator_t *this)
+{
+    struct posix_private *_private = NULL;
+    data_t *dir_data = NULL;
+    data_t *tmp_data = NULL;
+    struct stat buf = {
+        0,
+    };
+    gf_boolean_t tmp_bool = 0;
+    int ret = 0;
+    int op_ret = -1;
+    int op_errno = 0;
+    ssize_t size = -1;
+    uuid_t old_uuid = {
+        0,
+    };
+    uuid_t dict_uuid = {
+        0,
+    };
+    uuid_t gfid = {
+        0,
+    };
+    static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    char *guuid = NULL;
+    char *hostname_opt = NULL;
+    int32_t uid = -1;
+    int32_t gid = -1;
+    char *batch_fsync_mode_str = NULL;
+    char *gfid2path_sep = NULL;
+    int force_create = -1;
+    int force_directory = -1;
+    int create_mask = -1;
+    int create_directory_mask = -1;
+    char dir_handle[PATH_MAX] = {
+        0,
+    };
+    int i;
+    char fhash[4] = {
+        0,
+    };
+    int hdirfd = -1;
+    char value;
+
+    dir_data = dict_get(this->options, "directory");
+
+    if (this->children) {
+        gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR,
+               "FATAL: storage/posix cannot have subvolumes");
+        ret = -1;
+        goto out;
+    }
+
+    if (!this->parents) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING,
+               "Volume is dangling. Please check the volume file.");
+    }
+
+    if (!dir_data) {
+        gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_EXPORT_DIR_MISSING,
+               "Export directory not specified in volume file.");
+        ret = -1;
+        goto out;
+    }
+
+    umask(000);  // umask `masking' is done at the client side
+
+    /* Check whether the specified directory exists, if not log it. */
+    op_ret = sys_stat(dir_data->data, &buf);
+    if ((op_ret != 0) || !S_ISDIR(buf.st_mode)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED,
+               "Directory '%s' doesn't exist, exiting.", dir_data->data);
+        ret = -1;
+        goto out;
+    }
+
+    _private = GF_CALLOC(1, sizeof(*_private), gf_posix_mt_posix_private);
+    if (!_private) {
+        ret = -1;
+        goto out;
+    }
+
+    /* Mark every descriptor as "not open" first so that the cleanup at
+     * `out` is safe no matter where a failure happens. */
+    _private->dirfd = -1;
+    _private->mount_lock = -1;
+    for (i = 0; i < 256; i++)
+        _private->arrdfd[i] = -1;
+
+    _private->base_path = gf_strdup(dir_data->data);
+    if (!_private->base_path) {
+        ret = -1;
+        goto out;
+    }
+    _private->base_path_length = dir_data->len - 1;
+
+    ret = dict_get_str(this->options, "hostname", &hostname_opt);
+    if (!ret) {
+        /* The dictionary keeps ownership of its value; take a private
+         * copy because the hostname is released with GF_FREE() later. */
+        _private->hostname = gf_strdup(hostname_opt);
+        if (!_private->hostname) {
+            ret = -1;
+            goto out;
+        }
+    } else {
+        _private->hostname = GF_CALLOC(256, sizeof(char), gf_common_mt_char);
+        if (!_private->hostname) {
+            ret = -1;
+            goto out;
+        }
+        ret = gethostname(_private->hostname, 256);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HOSTNAME_MISSING,
+                   "could not find hostname ");
+        }
+    }
+
+    /* Check for Extended attribute support, if not present, log it */
+    size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value));
+
+    if ((size == -1) && (errno == EOPNOTSUPP)) {
+        gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR,
+               "getxattr returned %zd", size);
+        tmp_data = dict_get(this->options, "mandate-attribute");
+        if (tmp_data) {
+            if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION,
+                       "wrong option provided for key "
+                       "\"mandate-attribute\"");
+                ret = -1;
+                goto out;
+            }
+            if (!tmp_bool) {
+                gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOTSUP,
+                       "Extended attribute not supported, "
+                       "starting as per option");
+            } else {
+                gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_XATTR_NOTSUP,
+                       "Extended attribute not supported, "
+                       "exiting.");
+                ret = -1;
+                goto out;
+            }
+        } else {
+            gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_XATTR_NOTSUP,
+                   "Extended attribute not supported, exiting.");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    tmp_data = dict_get(this->options, "volume-id");
+    if (tmp_data) {
+        op_ret = gf_uuid_parse(tmp_data->data, dict_uuid);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_VOLUME_ID,
+                   "wrong volume-id (%s) set"
+                   " in volume file",
+                   tmp_data->data);
+            ret = -1;
+            goto out;
+        }
+        size = sys_lgetxattr(dir_data->data, "trusted.glusterfs.volume-id",
+                             old_uuid, 16);
+        if (size == 16) {
+            if (gf_uuid_compare(old_uuid, dict_uuid)) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_VOLUME_ID,
+                       "mismatching volume-id (%s) received. "
+                       "already is a part of volume %s ",
+                       tmp_data->data, uuid_utoa(old_uuid));
+                gf_event(EVENT_POSIX_ALREADY_PART_OF_VOLUME,
+                         "volume-id=%s;brick=%s:%s", uuid_utoa(old_uuid),
+                         _private->hostname, _private->base_path);
+                ret = -1;
+                goto out;
+            }
+        } else if ((size == -1) && (errno == ENODATA || errno == ENOATTR)) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_VOLUME_ID_ABSENT,
+                   "Extended attribute trusted.glusterfs."
+                   "volume-id is absent");
+            gf_event(EVENT_POSIX_BRICK_NOT_IN_VOLUME, "brick=%s:%s",
+                     _private->hostname, _private->base_path);
+            ret = -1;
+            goto out;
+
+        } else if ((size == -1) && (errno != ENODATA) && (errno != ENOATTR)) {
+            /* Wrong 'volume-id' is set, it should be error */
+            gf_event(EVENT_POSIX_BRICK_VERIFICATION_FAILED, "brick=%s:%s",
+                     _private->hostname, _private->base_path);
+            gf_msg(this->name, GF_LOG_WARNING, errno,
+                   P_MSG_VOLUME_ID_FETCH_FAILED,
+                   "%s: failed to fetch volume-id", dir_data->data);
+            ret = -1;
+            goto out;
+        } else {
+            ret = -1;
+            gf_event(EVENT_POSIX_BRICK_VERIFICATION_FAILED, "brick=%s:%s",
+                     _private->hostname, _private->base_path);
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_VOLUME_ID_FETCH_FAILED,
+                   "failed to fetch proper volume id from export");
+            goto out;
+        }
+    }
+
+    /* Now check if the export directory has some other 'gfid',
+       other than that of root '/' */
+    size = sys_lgetxattr(dir_data->data, "trusted.gfid", gfid, 16);
+    if (size == 16) {
+        if (!__is_root_gfid(gfid)) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_SET_FAILED,
+                   "%s: gfid (%s) is not that of glusterfs '/' ",
+                   dir_data->data, uuid_utoa(gfid));
+            ret = -1;
+            goto out;
+        }
+    } else if (size != -1) {
+        /* Wrong 'gfid' is set, it should be error */
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_SET_FAILED,
+               "%s: wrong value set as gfid", dir_data->data);
+        ret = -1;
+        goto out;
+    } else if ((size == -1) && (errno != ENODATA) && (errno != ENOATTR)) {
+        /* Wrong 'gfid' is set, it should be error */
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_SET_FAILED,
+               "%s: failed to fetch gfid", dir_data->data);
+        ret = -1;
+        goto out;
+    } else {
+        /* First time volume, set the GFID */
+        size = sys_lsetxattr(dir_data->data, "trusted.gfid", rootgfid, 16,
+                             XATTR_CREATE);
+        if (size == -1) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_GFID_SET_FAILED,
+                   "%s: failed to set gfid", dir_data->data);
+            ret = -1;
+            goto out;
+        }
+    }
+
+    ret = 0;
+
+    size = sys_lgetxattr(dir_data->data, POSIX_ACL_ACCESS_XATTR, NULL, 0);
+    if ((size < 0) && (errno == ENOTSUP)) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_NOTSUP,
+               "Posix access control list is not supported.");
+        gf_event(EVENT_POSIX_ACL_NOT_SUPPORTED, "brick=%s:%s",
+                 _private->hostname, _private->base_path);
+    }
+
+    /*
+     * _XOPEN_PATH_MAX is the longest file path len we MUST
+     * support according to POSIX standard. When prepended
+     * by the brick base path it may exceed backed filesystem
+     * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If
+     * this is the case, chdir() to the brick base path and
+     * use relative paths when they are too long. See also
+     * MAKE_REAL_PATH in posix-handle.h
+     */
+    _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX);
+    if (_private->path_max != -1 &&
+        _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) {
+        ret = chdir(_private->base_path);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_BASEPATH_CHDIR_FAILED,
+                   "chdir() to \"%s\" failed", _private->base_path);
+            goto out;
+        }
+#ifdef __NetBSD__
+        /*
+         * At least on NetBSD, the chdir() above uncovers a
+         * race condition which cause file lookup to fail
+         * with ENODATA for a few seconds. The volume quickly
+         * reaches a sane state, but regression tests are fast
+         * enough to choke on it. The reason is obscure (as
+         * often with race conditions), but sleeping here for
+         * a second seems to workaround the problem.
+         */
+        sleep(1);
+#endif
+    }
+
+    LOCK_INIT(&_private->lock);
+    GF_ATOMIC_INIT(_private->read_value, 0);
+    GF_ATOMIC_INIT(_private->write_value, 0);
+
+    _private->export_statfs = 1;
+    tmp_data = dict_get(this->options, "export-statfs-size");
+    if (tmp_data) {
+        if (gf_string2boolean(tmp_data->data, &_private->export_statfs) == -1) {
+            ret = -1;
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL,
+                   "'export-statfs-size' takes only boolean "
+                   "options");
+            goto out;
+        }
+        if (!_private->export_statfs)
+            gf_msg_debug(this->name, 0, "'statfs()' returns dummy size");
+    }
+
+    _private->background_unlink = 0;
+    tmp_data = dict_get(this->options, "background-unlink");
+    if (tmp_data) {
+        if (gf_string2boolean(tmp_data->data, &_private->background_unlink) ==
+            -1) {
+            ret = -1;
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL,
+                   "'background-unlink'"
+                   " takes only boolean options");
+            goto out;
+        }
+
+        if (_private->background_unlink)
+            gf_msg_debug(this->name, 0,
+                         "unlinks will be performed in background");
+    }
+
+    tmp_data = dict_get(this->options, "o-direct");
+    if (tmp_data) {
+        if (gf_string2boolean(tmp_data->data, &_private->o_direct) == -1) {
+            ret = -1;
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL,
+                   "wrong option provided for 'o-direct'");
+            goto out;
+        }
+        if (_private->o_direct)
+            gf_msg_debug(this->name, 0,
+                         "o-direct mode is enabled"
+                         " (O_DIRECT for every open)");
+    }
+
+    tmp_data = dict_get(this->options, "update-link-count-parent");
+    if (tmp_data) {
+        if (gf_string2boolean(tmp_data->data, &_private->update_pgfid_nlinks) ==
+            -1) {
+            ret = -1;
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION,
+                   "wrong value provided "
+                   "for 'update-link-count-parent'");
+            goto out;
+        }
+        if (_private->update_pgfid_nlinks)
+            gf_msg_debug(this->name, 0,
+                         "update-link-count-parent"
+                         " is enabled. Thus for each file an "
+                         "extended attribute representing the "
+                         "number of hardlinks for that file "
+                         "within the same parent directory is"
+                         " set.");
+    }
+
+    ret = dict_get_str(this->options, "glusterd-uuid", &guuid);
+    if (!ret) {
+        if (gf_uuid_parse(guuid, _private->glusterd_uuid))
+            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_INVALID_NODE_UUID,
+                   "Cannot parse "
+                   "glusterd (node) UUID, node-uuid xattr "
+                   "request would return - \"No such attribute\"");
+    } else {
+        gf_msg_debug(this->name, 0,
+                     "No glusterd (node) UUID passed -"
+                     " node-uuid xattr request will return \"No such"
+                     " attribute\"");
+    }
+
+    /* Armed: a failing GF_OPTION_INIT() must not report success. */
+    ret = -1;
+
+    GF_OPTION_INIT("janitor-sleep-duration", _private->janitor_sleep_duration,
+                   int32, out);
+
+    /* performing open dir on brick dir locks the brick dir
+     * and prevents it from being unmounted
+     */
+    _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY),
+                                    0);
+    if (_private->mount_lock < 0) {
+        ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED,
+               "Could not lock brick directory (%s)", strerror(op_errno));
+        goto out;
+    }
+#ifndef GF_DARWIN_HOST_OS
+    {
+        struct rlimit lim;
+        lim.rlim_cur = 1048576;
+        lim.rlim_max = 1048576;
+
+        if (setrlimit(RLIMIT_NOFILE, &lim) == -1) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SET_ULIMIT_FAILED,
+                   "Failed to set 'ulimit -n "
+                   " 1048576'");
+            lim.rlim_cur = 65536;
+            lim.rlim_max = 65536;
+
+            if (setrlimit(RLIMIT_NOFILE, &lim) == -1) {
+                gf_msg(this->name, GF_LOG_WARNING, errno,
+                       P_MSG_SET_FILE_MAX_FAILED,
+                       "Failed to set maximum allowed open "
+                       "file descriptors to 64k");
+            } else {
+                gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_MAX_FILE_OPEN,
+                       "Maximum allowed "
+                       "open file descriptors set to 65536");
+            }
+        }
+    }
+#endif
+    _private->shared_brick_count = 1;
+    ret = dict_get_int32(this->options, "shared-brick-count",
+                         &_private->shared_brick_count);
+    if (ret == -1) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL,
+               "'shared-brick-count' takes only integer "
+               "values");
+        goto out;
+    }
+    /* Re-arm: the lookup result above is not an init status. */
+    ret = -1;
+
+    this->private = (void *)_private;
+    snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path,
+             GF_HIDDEN_PATH);
+    hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock,
+                                                  dir_handle);
+    if (hdirfd < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+               "error open directory failed for dir %s", dir_handle);
+        ret = -1;
+        goto out;
+    }
+    _private->dirfd = hdirfd;
+    /* One descriptor per two-hex-digit bucket ("00" .. "ff"). */
+    for (i = 0; i < 256; i++) {
+        snprintf(fhash, sizeof(fhash), "%02x", i);
+        _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd,
+                                                                   fhash);
+        if (_private->arrdfd[i] < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+                   "error openat failed for file %s", fhash);
+            ret = -1;
+            goto out;
+        }
+    }
+
+    op_ret = posix_handle_init(this);
+    if (op_ret == -1) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+               "Posix handle setup failed");
+        ret = -1;
+        goto out;
+    }
+
+    op_ret = posix_handle_trash_init(this);
+    if (op_ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH,
+               "Posix landfill setup failed");
+        ret = -1;
+        goto out;
+    }
+
+    op_ret = posix_create_unlink_dir(this);
+    if (op_ret == -1) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+               "Creation of unlink directory failed");
+        ret = -1;
+        goto out;
+    }
+
+    _private->aio_init_done = _gf_false;
+    _private->aio_capable = _gf_false;
+
+    GF_OPTION_INIT("brick-uid", uid, int32, out);
+    GF_OPTION_INIT("brick-gid", gid, int32, out);
+    if (uid != -1 || gid != -1)
+        posix_set_owner(this, uid, gid);
+
+    GF_OPTION_INIT("linux-aio", _private->aio_configured, bool, out);
+
+    if (_private->aio_configured) {
+        op_ret = posix_aio_on(this);
+
+        if (op_ret == -1) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO,
+                   "Posix AIO init failed");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    GF_OPTION_INIT("node-uuid-pathinfo", _private->node_uuid_pathinfo, bool,
+                   out);
+    if (_private->node_uuid_pathinfo &&
+        (gf_uuid_is_null(_private->glusterd_uuid))) {
+        gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL,
+               "glusterd uuid is NULL, pathinfo xattr would"
+               " fallback to <hostname>:<export>");
+    }
+
+    _private->disk_space_check_active = _gf_false;
+    _private->disk_space_full = 0;
+
+    GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out);
+
+    /* option can be any one of percent or bytes */
+    _private->disk_unit = 0;
+    if (_private->disk_reserve < 100.0)
+        _private->disk_unit = 'p';
+
+    if (_private->disk_reserve) {
+        ret = posix_spawn_disk_space_check_thread(this);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
+                   "Getting disk space check from thread failed ");
+            goto out;
+        }
+        ret = -1; /* re-arm */
+    }
+
+    _private->health_check_active = _gf_false;
+    GF_OPTION_INIT("health-check-interval", _private->health_check_interval,
+                   uint32, out);
+    GF_OPTION_INIT("health-check-timeout", _private->health_check_timeout,
+                   uint32, out);
+    if (_private->health_check_interval) {
+        ret = posix_spawn_health_check_thread(this);
+        if (ret)
+            goto out;
+        ret = -1; /* re-arm */
+    }
+    posix_janitor_timer_start(this);
+
+    pthread_mutex_init(&_private->fsync_mutex, NULL);
+    pthread_cond_init(&_private->fsync_cond, NULL);
+    pthread_mutex_init(&_private->janitor_mutex, NULL);
+    pthread_cond_init(&_private->janitor_cond, NULL);
+    pthread_cond_init(&_private->fd_cond, NULL);
+    INIT_LIST_HEAD(&_private->fsyncs);
+    _private->rel_fdcount = 0;
+    ret = posix_spawn_ctx_janitor_thread(this);
+    if (ret)
+        goto out;
+
+    ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
+                           "posixfsy");
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, errno,
+               P_MSG_FSYNCER_THREAD_CREATE_FAILED,
+               "fsyncer thread creation failed");
+        goto out;
+    }
+    /* Re-arm: every `goto out` below this point is a failure. */
+    ret = -1;
+
+    GF_OPTION_INIT("batch-fsync-mode", batch_fsync_mode_str, str, out);
+
+    if (set_batch_fsync_mode(_private, batch_fsync_mode_str) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT,
+               "Unknown mode string: %s", batch_fsync_mode_str);
+        goto out;
+    }
+
+    GF_OPTION_INIT("gfid2path", _private->gfid2path, bool, out);
+
+    GF_OPTION_INIT("gfid2path-separator", gfid2path_sep, str, out);
+    if (set_gfid2path_separator(_private, gfid2path_sep) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT,
+               "Length of separator exceeds 7: %s", gfid2path_sep);
+        goto out;
+    }
+
+#ifdef GF_DARWIN_HOST_OS
+
+    char *xattr_user_namespace_mode_str = NULL;
+
+    GF_OPTION_INIT("xattr-user-namespace-mode", xattr_user_namespace_mode_str,
+                   str, out);
+
+    if (set_xattr_user_namespace_mode(_private,
+                                      xattr_user_namespace_mode_str) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT,
+               "Unknown xattr user namespace mode string: %s",
+               xattr_user_namespace_mode_str);
+        goto out;
+    }
+#endif
+
+    GF_OPTION_INIT("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
+                   uint32, out);
+
+    GF_OPTION_INIT("disable-landfill-purge", _private->disable_landfill_purge,
+                   bool, out);
+    if (_private->disable_landfill_purge) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+               "Janitor WILL NOT purge the landfill directory. "
+               "Your landfill directory"
+               " may fill up this brick.");
+    }
+
+    GF_OPTION_INIT("force-create-mode", force_create, int32, out);
+    _private->force_create_mode = force_create;
+
+    GF_OPTION_INIT("force-directory-mode", force_directory, int32, out);
+    _private->force_directory_mode = force_directory;
+
+    GF_OPTION_INIT("create-mask", create_mask, int32, out);
+    _private->create_mask = create_mask;
+
+    GF_OPTION_INIT("create-directory-mask", create_directory_mask, int32, out);
+    _private->create_directory_mask = create_directory_mask;
+
+    GF_OPTION_INIT("max-hardlinks", _private->max_hardlinks, uint32, out);
+
+    GF_OPTION_INIT("fips-mode-rchecksum", _private->fips_mode_rchecksum, bool,
+                   out);
+
+    GF_OPTION_INIT("ctime", _private->ctime, bool, out);
+
+    /* Everything was read and started successfully. */
+    ret = 0;
+
+out:
+    if (ret) {
+        if (_private) {
+            if (_private->dirfd >= 0) {
+                sys_close(_private->dirfd);
+                _private->dirfd = -1;
+            }
+
+            for (i = 0; i < 256; i++) {
+                if (_private->arrdfd[i] >= 0) {
+                    sys_close(_private->arrdfd[i]);
+                    _private->arrdfd[i] = -1;
+                }
+            }
+            /*unlock brick dir*/
+            if (_private->mount_lock >= 0) {
+                (void)sys_close(_private->mount_lock);
+                _private->mount_lock = -1;
+            }
+
+            GF_FREE(_private->base_path);
+
+            GF_FREE(_private->hostname);
+
+            GF_FREE(_private->trash_path);
+
+            GF_FREE(_private);
+        }
+
+        this->private = NULL;
+    }
+    return ret;
+}
+/*
+ * posix_fini - tear down the per-xlator state stored in this->private.
+ *
+ * Returns immediately when this->private is NULL. Otherwise it clears the
+ * health-check flag under priv->lock, closes the descriptors kept in priv,
+ * cleans up the helper threads and the janitor timer, decrements the
+ * context-wide pxl_count, destroys the synchronisation objects, frees priv
+ * and resets this->private to NULL.
+ *
+ * NOTE(review): gf_thread_cleanup_xint() is defined elsewhere; it is assumed
+ * here to stop and reap the given thread - confirm against its definition.
+ */
+void
+posix_fini(xlator_t *this)
+{
+ struct posix_private *priv = this->private;
+ gf_boolean_t health_check = _gf_false;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count;
+ int ret = 0;
+ int i = 0;
+ /* No private state attached to this xlator: nothing to release. */
+ if (!priv)
+ return;
+ LOCK(&priv->lock);
+ {
+ health_check = priv->health_check_active; /* remember the old value for the check below */
+ priv->health_check_active = _gf_false;
+ }
+ UNLOCK(&priv->lock);
+ /* Close the descriptors held in priv: dirfd first, then each of the 256 arrdfd slots. */
+ if (priv->dirfd >= 0) {
+ sys_close(priv->dirfd);
+ priv->dirfd = -1;
+ }
+
+ for (i = 0; i < 256; i++) {
+ if (priv->arrdfd[i] >= 0) {
+ sys_close(priv->arrdfd[i]);
+ priv->arrdfd[i] = -1;
+ }
+ }
+ /* Only clean up the health-check thread if the flag sampled under the lock was set. */
+ if (health_check) {
+ (void)gf_thread_cleanup_xint(priv->health_check);
+ priv->health_check = 0;
+ }
+ /* Same for the disk-space-check thread; its active flag is cleared without taking priv->lock. */
+ if (priv->disk_space_check) {
+ priv->disk_space_check_active = _gf_false;
+ (void)gf_thread_cleanup_xint(priv->disk_space_check);
+ priv->disk_space_check = 0;
+ }
+ /* Remove the janitor timer from the context's timer wheel; the timer is freed even if removal fails. */
+ if (priv->janitor) {
+ /*TODO: Make sure the synctask is also complete */
+ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
+ "Failed to delete janitor timer");
+ }
+ GF_FREE(priv->janitor);
+ priv->janitor = NULL;
+ }
+ /* Decrement ctx->pxl_count under ctx->fd_lock; whoever brings it to zero signals fd_cond. */
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ count = --ctx->pxl_count;
+ if (count == 0) {
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+ /* The caller that brought the count to zero also joins the ctx->janitor thread (outside the lock). */
+ if (count == 0) {
+ pthread_join(ctx->janitor, NULL);
+ }
+ /* Clean up the fsyncer thread if one was recorded. */
+ if (priv->fsyncer) {
+ (void)gf_thread_cleanup_xint(priv->fsyncer);
+ priv->fsyncer = 0;
+ }
+ /*unlock brick dir*/
+ if (priv->mount_lock >= 0) {
+ (void)sys_close(priv->mount_lock);
+ priv->mount_lock = -1;
+ }
+ /* Free owned strings, destroy the lock/mutex/cond objects, then free priv itself. */
+ GF_FREE(priv->base_path);
+ LOCK_DESTROY(&priv->lock);
+ pthread_mutex_destroy(&priv->fsync_mutex);
+ pthread_cond_destroy(&priv->fsync_cond);
+ pthread_mutex_destroy(&priv->janitor_mutex);
+ pthread_cond_destroy(&priv->janitor_cond);
+ GF_FREE(priv->hostname);
+ GF_FREE(priv->trash_path);
+ GF_FREE(priv);
+ this->private = NULL; /* so a later call takes the early return above */
+
+ return;
+}
+
+/*
+ * Option table for the posix storage translator.
+ *
+ * Each entry names an option key, its value type, an optional default and
+ * validation range, and (where present) the help text, op-version, flags
+ * and tags. The table is terminated by an entry whose key is NULL.
+ *
+ * The description strings are built from adjacent string literals, so every
+ * fragment that continues a sentence must carry its own separating space.
+ */
+struct volume_options posix_options[] = {
+    {.key = {"o-direct"}, .type = GF_OPTION_TYPE_BOOL},
+    {.key = {"directory"},
+     .type = GF_OPTION_TYPE_PATH,
+     .default_value = "{{brick.path}}"},
+    {.key = {"hostname"}, .type = GF_OPTION_TYPE_ANY},
+    {.key = {"export-statfs-size"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "on"},
+    {.key = {"mandate-attribute"}, .type = GF_OPTION_TYPE_BOOL},
+    {.key = {"background-unlink"}, .type = GF_OPTION_TYPE_BOOL},
+    {.key = {"janitor-sleep-duration"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 1,
+     .validate = GF_OPT_VALIDATE_MIN,
+     .default_value = "10",
+     .description = "Interval (in seconds) between times the internal "
+                    "'landfill' directory is emptied."},
+    {.key = {"volume-id"},
+     .type = GF_OPTION_TYPE_ANY,
+     .default_value = "{{brick.volumeid}}"},
+    {.key = {"glusterd-uuid"}, .type = GF_OPTION_TYPE_STR},
+    {.key = {"linux-aio"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .description = "Support for native Linux AIO",
+     .op_version = {1},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"brick-uid"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = -1,
+     .validate = GF_OPT_VALIDATE_MIN,
+     .default_value = "-1",
+     .description = "Support for setting uid of brick's owner",
+     .op_version = {1},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"brick-gid"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = -1,
+     .validate = GF_OPT_VALIDATE_MIN,
+     .default_value = "-1",
+     .description = "Support for setting gid of brick's owner",
+     .op_version = {1},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"node-uuid-pathinfo"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .description = "return glusterd's node-uuid in pathinfo xattr"
+                    " string instead of hostname",
+     .op_version = {3},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"health-check-interval"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0,
+     .default_value = "30",
+     .validate = GF_OPT_VALIDATE_MIN,
+     .description = "Interval in seconds for a filesystem health check, "
+                    "set to 0 to disable",
+     .op_version = {3},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"health-check-timeout"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0,
+     .default_value = "20",
+     .validate = GF_OPT_VALIDATE_MIN,
+     .description =
+         "Interval in seconds to wait aio_write finish for health check, "
+         "set to 0 to disable",
+     .op_version = {GD_OP_VERSION_4_0_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"reserve"},
+     .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+     .min = 0,
+     .default_value = "1",
+     .validate = GF_OPT_VALIDATE_MIN,
+     .description = "Percentage/Size of disk space to be reserved."
+                    " Set to 0 to disable",
+     .op_version = {GD_OP_VERSION_3_13_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"batch-fsync-mode"},
+     .type = GF_OPTION_TYPE_STR,
+     .default_value = "reverse-fsync",
+     /* One bullet per mode; each bullet ends with exactly one newline. */
+     .description =
+         "Possible values:\n"
+         "\t- syncfs: Perform one syncfs() on behalf of a batch"
+         " of fsyncs.\n"
+         "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
+         " of fsyncs and one fsync() per batch.\n"
+         "\t- syncfs-reverse-fsync: Perform one syncfs() on behalf of a batch"
+         " of fsyncs and fsync() each file in the batch in reverse order.\n"
+         "\t- reverse-fsync: Perform fsync() of each file in the batch in"
+         " reverse order.",
+     .op_version = {3},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"batch-fsync-delay-usec"},
+     .type = GF_OPTION_TYPE_INT,
+     .default_value = "0",
+     .description = "Num of usecs to wait for aggregating fsync"
+                    " requests",
+     .op_version = {3},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"update-link-count-parent"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .description = "Enable placeholders for gfid to path conversion",
+     .op_version = {GD_OP_VERSION_3_6_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+    {.key = {"gfid2path"},
+     .type = GF_OPTION_TYPE_BOOL,
+#ifdef __NetBSD__
+     /*
+      * NetBSD storage of extended attributes for UFS1 badly
+      * scales when the list of extended attributes names rises.
+      * This option can add as many extended attributes names
+      * as we have files, hence we keep it disabled for performance
+      * sake.
+      */
+     .default_value = "off",
+#else
+     .default_value = "on",
+#endif
+     .description = "Enable logging metadata for gfid to path conversion",
+     .op_version = {GD_OP_VERSION_3_12_0},
+     .flags = OPT_FLAG_SETTABLE},
+    {.key = {"gfid2path-separator"},
+     .type = GF_OPTION_TYPE_STR,
+     .default_value = ":",
+     .description = "Path separator for glusterfs.gfidtopath virt xattr",
+     .op_version = {GD_OP_VERSION_3_12_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+/* Use #ifdef, matching the guard around the code that reads this option. */
+#ifdef GF_DARWIN_HOST_OS
+    {.key = {"xattr-user-namespace-mode"},
+     .type = GF_OPTION_TYPE_STR,
+     .default_value = "none",
+     .description =
+         "Option to control XATTR user namespace on the raw filesystem:\n"
+         "\t- None: Will use the user namespace, so files will be exchangeable "
+         "with Linux.\n"
+         " The raw filesystem will not be compatible with OS X Finder.\n"
+         "\t- Strip: Will strip the user namespace before setting. The raw "
+         "filesystem will work in OS X.\n",
+     .op_version = {GD_OP_VERSION_3_6_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+#endif
+    {
+        .key = {"shared-brick-count"},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = "1",
+        .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+        .description =
+            "Number of bricks sharing the same backend export."
+            " Useful for displaying the proper usable size through statvfs() "
+            "call (df command)",
+    },
+    {
+        .key = {"disable-landfill-purge"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .default_value = "off",
+        .description = "Disable glusterfs/landfill purges. "
+                       "WARNING: This can fill up a brick.",
+        .op_version = {GD_OP_VERSION_4_0_0},
+        .tags = {"diagnosis"},
+    },
+    {.key = {"force-create-mode"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0000,
+     .max = 0777,
+     .default_value = "0000",
+     .validate = GF_OPT_VALIDATE_BOTH,
+     .description = "Mode bit permission that will always be set on a file."},
+    {.key = {"force-directory-mode"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0000,
+     .max = 0777,
+     .default_value = "0000",
+     .validate = GF_OPT_VALIDATE_BOTH,
+     .description = "Mode bit permission that will be always set on directory"},
+    {.key = {"create-mask"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0000,
+     .max = 0777,
+     .default_value = "0777",
+     .validate = GF_OPT_VALIDATE_BOTH,
+     .description = "Any bit not set here will be removed from the"
+                    " modes set on a file when it is created"},
+    {.key = {"create-directory-mask"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0000,
+     .max = 0777,
+     .default_value = "0777",
+     .validate = GF_OPT_VALIDATE_BOTH,
+     .description = "Any bit not set here will be removed from the"
+                    " modes set on a directory when it is created"},
+    {.key = {"max-hardlinks"},
+     .type = GF_OPTION_TYPE_INT,
+     .min = 0,
+     .default_value = "100",
+     .op_version = {GD_OP_VERSION_4_0_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+     .tags = {"posix"},
+     .validate = GF_OPT_VALIDATE_MIN,
+     .description = "max number of hardlinks allowed on any one inode.\n"
+                    "0 is unlimited, 1 prevents any hardlinking at all."},
+    {.key = {"fips-mode-rchecksum"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .op_version = {GD_OP_VERSION_4_0_0},
+     .flags = OPT_FLAG_SETTABLE,
+     .tags = {"posix"},
+     .description = "If enabled, posix_rchecksum uses the FIPS compliant"
+                    " SHA256 checksum. MD5 otherwise."},
+    {.key = {"ctime"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "on",
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+     .op_version = {GD_OP_VERSION_4_1_0},
+     .tags = {"ctime"},
+     .description =
+         "When this option is enabled, time attributes (ctime,mtime,atime) "
+         "are stored in xattr to keep it consistent across replica and "
+         "distribute set. The time attributes stored at the backend are "
+         "not considered."},
+    {.key = {NULL}},
+};
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
new file mode 100644
index 00000000000..8cc3ccf8c00
--- /dev/null
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -0,0 +1,2496 @@
+/*
+ Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#define __XOPEN_SOURCE 500
+
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <openssl/md5.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#ifdef HAVE_LINKAT
+#include <fcntl.h>
+#endif /* HAVE_LINKAT */
+
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include "posix.h"
+#include "posix-handle.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
+#include "glusterfs3-xdr.h"
+#include "posix-aio.h"
+#include <glusterfs/glusterfs-acl.h>
+#include "posix-messages.h"
+#include "posix-metadata.h"
+#include <glusterfs/events.h>
+#include "posix-gfid-path.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/syncop.h>
+
+extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
+
+#undef HAVE_SET_FSID /* cleared unconditionally: the #ifdef below always takes its #else branch, and later #ifndef HAVE_SET_FSID sections in this file are compiled */
+#ifdef HAVE_SET_FSID
+/* setfsuid()/setfsgid() variants: remember the ids returned by the calls, switch to the given ids, restore afterwards. */
+#define DECLARE_OLD_FS_ID_VAR \
+ uid_t old_fsuid; \
+ gid_t old_fsgid;
+
+#define SET_FS_ID(uid, gid) \
+ do { \
+ old_fsuid = setfsuid(uid); \
+ old_fsgid = setfsgid(gid); \
+ } while (0)
+
+#define SET_TO_OLD_FS_ID() \
+ do { \
+ setfsuid(old_fsuid); \
+ setfsgid(old_fsgid); \
+ } while (0)
+
+#else
+/* Variants in effect after the #undef above: all three macros expand to nothing. */
+#define DECLARE_OLD_FS_ID_VAR
+#define SET_FS_ID(uid, gid)
+#define SET_TO_OLD_FS_ID()
+
+#endif
+
+/*
+ * Compare the target of the gfid handle symlink for @gfid with the target
+ * expected for the entry described by @loc
+ * ("../../<pargfid[0]>/<pargfid[1]>/<pargfid>/<name>").
+ *
+ * Returns _gf_true only when the link could be read and its target is
+ * byte-for-byte the expected string. A failed readlink, or a target that
+ * fills the whole PATH_MAX buffer, yields _gf_false; the failure is logged
+ * unless errno is ENOENT.
+ */
+static gf_boolean_t
+posix_symlinks_match(xlator_t *this, loc_t *loc, uuid_t gfid)
+{
+    struct posix_private *priv = NULL;
+    char target_expected[PATH_MAX] = {0};
+    char target_on_disk[PATH_MAX] = {
+        0,
+    };
+    char *dir_handle = NULL;
+    size_t handle_size = 0;
+    ssize_t nread = 0;
+
+    priv = this->private;
+    handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+    dir_handle = alloca0(handle_size);
+
+    snprintf(target_expected, PATH_MAX, "../../%02x/%02x/%s/%s",
+             loc->pargfid[0], loc->pargfid[1], uuid_utoa(loc->pargfid),
+             loc->name);
+
+    MAKE_HANDLE_GFID_PATH(dir_handle, this, gfid);
+    nread = sys_readlink(dir_handle, target_on_disk, PATH_MAX);
+
+    /* A completely filled buffer leaves no room for the terminator. */
+    if (nread == PATH_MAX)
+        errno = EINVAL;
+
+    if (nread < 0 || nread == PATH_MAX) {
+        if (errno != ENOENT) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+                   "readlink[%s] failed", dir_handle);
+        }
+        return _gf_false;
+    }
+
+    /* readlink() does not terminate the string it stores. */
+    target_on_disk[nread] = '\0';
+
+    return (strcmp(target_on_disk, target_expected) == 0) ? _gf_true
+                                                          : _gf_false;
+}
+
+/*
+ * Store @nlink under GF_RESPONSE_LINK_COUNT_XDATA in the response dict when
+ * the request dict carries GF_REQUEST_LINK_COUNT_XDATA.
+ *
+ * @req   request dict; may be NULL, in which case nothing is done.
+ * @res   response dict to fill; a new dict is created when it is NULL.
+ * @nlink link count to report.
+ *
+ * Returns the response dict: @res unchanged when nothing was requested, the
+ * (possibly newly created) dict otherwise, or NULL when a dict was needed
+ * and could not be created. A failure to set the key is only logged.
+ */
+static dict_t *
+posix_dict_set_nlink(dict_t *req, dict_t *res, int32_t nlink)
+{
+    /* Nothing to do unless the caller asked for the link count. */
+    if (!req || !dict_get_sizen(req, GF_REQUEST_LINK_COUNT_XDATA))
+        return res;
+
+    if (!res) {
+        res = dict_new();
+        if (!res)
+            return res;
+    }
+
+    if (dict_set_uint32(res, GF_RESPONSE_LINK_COUNT_XDATA, nlink) == -1)
+        gf_msg("posix", GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+               "Failed to set GF_RESPONSE_LINK_COUNT_XDATA");
+
+    return res;
+}
+
+/* Regular fops */
+
+/*
+ * posix_lookup - lookup fop: resolve @loc on the backend and unwind with the
+ * entry's iatt, an optional xattr dict and the parent's iatt.
+ *
+ * @frame  call frame to unwind.
+ * @this   posix xlator; this->private must be set.
+ * @loc    entry to look up, either by (pargfid, name) or nameless.
+ * @xdata  request dict; may carry GF_GFIDLESS_LOOKUP and
+ *         GF_CLEAN_WRITE_PROTECTION among other keys.
+ *
+ * Always returns 0; the result is delivered through STACK_UNWIND_STRICT:
+ *   - EPERM  for a lookup of GF_HIDDEN_PATH (or ".attribute" on NetBSD)
+ *            directly under the root gfid,
+ *   - ESTALE when the entry handle cannot be built or the parent is missing,
+ *   - ENODATA when the lookup succeeded but the gfid in the iatt is null and
+ *            the request was not a gfid-less lookup.
+ *
+ * NOTE(review): MAKE_INODE_HANDLE / MAKE_ENTRY_HANDLE are macros defined
+ * elsewhere; the code below reads op_ret and errno right after them, so they
+ * are assumed to assign op_ret as a side effect - confirm in posix-handle.h.
+ */
+int32_t
+posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    struct iatt buf = {
+        0,
+    };
+    int32_t op_ret = -1;
+    int32_t entry_ret = 0;
+    int32_t op_errno = 0;
+    dict_t *xattr = NULL;
+    char *real_path = NULL;
+    char *par_path = NULL;
+    char *gfid_path = NULL;
+    uuid_t gfid = {0};
+    struct iatt postparent = {
+        0,
+    };
+    struct stat statbuf = {0};
+    int32_t gfidless = 0;
+    char *pgfid_xattr_key = NULL;
+    int32_t nlink_samepgfid = 0;
+    struct posix_private *priv = NULL;
+    posix_inode_ctx_t *ctx = NULL;
+    int ret = 0;
+    int dfd = -1;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(loc, out);
+    VALIDATE_OR_GOTO(this->private, out);
+
+    priv = this->private;
+
+    /* The Hidden directory should be for housekeeping purpose and it
+       should not get any gfid on it */
+    if (__is_root_gfid(loc->pargfid) && loc->name &&
+        (strcmp(loc->name, GF_HIDDEN_PATH) == 0)) {
+        gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED,
+               "Lookup issued on %s,"
+               " which is not permitted",
+               GF_HIDDEN_PATH);
+        op_errno = EPERM;
+        op_ret = -1;
+        goto out;
+    }
+
+#ifdef __NetBSD__
+    /* Same for NetBSD's .attribute directory */
+    if (__is_root_gfid(loc->pargfid) && loc->name &&
+        (strcmp(loc->name, ".attribute") == 0)) {
+        gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED,
+               "Lookup issued on .attribute,"
+               " which is not permitted");
+        op_errno = EPERM;
+        op_ret = -1;
+        goto out;
+    }
+#endif /* __NetBSD__ */
+
+    /* Only the value of gfidless matters (it stays 0 when the key is
+       absent); the status of the dict lookup is discarded on purpose. */
+    op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless);
+    op_ret = -1;
+    if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) {
+        /* nameless lookup */
+        MAKE_INODE_HANDLE(real_path, this, loc, &buf);
+    } else {
+        MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf);
+        if (!real_path || !par_path) {
+            op_ret = -1;
+            op_errno = ESTALE;
+            goto out;
+        }
+        if (gf_uuid_is_null(loc->inode->gfid)) {
+            op_ret = posix_gfid_heal(this, real_path, loc, xdata);
+            if (op_ret < 0) {
+                op_errno = -op_ret;
+                op_ret = -1;
+                goto out;
+            }
+            /* Build the handle again now that the gfid has been healed. */
+            MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf);
+        }
+    }
+
+    op_errno = errno;
+
+    if (op_ret == -1) {
+        if (op_errno != ENOENT) {
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_LSTAT_FAILED,
+                   "lstat on %s failed", real_path ? real_path : "null");
+        }
+        /* Remember the failure; op_ret is reused by the parent stat below. */
+        entry_ret = -1;
+        if (loc_is_nameless(loc)) {
+            if (!op_errno)
+                op_errno = ESTALE;
+            loc_gfid(loc, gfid);
+            MAKE_HANDLE_ABSPATH_FD(gfid_path, this, gfid, dfd);
+            ret = sys_fstatat(dfd, gfid_path, &statbuf, 0);
+            if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR))
+                /*Don't unset if it was a symlink to a dir.*/
+                goto parent;
+            ret = sys_fstatat(dfd, gfid_path, &statbuf, AT_SYMLINK_NOFOLLOW);
+            if (ret == 0 && statbuf.st_nlink == 1) {
+                gf_msg(this->name, GF_LOG_WARNING, op_errno,
+                       P_MSG_HANDLE_DELETE,
+                       "Found stale gfid "
+                       "handle %s, removing it.",
+                       gfid_path);
+                posix_handle_unset(this, gfid, NULL);
+            }
+        }
+        goto parent;
+    }
+
+    if (xdata && (op_ret == 0)) {
+        xattr = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &buf);
+
+        posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata,
+                             &xattr, _gf_true);
+
+        if (dict_get_sizen(xdata, GF_CLEAN_WRITE_PROTECTION)) {
+            ret = sys_lremovexattr(real_path, GF_PROTECT_FROM_EXTERNAL_WRITES);
+            /* gf_msg takes the errno before the message id, as in every
+               other call in this function. */
+            if (ret == -1 && (errno != ENODATA && errno != ENOATTR))
+                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_NOT_REMOVED,
+                       "removexattr failed. key %s path %s",
+                       GF_PROTECT_FROM_EXTERNAL_WRITES, loc->path);
+        }
+    }
+
+    posix_update_iatt_buf(&buf, -1, real_path, xdata);
+    if (priv->update_pgfid_nlinks) {
+        if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) {
+            MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+                                 loc->pargfid);
+
+            op_ret = posix_inode_ctx_get_all(loc->inode, this, &ctx);
+            if (op_ret < 0) {
+                op_errno = ENOMEM;
+                goto out;
+            }
+
+            pthread_mutex_lock(&ctx->pgfid_lock);
+            {
+                SET_PGFID_XATTR_IF_ABSENT(real_path, pgfid_xattr_key,
+                                          nlink_samepgfid, XATTR_CREATE, op_ret,
+                                          this, unlock);
+            }
+        unlock:
+            pthread_mutex_unlock(&ctx->pgfid_lock);
+        }
+    }
+
+parent:
+    if (par_path) {
+        op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path,
+                             &postparent, _gf_false);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+                   "post-operation lstat on"
+                   " parent %s failed",
+                   par_path);
+            if (op_errno == ENOENT)
+                /* If parent directory is missing in a lookup,
+                   errno should be ESTALE (bad handle) and not
+                   ENOENT (missing entry)
+                */
+                op_errno = ESTALE;
+            goto out;
+        }
+    }
+
+    /* Report the outcome of the entry lookup, not of the parent stat. */
+    op_ret = entry_ret;
+out:
+    if (!op_ret && !gfidless && gf_uuid_is_null(buf.ia_gfid)) {
+        gf_msg(this->name, GF_LOG_ERROR, ENODATA, P_MSG_NULL_GFID,
+               "buf->ia_gfid is null for "
+               "%s",
+               (real_path) ? real_path : "");
+        op_ret = -1;
+        op_errno = ENODATA;
+    }
+
+    if (op_ret == 0)
+        op_errno = 0;
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno,
+                        (loc) ? loc->inode : NULL, &buf, xattr, &postparent);
+
+    if (xattr)
+        dict_unref(xattr);
+
+    return 0;
+}
+
+/*
+ * posix_set_gfid2path_xattr - record "<pgfid>/<bname>" on @path in an
+ * extended attribute whose name is GFID2PATH_XATTR_KEY_PREFIX followed by
+ * the xxh64 digest of that string.
+ *
+ * @this   xlator, used for logging.
+ * @path   backend path of the entry that receives the xattr.
+ * @pgfid  gfid of the parent directory.
+ * @bname  basename of the entry inside the parent.
+ *
+ * Returns the result of sys_lsetxattr() (called with XATTR_CREATE), or -1
+ * with errno set to ENAMETOOLONG when "<pgfid>/<bname>" does not fit in the
+ * local buffer. Failures are logged at warning level.
+ */
+static int32_t
+posix_set_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid,
+                          const char *bname)
+{
+    char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+        0,
+    };
+    char pgfid_bname[1024] = {
+        0,
+    };
+    char *key = NULL;
+    const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH +
+                            GF_XXH64_DIGEST_LENGTH * 2 + 1;
+    int ret = 0;
+    int len;
+
+    len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid),
+                   bname);
+    /* snprintf reports the length it wanted to write; on truncation that
+       exceeds the buffer and must not be used as a byte count below. */
+    if (len < 0 || (size_t)len >= sizeof(pgfid_bname)) {
+        errno = ENAMETOOLONG;
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP,
+               "gfid2path value for %s does not fit in %zu bytes", path,
+               sizeof(pgfid_bname));
+        return -1;
+    }
+
+    gf_xxh64_wrapper((unsigned char *)pgfid_bname, len,
+                     GF_XXHSUM64_DEFAULT_SEED, xxh64);
+    key = alloca(key_size);
+    snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64);
+
+    ret = sys_lsetxattr(path, key, pgfid_bname, len, XATTR_CREATE);
+    if (ret == -1) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP,
+               "setting gfid2path xattr failed on %s: key = %s ", path, key);
+    }
+
+    return ret;
+}
+
+/*
+ * posix_mknod - mknod fop: create a non-directory node for @loc on the
+ * backend and unwind with its iatt and the parent's pre/post iatts.
+ *
+ * @frame  call frame; frame->root supplies uid, gid and pid.
+ * @this   posix xlator; this->private must be set.
+ * @loc    entry to create (parent gfid + name).
+ * @mode   requested file type and permission bits; the permission bits are
+ *         filtered through create_mask / force_create_mode.
+ * @dev    device number passed to mknod().
+ * @umask  not used by this function.
+ * @xdata  request dict; GLUSTERFS_INTERNAL_FOP_KEY requests linking to an
+ *         already existing gfid instead of creating a new node.
+ *
+ * Always returns 0; the result is delivered through STACK_UNWIND_STRICT.
+ * When the operation fails after the node was created, the node is removed
+ * again and the gfid set by this call is unset.
+ */
+int
+posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+            dev_t dev, mode_t umask, dict_t *xdata)
+{
+    int tmp_fd = 0;
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    char *real_path = 0;
+    char *par_path = 0;
+    struct iatt stbuf = {
+        0,
+    };
+    struct posix_private *priv = NULL;
+    gid_t gid = 0;
+    struct iatt preparent = {
+        0,
+    };
+    struct iatt postparent = {
+        0,
+    };
+    uuid_t uuid_req = {
+        0,
+    };
+    int32_t nlink_samepgfid = 0;
+    char *pgfid_xattr_key = NULL;
+    gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+    gf_boolean_t linked = _gf_false;
+    gf_loglevel_t level = GF_LOG_NONE;
+    mode_t mode_bit = 0;
+    posix_inode_ctx_t *ctx = NULL;
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(loc, out);
+
+    priv = this->private;
+    VALIDATE_OR_GOTO(priv, out);
+    GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno,
+                             uuid_req, out);
+    MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL);
+
+    mode_bit = (priv->create_mask & mode) | priv->force_create_mode;
+    mode = posix_override_umask(mode, mode_bit);
+
+    gid = frame->root->gid;
+
+    SET_FS_ID(frame->root->uid, gid);
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+    if (!real_path || !par_path) {
+        op_ret = -1;
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+                         _gf_false);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "pre-operation lstat on parent of %s failed", real_path);
+        goto out;
+    }
+
+    /* A set-gid parent directory hands its group to the new node. */
+    if (preparent.ia_prot.sgid) {
+        gid = preparent.ia_gid;
+    }
+
+    /* Check if the 'gfid' already exists, because this mknod may be an
+       internal call from distribute for creating 'linkfile', and that
+       linkfile may be for a hardlinked file */
+    if (dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+        dict_del_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY);
+        /* trash xlator did not bring the uuid_via the call
+         * to GFID_NULL_CHECK_AND_GOTO() above.
+         * Fetch it explicitly here.
+         */
+        if (frame->root->pid == GF_SERVER_PID_TRASH) {
+            op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req);
+            if (op_ret) {
+                gf_msg_debug(this->name, 0,
+                             "failed to get the gfid from dict for %s",
+                             loc->path);
+                goto real_op;
+            }
+        }
+
+        op_ret = posix_create_link_if_gfid_exists(this, uuid_req, real_path,
+                                                  loc->inode->table);
+        if (!op_ret) {
+            linked = _gf_true;
+            goto post_op;
+        }
+    }
+
+real_op:
+#ifdef __NetBSD__
+    if (S_ISFIFO(mode))
+        op_ret = mkfifo(real_path, mode);
+    else
+#endif /* __NetBSD__ */
+        op_ret = sys_mknod(real_path, mode, dev);
+
+    if (op_ret == -1) {
+        op_errno = errno;
+        if ((op_errno == EINVAL) && S_ISREG(mode)) {
+            /* Over Darwin, mknod with (S_IFREG|mode)
+               doesn't work */
+            tmp_fd = sys_creat(real_path, mode);
+            if (tmp_fd == -1) {
+                /* Report why the fallback failed, not the EINVAL that
+                   triggered it. */
+                op_errno = errno;
+                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CREATE_FAILED,
+                       "create failed on"
+                       " %s",
+                       real_path);
+                goto out;
+            }
+            sys_close(tmp_fd);
+        } else {
+            /* An already existing entry is not worth an error-level log. */
+            if (op_errno == EEXIST)
+                level = GF_LOG_DEBUG;
+            else
+                level = GF_LOG_ERROR;
+            gf_msg(this->name, level, errno, P_MSG_MKNOD_FAILED,
+                   "mknod on %s failed", real_path);
+            goto out;
+        }
+    }
+
+    entry_created = _gf_true;
+
+#ifndef HAVE_SET_FSID
+    op_ret = sys_lchown(real_path, frame->root->uid, gid);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
+               "lchown on %s failed", real_path);
+        goto out;
+    }
+#endif
+
+post_op:
+    /* ACL and xattr failures below are logged but do not fail the fop. */
+    op_ret = posix_acl_xattr_set(this, real_path, xdata);
+    if (op_ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_ACL_FAILED,
+               "setting ACLs on %s failed", real_path);
+    }
+
+    if (priv->update_pgfid_nlinks) {
+        MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+                             loc->pargfid);
+        op_ret = posix_inode_ctx_get_all(loc->inode, this, &ctx);
+        if (op_ret < 0) {
+            op_errno = ENOMEM;
+            goto out;
+        }
+
+        pthread_mutex_lock(&ctx->pgfid_lock);
+        {
+            LINK_MODIFY_PGFID_XATTR(real_path, pgfid_xattr_key, nlink_samepgfid,
+                                    0, op_ret, this, unlock);
+        }
+    unlock:
+        pthread_mutex_unlock(&ctx->pgfid_lock);
+    }
+
+    if (priv->gfid2path) {
+        posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+    }
+
+    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+    if (op_ret) {
+        if (errno != EEXIST)
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                   "setting xattrs on %s failed", real_path);
+        else
+            gf_msg_debug(this->name, 0, "setting xattrs on %s failed",
+                         real_path);
+    }
+
+    /* A node linked to an existing gfid already carries that gfid. */
+    if (!linked) {
+        op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid,
+                                &op_errno);
+        if (op_ret) {
+            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED,
+                   "setting gfid on %s failed", real_path);
+            goto out;
+        } else {
+            gfid_set = _gf_true;
+        }
+    }
+
+    op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKNOD_FAILED,
+               "mknod on %s failed", real_path);
+        goto out;
+    }
+
+    posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);
+
+    op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+                         _gf_false);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "post-operation lstat on parent %s failed", par_path);
+        goto out;
+    }
+
+    posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent);
+
+    op_ret = 0;
+
+out:
+    SET_TO_OLD_FS_ID();
+
+    if (op_ret < 0) {
+        if (entry_created) {
+            /* Everything created above (regular file, FIFO, device node,
+               socket) is a non-directory, so unlink() is the only call
+               that can remove it; rmdir() would fail with ENOTDIR. */
+            sys_unlink(real_path);
+        }
+
+        if (gfid_set)
+            posix_gfid_unset(this, xdata);
+    }
+
+    STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno,
+                        (loc) ? loc->inode : NULL, &stbuf, &preparent,
+                        &postparent, NULL);
+
+    return 0;
+}
+
+int
+posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL, *gfid_path = NULL;
+ char *par_path = NULL, *xattr_name = NULL;
+ int xattr_name_len;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ struct iatt preparent = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+ uuid_t uuid_req = {
+ 0,
+ };
+ ssize_t size = 0;
+ dict_t *xdata_rsp = NULL;
+ char *disk_xattr = NULL;
+ data_t *arg_data = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ mode_t mode_bit = 0;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get created from a user request */
+ if (__is_root_gfid(loc->pargfid) &&
+ (strcmp(loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED,
+ "mkdir issued on %s, which"
+ "is not permitted",
+ GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+
+#ifdef __NetBSD__
+ /* Same for NetBSD's .attribute directory */
+ if (__is_root_gfid(loc->pargfid) &&
+ (strcmp(loc->name, ".attribute") == 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED,
+ "mkdir issued on .attribute, which"
+ "is not permitted");
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+#endif
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+ GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno,
+ uuid_req, out);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ gid = frame->root->gid;
+
+ op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false);
+
+ SET_FS_ID(frame->root->uid, gid);
+
+ mode_bit = (priv->create_directory_mask & mode) |
+ priv->force_directory_mode;
+ mode = posix_override_umask(mode, mode_bit);
+
+ if (xdata) {
+ if (!gf_uuid_compare(stbuf.ia_gfid, uuid_req)) {
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+ }
+
+ if (!gf_uuid_is_null(uuid_req)) {
+ op_ret = posix_istat(this, loc->inode, uuid_req, NULL, &stbuf);
+ if ((op_ret == 0) && IA_ISDIR(stbuf.ia_type)) {
+ gfid_path = alloca(PATH_MAX);
+ size = posix_handle_path(this, uuid_req, NULL, gfid_path, PATH_MAX);
+ if (size <= 0) {
+ op_errno = ESTALE;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_OF_SAME_ID,
+ "mkdir (%s): "
+ "gfid (%s) is already associated with "
+ "directory (%s). Hence, both "
+ "directories will share same gfid and "
+ "this can lead to inconsistencies.",
+ loc->path, uuid_utoa(uuid_req),
+ gfid_path ? gfid_path : "<NULL>");
+
+ gf_event(EVENT_POSIX_SAME_GFID,
+ "gfid=%s;"
+ "path=%s;newpath=%s;brick=%s:%s",
+ uuid_utoa(uuid_req), gfid_path ? gfid_path : "<NULL>",
+ loc->path, priv->hostname, priv->base_path);
+ }
+ if (!posix_symlinks_match(this, loc, uuid_req))
+ /* For afr selfheal of dir renames, we need to
+ * remove the old symlink in order for
+ * posix_gfid_set to set the symlink to the
+ * new dir.*/
+ posix_handle_unset(this, stbuf.ia_gfid, NULL);
+ }
+ } else if (frame->root->pid != GF_SERVER_PID_TRASH) {
+ op_ret = -1;
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno, P_MSG_NULL_GFID,
+ "mkdir (%s): is issued without "
+ "gfid-req %p",
+ loc->path, xdata);
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ mode |= S_ISGID;
+ }
+
+ op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+ if (xattr_name != NULL) {
+ xattr_name_len = strlen(xattr_name);
+ arg_data = dict_getn(xdata, xattr_name, xattr_name_len);
+ if (arg_data) {
+ if (loc->parent)
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+ else
+ gf_uuid_unparse(loc->pargfid, pgfid);
+
+ size = 256;
+ disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!disk_xattr) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): GF_MALLOC failed during"
+ " preop of mkdir (%s)",
+ pgfid, loc->name, real_path);
+ goto out;
+ }
+ disk_xattr[size] = '\0';
+
+ size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size);
+ if (size == -1) {
+ if (disk_xattr) {
+ GF_FREE(disk_xattr);
+ disk_xattr = NULL;
+ }
+ if (errno != ERANGE) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): getxattr failed during"
+ " preop of mkdir (%s).",
+ pgfid, loc->name, real_path);
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): getxattr on key "
+ "(%s) path (%s) failed due to "
+ " buffer overflow",
+ pgfid, loc->name, xattr_name, par_path);
+ size = sys_lgetxattr(par_path, xattr_name, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): getxattr on key (%s)"
+ " path (%s) failed ",
+ pgfid, loc->name, xattr_name, par_path);
+ goto out;
+ }
+ disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!disk_xattr) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): GF_MALLOC failed during"
+ " preop of mkdir (%s)",
+ pgfid, loc->name, real_path);
+ goto out;
+ }
+ disk_xattr[size] = '\0';
+ size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size);
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): getxattr on "
+ " key (%s) path (%s) failed "
+ "(%s)",
+ pgfid, loc->name, xattr_name, par_path,
+ strerror(errno));
+ goto out;
+ }
+ }
+ if ((arg_data->len != size) ||
+ (memcmp(arg_data->data, disk_xattr, size))) {
+ gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): failing preop of "
+ "mkdir (%s) as on-disk"
+ " xattr value differs from argument "
+ "value for key %s",
+ pgfid, loc->name, real_path, xattr_name);
+ op_ret = -1;
+ op_errno = EIO;
+
+ xdata_rsp = dict_new();
+ if (xdata_rsp == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ P_MSG_PREOP_CHECK_FAILED,
+ "mkdir (%s/%s): "
+ "dict allocation failed",
+ pgfid, loc->name);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = dict_set_int8(xdata_rsp, GF_PREOP_CHECK_FAILED, 1);
+ if (op_errno < 0)
+ op_errno = errno;
+ goto out;
+ }
+
+ dict_deln(xdata, xattr_name, xattr_name_len);
+ }
+
+ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+ }
+
+ op_ret = sys_mkdir(real_path, mode);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED,
+ "mkdir of %s failed", real_path);
+ goto out;
+ }
+
+ entry_created = _gf_true;
+
+#ifndef HAVE_SET_FSID
+ op_ret = sys_chown(real_path, frame->root->uid, gid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
+ "chown on %s failed", real_path);
+ goto out;
+ }
+#endif
+ op_ret = posix_acl_xattr_set(this, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed ", real_path);
+ }
+
+ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed", real_path);
+ }
+
+ op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid,
+ &op_errno);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ goto out;
+ } else {
+ gfid_set = _gf_true;
+ }
+
+ op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_path);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent of %s failed", real_path);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent);
+
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ if (disk_xattr)
+ GF_FREE(disk_xattr);
+
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_rmdir(real_path);
+
+ if (gfid_set)
+ posix_gfid_unset(this, xdata);
+ }
+
+ STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno,
+ (loc) ? loc->inode : NULL, &stbuf, &preparent,
+ &postparent, xdata_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+
+ return 0;
+}
+
+/*
+ * Set the unlink flag (GF_UNLINK_TRUE) in the inode context of @inode.
+ *
+ * @inode:       inode whose context is updated
+ * @this:        xlator instance, used for logging and the ctx lookup
+ * @unlink_path: only tested against NULL here; a NULL value is treated as
+ *               a failure to build the unlink entry
+ *
+ * Returns -1 (after logging) when @unlink_path is NULL, otherwise the
+ * value returned by posix_inode_ctx_set_unlink_flag().
+ */
+static int
+posix_add_unlink_to_ctx(inode_t *inode, xlator_t *this, char *unlink_path)
+{
+    uint64_t unlink_flag = GF_UNLINK_TRUE;
+
+    if (unlink_path == NULL) {
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
+               "Creation of unlink entry failed");
+        return -1;
+    }
+
+    return posix_inode_ctx_set_unlink_flag(inode, this, unlink_flag);
+}
+
+/*
+ * Rename the gfid handle of @gfid to the path produced by
+ * POSIX_GET_FILE_UNLINK_PATH (built from priv->base_path and
+ * loc->inode->gfid), then set the unlink flag in the inode context.
+ *
+ * Returns -1 when no unlink path could be built, the negative result of
+ * sys_rename() when the rename fails (logged), otherwise the result of
+ * posix_add_unlink_to_ctx().
+ */
+static int32_t
+posix_move_gfid_to_unlink(xlator_t *this, uuid_t gfid, loc_t *loc)
+{
+    struct posix_private *priv = (struct posix_private *)this->private;
+    char *gfid_path = NULL;
+    char *unlink_path = NULL;
+    int rc = -1;
+
+    /* Both macros assign their first/last argument in this scope. */
+    MAKE_HANDLE_GFID_PATH(gfid_path, this, gfid);
+    POSIX_GET_FILE_UNLINK_PATH(priv->base_path, loc->inode->gfid,
+                               unlink_path);
+    if (unlink_path == NULL)
+        return -1;
+
+    gf_msg_debug(this->name, 0, "Moving gfid: %s to unlink_path : %s",
+                 gfid_path, unlink_path);
+
+    rc = sys_rename(gfid_path, unlink_path);
+    if (rc < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED,
+               "Creation of unlink entry failed for gfid: %s", unlink_path);
+        return rc;
+    }
+
+    return posix_add_unlink_to_ctx(loc->inode, this, unlink_path);
+}
+
+/*
+ * Remove the gfid handle (when this is the last link) and then the entry
+ * @real_path itself.
+ *
+ * @frame:          call frame, passed on to posix_set_ctime()
+ * @this:           xlator instance
+ * @real_path:      on-disk path of the entry to unlink
+ * @stbuf:          iatt of the entry; may be NULL
+ * @op_errno:       out parameter for the errno of a failure; may be NULL
+ * @loc:            location of the entry (its inode lock is taken here)
+ * @get_link_count: when true, the entry is stat'ed under the inode lock
+ *                  right before the unlink
+ * @rsp_dict:       dict that receives GET_LINK_COUNT
+ *
+ * When stbuf->ia_nlink is 1 the gfid handle is removed if the inode has no
+ * open fds, otherwise it is moved aside with posix_move_gfid_to_unlink().
+ * A failure of that step is logged but does not stop the unlink.
+ *
+ * Returns 0 on success and -1 on failure. On failure *op_errno (if given)
+ * holds the errno of the failing stat or unlink.
+ */
+static int32_t
+posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this,
+                                   const char *real_path, struct iatt *stbuf,
+                                   int32_t *op_errno, loc_t *loc,
+                                   gf_boolean_t get_link_count,
+                                   dict_t *rsp_dict)
+{
+    int32_t ret = 0;
+    int saved_errno = 0;
+    struct iatt prebuf = {
+        0,
+    };
+    gf_boolean_t locked = _gf_false;
+    gf_boolean_t update_ctime = _gf_false;
+
+    /* Unlink the gfid_handle_first */
+    if (stbuf && stbuf->ia_nlink == 1) {
+        LOCK(&loc->inode->lock);
+
+        if (loc->inode->fd_count == 0) {
+            UNLOCK(&loc->inode->lock);
+            ret = posix_handle_unset(this, stbuf->ia_gfid, NULL);
+        } else {
+            UNLOCK(&loc->inode->lock);
+            ret = posix_move_gfid_to_unlink(this, stbuf->ia_gfid, loc);
+        }
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED,
+                   "unlink of gfid handle "
+                   "failed for path:%s with gfid %s",
+                   real_path, uuid_utoa(stbuf->ia_gfid));
+        }
+    } else {
+        /* Other links remain (or stbuf is NULL): ctime is updated below. */
+        update_ctime = _gf_true;
+    }
+
+    if (get_link_count) {
+        LOCK(&loc->inode->lock);
+        locked = _gf_true;
+        /* Since this stat is to get link count and not for time
+         * attributes, intentionally passing inode as NULL
+         */
+        ret = posix_pstat(this, NULL, loc->gfid, real_path, &prebuf, _gf_true);
+        if (ret) {
+            /* Capture errno before unlocking/logging can change it and
+             * report it to the caller, who would otherwise unwind with
+             * op_ret == -1 and op_errno == 0.
+             */
+            saved_errno = errno;
+            UNLOCK(&loc->inode->lock);
+            locked = _gf_false;
+            if (op_errno)
+                *op_errno = saved_errno;
+            gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_LSTAT_FAILED,
+                   "lstat on %s failed", real_path);
+            goto err;
+        }
+    }
+
+    /* Unlink the actual file */
+    ret = sys_unlink(real_path);
+    saved_errno = errno;
+
+    if (locked) {
+        UNLOCK(&loc->inode->lock);
+        locked = _gf_false;
+    }
+
+    if (ret == -1) {
+        if (op_errno)
+            *op_errno = saved_errno;
+        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_UNLINK_FAILED,
+               "unlink of %s failed", real_path);
+        goto err;
+    }
+
+    if (update_ctime) {
+        posix_set_ctime(frame, this, NULL, -1, loc->inode, stbuf);
+    }
+
+    /* NOTE(review): the key is set even when get_link_count is false, in
+     * which case prebuf.ia_nlink is still 0 - kept as in the original.
+     */
+    ret = dict_set_uint32(rsp_dict, GET_LINK_COUNT, prebuf.ia_nlink);
+    if (ret)
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+               "failed to set " GET_LINK_COUNT " for %s", real_path);
+
+    return 0;
+
+err:
+    if (locked) {
+        UNLOCK(&loc->inode->lock);
+        locked = _gf_false;
+    }
+    return -1;
+}
+
+/*
+ * Decide whether an unlink must be skipped because the caller only wants
+ * linkto files removed.
+ *
+ * @xdata:        request dict that may carry @key
+ * @loc:          location of the entry (its inode lock guards the getxattr)
+ * @key, @keylen: dict key to look up and its length
+ * @linkto_xattr: name of the xattr probed on @real_path
+ * @stbuf:        iatt of the entry, tested with IS_DHT_LINKFILE_MODE
+ * @real_path:    on-disk path of the entry
+ *
+ * Returns _gf_false when @key is absent from @xdata or its value is zero.
+ * Otherwise returns _gf_true when the entry does not have linkfile mode or
+ * when sys_lgetxattr() for @linkto_xattr yields a size <= 0.
+ */
+static gf_boolean_t
+posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key,
+                             const int keylen, const char *linkto_xattr,
+                             struct iatt *stbuf, const char *real_path)
+{
+    gf_boolean_t skip = _gf_false;
+    int only_if_linkto = 0;
+    ssize_t linkto_len = -1;
+
+    if (dict_get_int32n(xdata, key, keylen, &only_if_linkto) != 0)
+        return _gf_false;
+    if (!only_if_linkto)
+        return _gf_false;
+
+    if (!IS_DHT_LINKFILE_MODE(stbuf))
+        return _gf_true;
+
+    LOCK(&loc->inode->lock);
+    linkto_len = sys_lgetxattr(real_path, linkto_xattr, NULL, 0);
+    UNLOCK(&loc->inode->lock);
+
+    if (linkto_len <= 0)
+        skip = _gf_true;
+
+    gf_msg("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS,
+           "linkto_xattr status: %" PRIu32 " for %s", skip, real_path);
+
+    return skip;
+}
+
+/*
+ * Remove the gfid2path xattr that corresponds to (@pgfid, @bname) from
+ * @path.
+ *
+ * The xattr key is GFID2PATH_XATTR_KEY_PREFIX followed by the digest that
+ * gf_xxh64_wrapper() produces for the string "<pgfid>/<bname>".
+ *
+ * Returns the result of sys_lremovexattr(); a result of -1 is logged as a
+ * warning.
+ */
+static int32_t
+posix_remove_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid,
+                             const char *bname)
+{
+    const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH +
+                            GF_XXH64_DIGEST_LENGTH * 2 + 1;
+    char digest[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+        0,
+    };
+    char hash_input[1024] = {
+        0,
+    };
+    char *xattr_key = alloca(key_size);
+    int input_len = 0;
+    int rc = 0;
+
+    /* Hash "<parent gfid>/<basename>" to derive the key suffix. */
+    input_len = snprintf(hash_input, sizeof(hash_input), "%s/%s",
+                         uuid_utoa(pgfid), bname);
+    gf_xxh64_wrapper((unsigned char *)hash_input, input_len,
+                     GF_XXHSUM64_DEFAULT_SEED, digest);
+    snprintf(xattr_key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", digest);
+
+    rc = sys_lremovexattr(path, xattr_key);
+    if (rc == -1) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP,
+               "removing gfid2path xattr failed on %s: key = %s", path,
+               xattr_key);
+    }
+
+    return rc;
+}
+/*
+ * posix_unlink - unlink fop: remove the directory entry described by @loc.
+ *
+ * What the body does, in order:
+ *  - stats the parent directory before the operation (preparent);
+ *  - fails with EBUSY when xdata carries DHT_SKIP_OPEN_FD_UNLINK and the
+ *    inode has open fds, or when posix_skip_non_linkto_unlink() asks to
+ *    skip;
+ *  - opens the file when DHT_IATT_IN_XDATA_KEY is present in xdata for a
+ *    regular file, or when priv->background_unlink is set for a regular
+ *    file;
+ *  - for files with more than one link, runs UNLINK_MODIFY_PGFID_XATTR
+ *    under ctx->pgfid_lock (priv->update_pgfid_nlinks) and removes the
+ *    gfid2path xattr (priv->gfid2path);
+ *  - calls posix_unlink_gfid_handle_and_entry() to remove the entry;
+ *  - stats the parent again (postparent) and updates its ctime.
+ *
+ * The xflag argument is not used in this function. The reply is sent with
+ * STACK_UNWIND_STRICT at the out label; the return value is always 0.
+ */
+int32_t
+posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ char *par_path = NULL;
+ int32_t fd = -1;
+ int ret = -1;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ struct iatt preparent = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ int32_t check_open_fd = 0;
+ int32_t skip_unlink = 0;
+ int32_t fdstat_requested = 0;
+ dict_t *unwind_dict = NULL;
+ gf_boolean_t get_link_count = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(loc, out);
+ /* NOTE(review): stbuf is only handed to MAKE_ENTRY_HANDLE below, yet
+  * stbuf.ia_nlink and stbuf.ia_blocks are read later on, so the macro
+  * presumably stats the entry into it - confirm against the macro. */
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+
+ priv = this->private;
+
+ op_ret = dict_get_int32_sizen(xdata, DHT_SKIP_OPEN_FD_UNLINK,
+ &check_open_fd);
+ /* The key was found and is non-zero: refuse with EBUSY while the inode
+  * still has open fds. */
+ if (!op_ret && check_open_fd) {
+ LOCK(&loc->inode->lock);
+
+ if (loc->inode->fd_count) {
+ skip_unlink = 1;
+ }
+
+ UNLOCK(&loc->inode->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_KEY_STATUS_INFO,
+ "open-fd-key-status: %" PRIu32 " for %s", skip_unlink,
+ real_path);
+
+ if (skip_unlink) {
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+ }
+ /*
+ * If either of the two checks returns true, skip the unlink.
+ * If the first check (above) already returned true, the second
+ * one is not called at all.
+ */
+ skip_unlink = posix_skip_non_linkto_unlink(
+ xdata, loc, DHT_SKIP_NON_LINKTO_UNLINK,
+ SLEN(DHT_SKIP_NON_LINKTO_UNLINK), DHT_LINKTO, &stbuf, real_path);
+ if (skip_unlink) {
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+
+ if (IA_ISREG(loc->inode->ia_type) && xdata &&
+ dict_get_sizen(xdata, DHT_IATT_IN_XDATA_KEY)) {
+ fdstat_requested = 1;
+ }
+ /* The fd opened here is used by posix_fdstat() after the unlink (when
+  * requested) and is closed after the unwind at the out label. */
+ if (fdstat_requested ||
+ (priv->background_unlink && IA_ISREG(loc->inode->ia_type))) {
+ fd = sys_open(real_path, O_RDONLY, 0);
+ if (fd == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED,
+ "open of %s failed", real_path);
+ goto out;
+ }
+ }
+ /* The pgfid xattr is only touched when the file has more than one link. */
+ if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ op_ret = posix_inode_ctx_get_all(loc->inode, this, &ctx);
+ if (op_ret < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ pthread_mutex_lock(&ctx->pgfid_lock);
+ {
+ UNLINK_MODIFY_PGFID_XATTR(real_path, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret, this, unlock);
+ }
+ unlock:
+ pthread_mutex_unlock(&ctx->pgfid_lock);
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED,
+ "modification of "
+ "parent gfid xattr failed (path:%s gfid:%s)",
+ real_path, uuid_utoa(loc->inode->gfid));
+ if (op_errno != ENOATTR)
+ /* Allow unlink if pgfid xattr is not set.
+  * NOTE(review): nothing visible here assigns op_errno before
+  * this test (the gfid2path branch below tests errno instead) -
+  * confirm whether UNLINK_MODIFY_PGFID_XATTR sets it. */
+ goto out;
+ }
+ }
+
+ if (priv->gfid2path && (stbuf.ia_nlink > 1)) {
+ op_ret = posix_remove_gfid2path_xattr(this, real_path, loc->pargfid,
+ loc->name);
+ if (op_ret < 0) {
+ /* Allow unlink if the gfid2path xattr is not set. */
+ if (errno != ENOATTR)
+ goto out;
+ }
+ }
+
+ unwind_dict = dict_new();
+ if (!unwind_dict) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (xdata && dict_get_sizen(xdata, GF_GET_FILE_BLOCK_COUNT)) {
+ ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT,
+ stbuf.ia_blocks);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+ "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT);
+ }
+
+ if (xdata && dict_get_sizen(xdata, GET_LINK_COUNT))
+ get_link_count = _gf_true;
+ op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf,
+ &op_errno, loc, get_link_count,
+ unwind_dict);
+ if (op_ret == -1) {
+ goto out;
+ }
+
+ if (fdstat_requested) {
+ op_ret = posix_fdstat(this, loc->inode, fd, &postbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post operation "
+ "fstat failed on fd=%d",
+ fd);
+ goto out;
+ }
+ op_ret = posix_set_iatt_in_dict(unwind_dict, NULL, &postbuf);
+ if (op_ret == -1) {
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_DICT_SET_FAILED,
+ "failed to set fdstat in dict"); /* no goto: op_ret is
+  * reassigned by the posix_pstat() call below */
+ }
+ }
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent);
+
+ unwind_dict = posix_dict_set_nlink(xdata, unwind_dict, stbuf.ia_nlink);
+ op_ret = 0;
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, &preparent,
+ &postparent, unwind_dict);
+ /* The fd (if one was opened above) is released only after the reply. */
+ if (fd != -1) {
+ sys_close(fd);
+ }
+
+ /* unref unwind_dict*/
+ if (unwind_dict) {
+ dict_unref(unwind_dict);
+ }
+
+ return 0;
+}
+
+/*
+ * posix_rmdir - rmdir fop: remove the directory named by @loc.
+ *
+ * @flags: when non-zero the directory is not removed but renamed to
+ *         "<priv->trash_path>/<gfid>" (the trash directory is created
+ *         first if needed); when zero sys_rmdir() is called.
+ * @xdata: not used in this function.
+ *
+ * Removing GF_HIDDEN_PATH (and ".attribute" on NetBSD) directly below the
+ * root gfid is refused with EPERM. An EEXIST failure is reported as
+ * ENOTEMPTY. On success the gfid handle is removed when
+ * posix_symlinks_match() agrees, and the parent is stat'ed again for the
+ * reply.
+ *
+ * op_errno is taken from errno directly after the failing syscall, so a
+ * stale errno from an earlier call (e.g. EEXIST from creating the trash
+ * directory) or from logging is never reported.
+ *
+ * The reply is sent with STACK_UNWIND_STRICT at the out label; the return
+ * value is always 0.
+ */
+int
+posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+            dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    char *real_path = NULL;
+    char *par_path = NULL;
+    char *gfid_str = NULL;
+    struct iatt preparent = {
+        0,
+    };
+    struct iatt postparent = {
+        0,
+    };
+    struct iatt stbuf = {
+        0,
+    };
+    struct posix_private *priv = NULL;
+    char tmp_path[PATH_MAX] = {
+        0,
+    };
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(loc, out);
+
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+    /* The Hidden directory should be for housekeeping purpose and it
+       should not get deleted from inside process */
+    if (__is_root_gfid(loc->pargfid) &&
+        (strcmp(loc->name, GF_HIDDEN_PATH) == 0)) {
+        gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED,
+               "rmdir issued on %s, which "
+               "is not permitted",
+               GF_HIDDEN_PATH);
+        op_errno = EPERM;
+        op_ret = -1;
+        goto out;
+    }
+
+#ifdef __NetBSD__
+    /* Same for NetBSD's .attribute directory */
+    if (__is_root_gfid(loc->pargfid) &&
+        (strcmp(loc->name, ".attribute") == 0)) {
+        gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED,
+               "rmdir issued on .attribute, which "
+               "is not permitted");
+        op_errno = EPERM;
+        op_ret = -1;
+        goto out;
+    }
+#endif
+
+    priv = this->private;
+
+    MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
+    if (!real_path || !par_path) {
+        op_ret = -1;
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+                         _gf_false);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "pre-operation lstat on parent %s failed", par_path);
+        goto out;
+    }
+
+    if (flags) {
+        op_ret = sys_mkdir(priv->trash_path, 0755);
+        if (op_ret == -1 && errno != EEXIST) {
+            /* Save errno before logging can overwrite it. */
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_MKDIR_FAILED,
+                   "mkdir of %s failed", priv->trash_path);
+        } else {
+            /* Trash directory exists (or was just created): move the
+             * directory there under its gfid instead of removing it. */
+            gfid_str = uuid_utoa(stbuf.ia_gfid);
+            (void)snprintf(tmp_path, sizeof(tmp_path), "%s/%s",
+                           priv->trash_path, gfid_str);
+            gf_msg_debug(this->name, 0, "Moving %s to %s", real_path, tmp_path);
+            op_ret = sys_rename(real_path, tmp_path);
+            if (op_ret == -1)
+                op_errno = errno;
+        }
+    } else {
+        op_ret = sys_rmdir(real_path);
+        if (op_ret == -1)
+            op_errno = errno;
+    }
+
+    if (op_ret == 0) {
+        if (posix_symlinks_match(this, loc, stbuf.ia_gfid))
+            posix_handle_unset(this, stbuf.ia_gfid, NULL);
+    }
+
+    if (op_errno == EEXIST)
+        /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
+        op_errno = ENOTEMPTY;
+
+    /* No need to log a common error as ENOTEMPTY */
+    if (op_ret == -1 && op_errno != ENOTEMPTY) {
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_RMDIR_FAILED,
+               "rmdir of %s failed", real_path);
+    }
+
+    if (op_ret == -1) {
+        if (op_errno == ENOTEMPTY) {
+            gf_msg_debug(this->name, 0, "%s on %s failed",
+                         (flags) ? "rename" : "rmdir", real_path);
+        } else {
+            gf_msg(this->name, GF_LOG_ERROR, op_errno,
+                   P_MSG_DIR_OPERATION_FAILED, "%s on %s failed",
+                   (flags) ? "rename" : "rmdir", real_path);
+        }
+        goto out;
+    }
+
+    op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+                         _gf_false);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "post-operation lstat on parent of %s failed", par_path);
+        goto out;
+    }
+
+    posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent);
+
+out:
+    SET_TO_OLD_FS_ID();
+
+    STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, &preparent, &postparent,
+                        NULL);
+
+    return 0;
+}
+/*
+ * posix_symlink - symlink fop: create a symbolic link at @loc whose target
+ * is @linkname.
+ *
+ * After sys_symlink() succeeds the function calls, in order: lchown (only
+ * in builds without HAVE_SET_FSID), posix_acl_xattr_set(), the pgfid and
+ * gfid2path xattr setters (when enabled in priv),
+ * posix_entry_create_xattr_set() and posix_gfid_set(). It then stats the
+ * new entry and the parent for the reply.
+ *
+ * When a step fails after the entry was created, the out path unlinks the
+ * entry again and unsets a gfid that had already been set. The umask
+ * argument is not used in this function.
+ *
+ * The reply is sent with STACK_UNWIND_STRICT at the out label; the return
+ * value is always 0.
+ */
+int
+posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = 0;
+ char *par_path = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ gid_t gid = 0;
+ struct iatt preparent = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+ uuid_t uuid_req = {
+ 0,
+ };
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(linkname, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+ GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno,
+ uuid_req, out);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
+
+ gid = frame->root->gid;
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ SET_FS_ID(frame->root->uid, gid);
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+ /* Parent has the setgid bit: use the parent's gid. After this point gid
+  * is only read by the lchown below (builds without HAVE_SET_FSID);
+  * SET_FS_ID above already ran with the caller's gid. */
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
+ op_ret = sys_symlink(linkname, real_path);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_SYMLINK_FAILED,
+ "symlink of %s --> %s failed", real_path, linkname);
+ goto out;
+ }
+
+ entry_created = _gf_true;
+ /* NOTE(review): stbuf has not been refreshed by posix_pstat() for the new
+  * entry yet; it holds whatever MAKE_ENTRY_HANDLE stored - confirm intent. */
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);
+
+#ifndef HAVE_SET_FSID
+ op_ret = sys_lchown(real_path, frame->root->uid, gid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
+ "lchown failed on %s", real_path);
+ goto out;
+ }
+#endif
+ op_ret = posix_acl_xattr_set(this, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed", real_path);
+ } /* logged only; the creation continues */
+
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+ SET_PGFID_XATTR(real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+
+ if (priv->gfid2path) {
+ posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+ }
+ /* SET_PGFID_XATTR is given this label, so a jump from it also skips the
+  * gfid2path xattr above. */
+ignore:
+ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed ", real_path);
+ } /* logged only; the creation continues */
+
+ op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid,
+ &op_errno);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ goto out;
+ } else {
+ gfid_set = _gf_true;
+ }
+
+ op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s", real_path);
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent);
+
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+ /* Roll back a partially created entry before replying. */
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_unlink(real_path);
+
+ if (gfid_set)
+ posix_gfid_unset(this, xdata);
+ }
+
+ STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno,
+ (loc) ? loc->inode : NULL, &stbuf, &preparent,
+ &postparent, NULL);
+
+ return 0;
+}
+/*
+ * posix_rename - rename fop: move the entry @oldloc to @newloc.
+ *
+ * Outline of what the body does:
+ *  - builds the real and parent paths for both locations and stats both
+ *    parents before the operation;
+ *  - stats the destination; unless that stat fails with ENOENT, the gfid,
+ *    link count and directory-ness found there are remembered (victim,
+ *    nlink, was_dir);
+ *  - fails with EEXIST when a directory is found at the destination and
+ *    newloc->inode is NULL or has a different gfid;
+ *  - performs sys_rename() and the pgfid / gfid2path xattr updates while
+ *    holding ctx_old->pgfid_lock;
+ *  - afterwards calls posix_handle_unset() for the victim where
+ *    applicable and posix_handle_soft() for a renamed directory, then
+ *    stats the new entry and both parents for the reply.
+ *
+ * The reply is sent with STACK_UNWIND_STRICT at the out label; the return
+ * value is always 0.
+ */
+int
+posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_oldpath = NULL;
+ char *real_newpath = NULL;
+ char *par_oldpath = NULL;
+ char *par_newpath = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ char was_present = 1;
+ struct iatt preoldparent = {
+ 0,
+ };
+ struct iatt postoldparent = {
+ 0,
+ };
+ struct iatt prenewparent = {
+ 0,
+ };
+ struct iatt postnewparent = {
+ 0,
+ };
+ char olddirid[64];
+ char newdirid[64];
+ uuid_t victim = {0};
+ int was_dir = 0;
+ int nlink = 0;
+ char *pgfid_xattr_key = NULL;
+ int32_t nlink_samepgfid = 0;
+ char *gfid_path = NULL;
+ dict_t *unwind_dict = NULL;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t get_link_count = _gf_false;
+ posix_inode_ctx_t *ctx_old = NULL;
+ posix_inode_ctx_t *ctx_new = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(oldloc, out);
+ VALIDATE_OR_GOTO(newloc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL);
+ if (!real_oldpath || !par_oldpath) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ MAKE_ENTRY_HANDLE(real_newpath, par_newpath, this, newloc, &stbuf);
+ if (!real_newpath || !par_newpath) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ unwind_dict = dict_new();
+ if (!unwind_dict) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, oldloc->parent, oldloc->pargfid, par_oldpath,
+ &preoldparent, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed", par_oldpath);
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath,
+ &prenewparent, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent of %s failed", par_newpath);
+ goto out;
+ }
+ /* Find out whether something already exists at the destination. */
+ op_ret = posix_pstat(this, newloc->inode, NULL, real_newpath, &stbuf,
+ _gf_false);
+ if ((op_ret == -1) && (errno == ENOENT)) {
+ was_present = 0;
+ } else { /* NOTE(review): also taken when the stat failed with an errno
+  * other than ENOENT - confirm stbuf is meaningful in that case */
+ gf_uuid_copy(victim, stbuf.ia_gfid);
+ if (IA_ISDIR(stbuf.ia_type))
+ was_dir = 1;
+ nlink = stbuf.ia_nlink;
+ }
+
+ if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) {
+ gf_msg(this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND,
+ "found directory at %s while expecting ENOENT", real_newpath);
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ if (was_present && IA_ISDIR(stbuf.ia_type) &&
+ gf_uuid_compare(newloc->inode->gfid, stbuf.ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND,
+ "found directory %s at %s while renaming %s",
+ uuid_utoa_r(newloc->inode->gfid, olddirid), real_newpath,
+ uuid_utoa_r(stbuf.ia_gfid, newdirid));
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ op_ret = posix_inode_ctx_get_all(oldloc->inode, this, &ctx_old);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (newloc->inode) {
+ op_ret = posix_inode_ctx_get_all(newloc->inode, this, &ctx_new);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ /* For a directory source, posix_handle_unset() is called before the
+  * rename; posix_handle_soft() is called for it once the rename succeeded. */
+ if (IA_ISDIR(oldloc->inode->ia_type))
+ posix_handle_unset(this, oldloc->inode->gfid, NULL);
+
+ pthread_mutex_lock(&ctx_old->pgfid_lock);
+ {
+ if (!IA_ISDIR(oldloc->inode->ia_type) && priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ oldloc->pargfid);
+ UNLINK_MODIFY_PGFID_XATTR(real_oldpath, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret, this, unlock);
+ }
+ /* GET_LINK_COUNT: re-stat the existing destination while holding
+  * ctx_new->pgfid_lock; the nlink read here is what gets stored in
+  * unwind_dict after the rename. */
+ if ((xdata) && (dict_get(xdata, GET_LINK_COUNT)) && (real_newpath) &&
+ (was_present) && ctx_new) {
+ pthread_mutex_lock(&ctx_new->pgfid_lock);
+ locked = _gf_true;
+ get_link_count = _gf_true;
+ op_ret = posix_pstat(this, newloc->inode, newloc->gfid,
+ real_newpath, &stbuf, _gf_false);
+ if ((op_ret == -1) && (errno != ENOENT)) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_newpath);
+ goto unlock;
+ }
+ }
+
+ op_ret = sys_rename(real_oldpath, real_newpath);
+ if (op_ret == -1) {
+ op_errno = errno;
+ if (op_errno == ENOTEMPTY) {
+ gf_msg_debug(this->name, 0,
+ "rename of %s to"
+ " %s failed: %s",
+ real_oldpath, real_newpath, strerror(op_errno));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_RENAME_FAILED,
+ "rename of %s to %s failed", real_oldpath, real_newpath);
+ }
+ /* NOTE(review): the macro below is handed op_ret; if it stores a
+  * non-negative value there, the op_ret < 0 test after the unlock
+  * label would not notice the failed rename - confirm against the
+  * macro definition. */
+ if (priv->update_pgfid_nlinks &&
+ !IA_ISDIR(oldloc->inode->ia_type)) {
+ LINK_MODIFY_PGFID_XATTR(real_oldpath, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret, this,
+ unlock);
+ }
+
+ goto unlock;
+ }
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx_new->pgfid_lock);
+ locked = _gf_false;
+ }
+
+ if ((get_link_count) &&
+ (dict_set_uint32(unwind_dict, GET_LINK_COUNT, stbuf.ia_nlink)))
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+ "failed to set " GET_LINK_COUNT " for %s", real_newpath);
+
+ if (!IA_ISDIR(oldloc->inode->ia_type) && priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ newloc->pargfid);
+ LINK_MODIFY_PGFID_XATTR(real_newpath, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret, this, unlock);
+ }
+
+ if (!IA_ISDIR(oldloc->inode->ia_type) && priv->gfid2path) {
+ MAKE_HANDLE_ABSPATH(gfid_path, this, oldloc->inode->gfid);
+
+ posix_remove_gfid2path_xattr(this, gfid_path, oldloc->pargfid,
+ oldloc->name);
+ posix_set_gfid2path_xattr(this, gfid_path, newloc->pargfid,
+ newloc->name);
+ }
+ }
+
+unlock:
+ if (locked) {
+ pthread_mutex_unlock(&ctx_new->pgfid_lock);
+ locked = _gf_false;
+ }
+ pthread_mutex_unlock(&ctx_old->pgfid_lock);
+ /* NOTE: this warning is logged for every failure that reached the unlock
+  * label (lstat, rename or xattr), not only for pgfid xattr failures. */
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED,
+ "modification of "
+ "parent gfid xattr failed (gfid:%s)",
+ uuid_utoa(oldloc->inode->gfid));
+ goto out;
+ }
+ /* The overwritten destination (victim) loses its gfid handle when it was
+  * a directory, or a non-directory whose link count was 1. */
+ if (was_dir)
+ posix_handle_unset(this, victim, NULL);
+
+ if (was_present && !was_dir && nlink == 1)
+ posix_handle_unset(this, victim, NULL);
+
+ if (IA_ISDIR(oldloc->inode->ia_type)) {
+ posix_handle_soft(this, real_newpath, newloc, oldloc->inode->gfid,
+ NULL);
+ }
+
+ op_ret = posix_pstat(this, newloc->inode, NULL, real_newpath, &stbuf,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_newpath);
+ goto out;
+ }
+
+ /* Since the same inode is later used and dst inode is not present,
+ * update ctime on source inode. It can't use old path because it
+ * doesn't exist and xattr has to be stored on disk */
+ posix_set_ctime(frame, this, real_newpath, -1, oldloc->inode, &stbuf);
+
+ op_ret = posix_pstat(this, oldloc->parent, oldloc->pargfid, par_oldpath,
+ &postoldparent, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed", par_oldpath);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_oldpath, -1, oldloc->parent,
+ &postoldparent);
+
+ op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath,
+ &postnewparent, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed", par_newpath);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_newpath, -1, newloc->parent,
+ &postnewparent);
+
+ if (was_present)
+ unwind_dict = posix_dict_set_nlink(xdata, unwind_dict, nlink);
+ op_ret = 0;
+out:
+
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, &stbuf, &preoldparent,
+ &postoldparent, &prenewparent, &postnewparent,
+ unwind_dict);
+
+ if (unwind_dict)
+ dict_unref(unwind_dict);
+
+ return 0;
+}
+/*
+ * posix_link - link fop: create a hard link at @newloc to the inode of
+ * @oldloc.
+ *
+ * Outline of what the body does:
+ *  - resolves the existing file with MAKE_INODE_HANDLE and fails with
+ *    EMLINK when priv->max_hardlinks is set and stbuf.ia_nlink has
+ *    reached it;
+ *  - resolves the new entry path, stats its parent, calls sys_link();
+ *  - stats the new entry and the parent for the reply and updates their
+ *    ctime;
+ *  - runs LINK_MODIFY_PGFID_XATTR under ctx->pgfid_lock when
+ *    priv->update_pgfid_nlinks is set, and sets the gfid2path xattr when
+ *    priv->gfid2path is set and the link count does not exceed
+ *    MAX_GFID2PATH_LINK_SUP.
+ *
+ * The reply is sent with STACK_UNWIND_STRICT at the out label. When the
+ * operation failed after the link was created, the new entry is unlinked
+ * after the reply. The return value is always 0.
+ */
+int
+posix_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_oldpath = 0;
+ char *real_newpath = 0;
+ char *par_newpath = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ struct iatt preparent = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ int32_t nlink_samepgfid = 0;
+ char *pgfid_xattr_key = NULL;
+ gf_boolean_t entry_created = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(oldloc, out);
+ VALIDATE_OR_GOTO(newloc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_INODE_HANDLE(real_oldpath, this, oldloc, &stbuf);
+ if (!real_oldpath) {
+ op_errno = errno;
+ goto out;
+ }
+
+ if (priv->max_hardlinks && stbuf.ia_nlink >= priv->max_hardlinks) {
+ op_ret = -1;
+ op_errno = EMLINK;
+ gf_log(this->name, GF_LOG_ERROR,
+ "hardlink failed: %s exceeds max link count (%u/%u).",
+ real_oldpath, stbuf.ia_nlink, priv->max_hardlinks);
+ goto out;
+ }
+ /* stbuf is handed to the macro again here and is later filled by
+  * posix_pstat() on the new path. */
+ MAKE_ENTRY_HANDLE(real_newpath, par_newpath, this, newloc, &stbuf);
+ if (!real_newpath || !par_newpath) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath,
+ &preparent, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed: %s", par_newpath);
+ goto out;
+ }
+
+ op_ret = sys_link(real_oldpath, real_newpath);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LINK_FAILED,
+ "link %s to %s failed", real_oldpath, real_newpath);
+ goto out;
+ }
+ /* From here on a failure makes the out path remove the new link. */
+ entry_created = _gf_true;
+
+ op_ret = posix_pstat(this, newloc->inode, NULL, real_newpath, &stbuf,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", real_newpath);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_newpath, -1, newloc->inode, &stbuf);
+
+ op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath,
+ &postparent, _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed: %s", par_newpath);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_newpath, -1, newloc->parent,
+ &postparent);
+
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ newloc->pargfid);
+
+ op_ret = posix_inode_ctx_get_all(newloc->inode, this, &ctx);
+ if (op_ret < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->pgfid_lock);
+ {
+ LINK_MODIFY_PGFID_XATTR(real_newpath, pgfid_xattr_key,
+ nlink_samepgfid, 0, op_ret, this, unlock);
+ }
+ unlock:
+ pthread_mutex_unlock(&ctx->pgfid_lock);
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED,
+ "modification of "
+ "parent gfid xattr failed (path:%s gfid:%s)",
+ real_newpath, uuid_utoa(newloc->inode->gfid));
+ goto out; /* NOTE(review): op_errno is not assigned on this path
+  * unless the macro does so - confirm */
+ }
+ }
+
+ if (priv->gfid2path) {
+ if (stbuf.ia_nlink <= MAX_GFID2PATH_LINK_SUP) {
+ op_ret = posix_set_gfid2path_xattr(this, real_newpath,
+ newloc->pargfid, newloc->name);
+ if (op_ret) {
+ op_errno = errno;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_XATTR_NOTSUP,
+ "Link count exceeded. "
+ "gfid2path xattr not set (path:%s gfid:%s)",
+ real_newpath, uuid_utoa(newloc->inode->gfid));
+ }
+ }
+
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno,
+ (oldloc) ? oldloc->inode : NULL, &stbuf, &preparent,
+ &postparent, NULL);
+ /* Roll back the new link; this runs after the reply was already sent. */
+ if (op_ret < 0) {
+ if (entry_created)
+ sys_unlink(real_newpath);
+ }
+
+ return 0;
+}
+/*
+ * posix_create - create fop: open (creating it if needed) the file named by
+ * @loc and hand the resulting descriptor back through @fd.
+ *
+ * Sequence visible in this function:
+ *   1. validate arguments, build the entry/parent handles, switch fs ids;
+ *   2. stat the parent (pre-op) and the entry itself to learn whether the
+ *      entry already exists;
+ *   3. for a not-yet-existing entry, fail with EIO (and flag
+ *      GF_PREOP_CHECK_FAILED in the reply dict) if the layout is stale;
+ *   4. sys_open() the file; for a new entry additionally apply ownership
+ *      (only without HAVE_SET_FSID), ACLs, pgfid / gfid2path xattrs and the
+ *      xattrs requested in @xdata;
+ *   5. set the gfid, fstat the file, update ctimes, stat the parent
+ *      (post-op) and store a struct posix_fd in the fd context.
+ *
+ * On failure the descriptor is closed, the entry is unlinked if this call
+ * opened it with O_CREAT|O_EXCL, and posix_gfid_unset() is called if the
+ * gfid had been set. The outcome is delivered with
+ * STACK_UNWIND_STRICT(create, ...); the function itself always returns 0.
+ * @umask is not referenced in this body.
+ */
+int
+posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t _fd = -1;
+ int _flags = 0;
+ char *real_path = NULL;
+ char *par_path = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_fd *pfd = NULL;
+ struct posix_private *priv = NULL;
+ char was_present = 1; /* cleared only when the entry stat reports ENOENT */
+
+ gid_t gid = 0;
+ struct iatt preparent = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+
+ int nlink_samepgfid = 0;
+ char *pgfid_xattr_key = NULL;
+ gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+ mode_t mode_bit = 0;
+ uuid_t uuid_req = {
+ 0,
+ };
+
+ dict_t *xdata_rsp = dict_ref(xdata); /* reply dict; unref'd after the unwind */
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+ GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno,
+ uuid_req, out);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
+
+ gid = frame->root->gid;
+
+ SET_FS_ID(frame->root->uid, gid);
+ if (!real_path || !par_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+ /* setgid parent: use the parent's group for the chown further down */
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+ /* no flags supplied: exclusive read-write create; otherwise force O_CREAT */
+ if (!flags) {
+ _flags = O_CREAT | O_RDWR | O_EXCL;
+ } else {
+ _flags = flags | O_CREAT;
+ }
+ /* probe the entry; any stat outcome other than ENOENT leaves was_present set */
+ op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false);
+ if ((op_ret == -1) && (errno == ENOENT)) {
+ was_present = 0;
+ }
+
+ if (!was_present) {
+ if (posix_is_layout_stale(xdata, par_path, this)) {
+ op_ret = -1;
+ op_errno = EIO;
+ if (!xdata_rsp) {
+ xdata_rsp = dict_new();
+ if (!xdata_rsp) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) ==
+ -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED,
+ "setting key %s in dict failed", GF_PREOP_CHECK_FAILED);
+ }
+
+ goto out;
+ }
+ }
+
+ if (priv->o_direct)
+ _flags |= O_DIRECT;
+
+ mode_bit = (priv->create_mask & mode) | priv->force_create_mode;
+ mode = posix_override_umask(mode, mode_bit);
+ _fd = sys_open(real_path, _flags, mode);
+
+ if (_fd == -1) {
+ op_errno = errno;
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED,
+ "open on %s failed", real_path);
+ goto out;
+ }
+ /* only an O_CREAT|O_EXCL open marks the entry for unlink on later failure */
+ if ((_flags & O_CREAT) && (_flags & O_EXCL)) {
+ entry_created = _gf_true;
+ }
+ /* pre-existing entry: skip the ownership/ACL/xattr initialisation below */
+ if (was_present)
+ goto fill_stat;
+
+#ifndef HAVE_SET_FSID
+ op_ret = sys_chown(real_path, frame->root->uid, gid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
+ "chown on %s failed", real_path);
+ }
+#endif
+ op_ret = posix_acl_xattr_set(this, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED,
+ "setting ACLs on %s failed", real_path);
+ }
+
+ if (priv->update_pgfid_nlinks) {
+ MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
+ loc->pargfid);
+ nlink_samepgfid = 1;
+ SET_PGFID_XATTR(real_path, pgfid_xattr_key, nlink_samepgfid,
+ XATTR_CREATE, op_ret, this, ignore);
+ }
+
+ if (priv->gfid2path) {
+ posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+ }
+ignore:
+ op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setting xattrs on %s failed ", real_path);
+ }
+
+fill_stat:
+ op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid,
+ &op_errno);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED,
+ "setting gfid on %s failed", real_path);
+ goto out;
+ } else {
+ gfid_set = _gf_true;
+ }
+
+ op_ret = posix_fdstat(this, loc->inode, _fd, &stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat on %d failed", _fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);
+
+ op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "post-operation lstat on parent %s failed", par_path);
+ goto out;
+ }
+
+ posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent);
+
+ op_ret = -1; /* so that an allocation failure below takes the cleanup path */
+ pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
+ if (!pfd) {
+ op_errno = errno;
+ goto out;
+ }
+
+ pfd->flags = flags;
+ pfd->fd = _fd;
+
+ op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+ if (op_ret) /* logged only; the fop is still reported as successful */
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd context path=%s fd=%p", real_path, fd);
+
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+ /* failure: undo whatever this call managed to set up before unwinding */
+ if (op_ret < 0) {
+ if (_fd != -1)
+ sys_close(_fd);
+
+ if (entry_created)
+ sys_unlink(real_path);
+
+ if (gfid_set)
+ posix_gfid_unset(this, xdata);
+ }
+
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd,
+ (loc) ? loc->inode : NULL, &stbuf, &preparent,
+ &postparent, xdata_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+
+ return 0;
+}
+
+/* TODO: Ensure atomicity of put, and roll back in case of failure.
+ * One way is to perform the put in the hidden directory and rename
+ * the result to the specified location if the put was successful.
+ */
+/*
+ * posix_put - put fop: create @loc, write @vector into it at @offset, apply
+ * the xattrs in @xattr and flush, all in a single call.
+ *
+ * The individual steps are issued through the syncop_*() wrappers on this
+ * xlator (see the comment above the syncop_create() call for why). The
+ * parent is stat'ed before and after the create, and the file itself is
+ * stat'ed at the end so the unwind carries up-to-date attributes.
+ *
+ * The result is delivered with STACK_UNWIND_STRICT(put, ...); the function
+ * itself always returns 0. The temporary fd created here is released before
+ * returning, on both success and failure. @umask is not referenced.
+ *
+ * NOTE(review): the error paths after syncop_*() calls copy errno into
+ * op_errno. This assumes those wrappers leave a meaningful errno behind; if
+ * they report the error only through their return value, op_errno is stale.
+ * Confirm against the syncop implementation.
+ */
+int32_t
+posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+          off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    fd_t *fd = NULL;
+    char *real_path = NULL;
+    char *par_path = NULL;
+    struct iatt stbuf = {
+        0,
+    };
+    struct iatt preparent = {
+        0,
+    };
+    struct iatt postparent = {
+        0,
+    };
+
+    MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
+
+    if (!real_path || !par_path) {
+        op_ret = -1;
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent,
+                         _gf_false);
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "pre-operation lstat on parent %s failed", par_path);
+        goto out;
+    }
+    fd = fd_create(loc->inode, getpid());
+    if (!fd) {
+        op_ret = -1;
+        op_errno = ENOMEM;
+        goto out;
+    }
+    fd->flags = flags;
+
+    /* No xlators are expected below posix, but we cannot still call
+     * sys_create() directly here, as posix_create does many other things like
+     * chmod, setxattr etc. along with sys_create(). But we cannot also directly
+     * call posix_create() as it calls STACK_UNWIND. Hence using syncop()
+     */
+    op_ret = syncop_create(this, loc, flags, mode, fd, &stbuf, xdata, NULL);
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CREATE_FAILED,
+               "create of %s failed", loc->path);
+        goto out;
+    }
+
+    op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent,
+                         _gf_false);
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "post-operation lstat on parent %s failed", par_path);
+        goto out;
+    }
+
+    op_ret = syncop_writev(this, fd, vector, count, offset, iobref, flags, NULL,
+                           NULL, xdata, NULL);
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITE_FAILED,
+               "write on file %s failed", loc->path);
+        goto out;
+    }
+
+    op_ret = syncop_fsetxattr(this, fd, xattr, flags, xdata, NULL);
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+               "setxattr on file %s failed", loc->path);
+        goto out;
+    }
+
+    op_ret = syncop_flush(this, fd, xdata, NULL);
+    if (op_ret < 0) {
+        op_errno = errno;
+        /* message used to say "setxattr", which misattributed the failure */
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CLOSE_FAILED,
+               "flush on file %s failed", loc->path);
+        goto out;
+    }
+
+    op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &stbuf,
+                         _gf_false);
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "post-operation lstat on %s failed", real_path);
+        goto out;
+    }
+out:
+    STACK_UNWIND_STRICT(put, frame, op_ret, op_errno, loc->inode, &stbuf,
+                        &preparent, &postparent, NULL);
+
+    /* The fd exists only for the duration of this fop and is not part of the
+     * unwind, so drop the reference obtained from fd_create(); without this
+     * every put leaks the fd.
+     */
+    if (fd)
+        fd_unref(fd);
+
+    return 0;
+}
diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c
new file mode 100644
index 00000000000..1b38e9b0479
--- /dev/null
+++ b/xlators/storage/posix/src/posix-gfid-path.c
@@ -0,0 +1,243 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdint.h>
+
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/logging.h>
+#include "posix-messages.h"
+#include "posix-mem-types.h"
+#include "posix-gfid-path.h"
+#include "posix.h"
+
+/*
+ * Report whether @name is a gfid2path xattr key, i.e. whether its first
+ * GFID2PATH_XATTR_KEY_PREFIX_LENGTH bytes equal GFID2PATH_XATTR_KEY_PREFIX.
+ * A NULL @name is never a match.
+ */
+gf_boolean_t
+posix_is_gfid2path_xattr(const char *name)
+{
+    if (!name)
+        return _gf_false;
+
+    if (strncmp(GFID2PATH_XATTR_KEY_PREFIX, name,
+                GFID2PATH_XATTR_KEY_PREFIX_LENGTH) != 0)
+        return _gf_false;
+
+    return _gf_true;
+}
+
+static int gf_posix_xattr_enotsup_log;
+
+/*
+ * posix_get_gfid2path - fill @dict[GFID2PATH_VIRT_XATTR_KEY] with the path(s)
+ * of @inode.
+ *
+ * Directories: the single path is obtained from
+ * posix_resolve_dirgfid_to_path() on the inode's own gfid.
+ *
+ * Everything else: the xattr names on @real_path are listed; for every
+ * gfid2path xattr the value is read, its leading UUID_CANONICAL_FORM_LEN
+ * bytes are parsed as the parent gfid, the text after the following
+ * separator byte is taken as the basename, and the pair is resolved to a
+ * path. The resolved paths are joined with priv->gfid2path_sep.
+ *
+ * Returns 0 on success. On failure returns a negative value and stores an
+ * errno in *@op_errno; ENODATA means no gfid2path xattr is present. If
+ * reading/resolving fails after at least one path was collected, the paths
+ * gathered so far are still returned (unchanged from earlier behaviour).
+ * At most MAX_GFID2PATH_LINK_SUP paths are reported.
+ */
+int32_t
+posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path,
+                    int *op_errno, dict_t *dict)
+{
+    int ret = 0;
+    char *path = NULL;
+    ssize_t size = 0;
+    char *list = NULL;
+    int32_t list_offset = 0;
+    int32_t i = 0;
+    int32_t j = 0;
+    char *paths[MAX_GFID2PATH_LINK_SUP] = {
+        NULL,
+    };
+    char *value = NULL;
+    size_t remaining_size = 0;
+    size_t bytes = 0;
+    char keybuffer[4096] = {
+        0,
+    };
+
+    uuid_t pargfid = {
+        0,
+    };
+    gf_boolean_t have_val = _gf_false;
+    struct posix_private *priv = NULL;
+    char pargfid_str[UUID_CANONICAL_FORM_LEN + 1] = {
+        0,
+    };
+    gf_boolean_t found = _gf_false;
+    int len;
+
+    priv = this->private;
+
+    if (IA_ISDIR(inode->ia_type)) {
+        ret = posix_resolve_dirgfid_to_path(inode->gfid, priv->base_path, NULL,
+                                            &path);
+        if (ret < 0) {
+            ret = -1;
+            /* previously *op_errno was left untouched on this path */
+            *op_errno = ESTALE;
+            goto err;
+        }
+        ret = dict_set_dynstr(dict, GFID2PATH_VIRT_XATTR_KEY, path);
+        if (ret < 0) {
+            *op_errno = -ret;
+            gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+                   "could not set "
+                   "value for key (%s)",
+                   GFID2PATH_VIRT_XATTR_KEY);
+            goto err;
+        }
+        found = _gf_true;
+    } else {
+        char value_buf[8192] = {
+            0,
+        };
+        char xattr_value[8192] = {
+            0,
+        };
+        have_val = _gf_false;
+        /* First try with a stack buffer; fall back to a sized query. */
+        size = sys_llistxattr(real_path, value_buf, sizeof(value_buf) - 1);
+        if (size > 0) {
+            have_val = _gf_true;
+        } else {
+            if (errno == ERANGE) {
+                gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_XATTR_FAILED,
+                       "listxattr failed due to overflow of"
+                       " buffer on %s ",
+                       real_path);
+                size = sys_llistxattr(real_path, NULL, 0);
+            }
+            if (size == -1) {
+                /* ret used to stay 0 here, so the failure looked like
+                 * success to the caller.
+                 */
+                ret = -1;
+                *op_errno = errno;
+                if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+                    GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+                                        GF_LOG_WARNING,
+                                        "Extended attributes not "
+                                        "supported (try remounting"
+                                        " brick with 'user_xattr' "
+                                        "flag)");
+                } else {
+                    gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                           "listxattr failed on %s", real_path);
+                }
+                goto err;
+            }
+            if (size == 0)
+                goto done;
+        }
+        list = alloca(size);
+        if (!list) {
+            ret = -1;
+            *op_errno = errno;
+            goto err;
+        }
+        if (have_val) {
+            memcpy(list, value_buf, size);
+        } else {
+            size = sys_llistxattr(real_path, list, size);
+            if (size < 0) {
+                ret = -1;
+                *op_errno = errno;
+                goto err;
+            }
+        }
+        remaining_size = size;
+        list_offset = 0;
+        while (remaining_size > 0) {
+            len = snprintf(keybuffer, sizeof(keybuffer), "%s",
+                           list + list_offset);
+
+            if (!posix_is_gfid2path_xattr(keybuffer)) {
+                goto ignore;
+            }
+
+            found = _gf_true;
+
+            /* paths[] has room for MAX_GFID2PATH_LINK_SUP entries only */
+            if (i >= MAX_GFID2PATH_LINK_SUP) {
+                gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED,
+                       "more than %d gfid2path xattrs on %s, "
+                       "remaining links are not reported",
+                       MAX_GFID2PATH_LINK_SUP, real_path);
+                break;
+            }
+
+            size = sys_lgetxattr(real_path, keybuffer, xattr_value,
+                                 sizeof(xattr_value) - 1);
+            if (size == -1) {
+                ret = -1;
+                *op_errno = errno;
+                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                       "getxattr failed on"
+                       " %s: key = %s ",
+                       real_path, keybuffer);
+                break;
+            }
+            /* The buffer is reused across iterations: terminate it so the
+             * tail of a longer, earlier value cannot leak into this one.
+             */
+            xattr_value[size] = '\0';
+
+            /* Need the gfid text, one separator byte and a basename. */
+            if (size <= UUID_CANONICAL_FORM_LEN + 1) {
+                ret = -1;
+                *op_errno = EINVAL;
+                gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED,
+                       "malformed gfid2path xattr on"
+                       " %s: key = %s ",
+                       real_path, keybuffer);
+                break;
+            }
+
+            /* Parse pargfid from xattr value */
+            memcpy(pargfid_str, xattr_value, UUID_CANONICAL_FORM_LEN);
+            pargfid_str[UUID_CANONICAL_FORM_LEN] = '\0';
+            if (gf_uuid_parse(pargfid_str, pargfid) != 0) {
+                ret = -1;
+                *op_errno = EINVAL;
+                gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED,
+                       "malformed gfid2path xattr on"
+                       " %s: key = %s ",
+                       real_path, keybuffer);
+                break;
+            }
+
+            /* Convert pargfid to path */
+            ret = posix_resolve_dirgfid_to_path(
+                pargfid, priv->base_path,
+                &xattr_value[UUID_CANONICAL_FORM_LEN + 1], &paths[i]);
+            if (ret < 0 || !paths[i]) {
+                /* An unresolved slot must not be counted: the join below
+                 * calls strlen() on every counted entry.
+                 */
+                ret = -1;
+                *op_errno = ESTALE;
+                break;
+            }
+            i++;
+
+        ignore:
+            remaining_size -= (len + 1);
+            list_offset += (len + 1);
+        } /* while (remaining_size > 0) */
+
+        /* gfid2path xattr is absent in the list of xattrs */
+        if (!found) {
+            ret = -1;
+            /*
+             * ENODATA because xattr is not present in the
+             * list of xattrs. Thus the consumer should
+             * face error instead of a success and a empty
+             * string in the dict for the key.
+             */
+            *op_errno = ENODATA;
+            goto err;
+        }
+
+        /*
+         * A gfid2path xattr is present, but reading or resolving the
+         * very first one failed and the loop above broke. There is
+         * nothing to put in the value, and sending "" for the key
+         * would be misleading, so fail instead.
+         */
+        if (found && !i)
+            goto err; /* both *op_errno and ret are set before the break */
+
+        /* Calculate memory to be allocated */
+        for (j = 0; j < i; j++) {
+            bytes += strlen(paths[j]);
+            if (j < i - 1)
+                bytes += strlen(priv->gfid2path_sep);
+        }
+        value = GF_CALLOC(bytes + 1, sizeof(char), gf_posix_mt_char);
+        if (!value) {
+            ret = -1;
+            *op_errno = errno;
+            goto err;
+        }
+
+        for (j = 0; j < i; j++) {
+            strcat(value, paths[j]);
+            if (j != i - 1)
+                strcat(value, priv->gfid2path_sep);
+        }
+        value[bytes] = '\0';
+
+        ret = dict_set_dynptr(dict, GFID2PATH_VIRT_XATTR_KEY, value, bytes);
+        if (ret < 0) {
+            *op_errno = -ret;
+            gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_DICT_SET_FAILED,
+                   "dict set operation "
+                   "on %s for the key %s failed.",
+                   real_path, GFID2PATH_VIRT_XATTR_KEY);
+            GF_FREE(value);
+            goto err;
+        }
+    }
+
+done:
+    for (j = 0; j < i; j++) {
+        if (paths[j])
+            GF_FREE(paths[j]);
+    }
+    ret = 0;
+    return ret;
+err:
+    /* NOTE(review): path/value are freed here after a failed dict_set_*();
+     * this assumes the dict did not already release them - confirm.
+     */
+    if (path)
+        GF_FREE(path);
+    for (j = 0; j < i; j++) {
+        if (paths[j])
+            GF_FREE(paths[j]);
+    }
+    return ret;
+}
diff --git a/xlators/storage/posix/src/posix-gfid-path.h b/xlators/storage/posix/src/posix-gfid-path.h
new file mode 100644
index 00000000000..79096e5893f
--- /dev/null
+++ b/xlators/storage/posix/src/posix-gfid-path.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_GFID_PATH_H
+#define _POSIX_GFID_PATH_H
+
+#include <glusterfs/compat-errno.h>
+
+#include <stdint.h> // for int32_t
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
+#include "glusterfs/inode.h" // for inode_t
+#include "uuid.h" // for uuid_t
+#define MAX_GFID2PATH_LINK_SUP 500 /* capacity of the per-file path table built by posix_get_gfid2path() */
+/* posix_is_gfid2path_xattr(): _gf_true when @name is non-NULL and starts with the gfid2path xattr key prefix. */
+gf_boolean_t
+posix_is_gfid2path_xattr(const char *name);
+int32_t
+posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path,
+ int *op_errno, dict_t *dict); /* stores the path(s) of @inode in @dict; errno is reported through *@op_errno */
+#endif /* _POSIX_GFID_PATH_H */
diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c
index 78aba988e31..410b38da8cb 100644
--- a/xlators/storage/posix/src/posix-handle.c
+++ b/xlators/storage/posix/src/posix-handle.c
@@ -7,11 +7,6 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -23,290 +18,377 @@
#include "posix-handle.h"
#include "posix.h"
-#include "xlator.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
+#include "posix-messages.h"
+#include "posix-metadata.h"
-inode_t *
-posix_resolve (xlator_t *this, inode_table_t *itable, inode_t *parent,
- char *bname, struct iatt *iabuf)
-{
- inode_t *inode = NULL, *linked_inode = NULL;
- int ret = -1;
+#include <glusterfs/compat-errno.h>
- ret = posix_istat (this, parent->gfid, bname, iabuf);
- if (ret < 0)
- goto out;
+int
+posix_handle_mkdir_hashes(xlator_t *this, int dfd, uuid_t gfid);
- inode = inode_find (itable, iabuf->ia_gfid);
+inode_t *
+posix_resolve(xlator_t *this, inode_table_t *itable, inode_t *parent,
+ char *bname, struct iatt *iabuf)
+{
+ inode_t *inode = NULL;
+ int ret = -1;
+
+ ret = posix_istat(this, NULL, parent->gfid, bname, iabuf);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "gfid: %s, bname: %s "
+ "failed",
+ uuid_utoa(parent->gfid), bname);
+ goto out;
+ }
+
+ if (__is_root_gfid(iabuf->ia_gfid) && !strcmp(bname, "/")) {
+ inode = itable->root;
+ } else {
+ inode = inode_find(itable, iabuf->ia_gfid);
if (inode == NULL) {
- inode = inode_new (itable);
- }
-
- linked_inode = inode_link (inode, parent, bname, iabuf);
-
- inode_unref (inode);
+ inode = inode_new(itable);
+ gf_uuid_copy(inode->gfid, iabuf->ia_gfid);
+ }
+ }
+
+ /* posix_istat wouldn't have fetched posix_mdata_t i.e.,
+ * time attributes as inode is passed as NULL, hence get
+ * here once you got the inode
+ */
+ ret = posix_get_mdata_xattr(this, NULL, -1, inode, iabuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+ "posix get mdata failed on gfid:%s", uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ /* Linking an inode here, can cause a race in posix_acl.
+ Parent inode gets linked here, but before
+ it reaches posix_acl_readdirp_cbk, create/lookup can
+ come on a leaf-inode, as parent-inode-ctx not yet updated
+ in posix_acl_readdirp_cbk, create and lookup can fail
+ with EACCESS. So do the inode linking in the quota xlator
+
+ if (__is_root_gfid (iabuf->ia_gfid) && !strcmp (bname, "/"))
+ linked_inode = itable->root;
+ else
+ linked_inode = inode_link (inode, parent, bname, iabuf);
+
+ inode_unref (inode);*/
out:
- return linked_inode;
+ return inode;
}
int
-posix_make_ancestral_node (const char *priv_base_path, char *path, int pathsize,
- gf_dirent_t *head,
- char *dir_name, struct iatt *iabuf, inode_t *inode,
- int type, dict_t *xdata)
+posix_make_ancestral_node(const char *priv_base_path, char *path, int pathsize,
+ gf_dirent_t *head, char *dir_name, struct iatt *iabuf,
+ inode_t *inode, int type, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- char real_path[PATH_MAX + 1] = {0, }, len = 0;
- loc_t loc = {0, };
- int ret = -1;
-
- len = strlen (path) + strlen (dir_name) + 1;
- if (len > pathsize) {
- goto out;
- }
-
- strcat (path, dir_name);
-
- if (type & POSIX_ANCESTRY_DENTRY) {
- entry = gf_dirent_for_name (dir_name);
- if (!entry) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "could not create gf_dirent for entry %s: (%s)",
- dir_name, strerror (errno));
- goto out;
- }
-
- entry->d_stat = *iabuf;
- entry->inode = inode_ref (inode);
-
- list_add_tail (&entry->list, &head->list);
- strcpy (real_path, priv_base_path);
- strcat (real_path, "/");
- strcat (real_path, path);
- loc.inode = inode_ref (inode);
- uuid_copy (loc.gfid, inode->gfid);
-
- entry->dict = posix_lookup_xattr_fill (THIS, real_path, &loc,
- xdata, iabuf);
- loc_wipe (&loc);
- }
-
- ret = 0;
+ gf_dirent_t *entry = NULL;
+ char real_path[PATH_MAX + 1] =
+ {
+ 0,
+ },
+ len = 0;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+
+ len = strlen(path) + strlen(dir_name) + 1;
+ if (len > pathsize) {
+ goto out;
+ }
+
+ strcat(path, dir_name);
+ if (*dir_name != '/')
+ strcat(path, "/");
+
+ if (type & POSIX_ANCESTRY_DENTRY) {
+ entry = gf_dirent_for_name(dir_name);
+ if (!entry)
+ goto out;
+
+ entry->d_stat = *iabuf;
+ entry->inode = inode_ref(inode);
+
+ list_add_tail(&entry->list, &head->list);
+ snprintf(real_path, sizeof(real_path), "%s/%s", priv_base_path, path);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ entry->dict = posix_xattr_fill(THIS, real_path, &loc, NULL, -1, xdata,
+ iabuf);
+ loc_wipe(&loc);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize,
- gf_dirent_t *head, int type, uuid_t gfid,
- const size_t handle_size,
- const char *priv_base_path, inode_table_t *itable,
- inode_t **parent, dict_t *xdata)
+posix_make_ancestryfromgfid(xlator_t *this, char *path, int pathsize,
+ gf_dirent_t *head, int type, uuid_t gfid,
+ const size_t handle_size,
+ const char *priv_base_path, inode_table_t *itable,
+ inode_t **parent, dict_t *xdata, int32_t *op_errno)
{
- char *linkname = NULL; /* "../../<gfid[0]>/<gfid[1]/"
- "<gfidstr>/<NAME_MAX>" */
- char *dir_handle = NULL;
- char *dir_name = NULL;
- char *pgfidstr = NULL;
- char *saveptr = NULL;
- ssize_t len = 0;
- inode_t *inode = NULL;
- struct iatt iabuf = {0, };
- int ret = -1;
- uuid_t tmp_gfid = {0, };
-
- if (!path || !parent || !priv_base_path || uuid_is_null (gfid)) {
- goto out;
- }
-
- if (__is_root_gfid (gfid)) {
- if (parent) {
- if (*parent) {
- inode_unref (*parent);
- }
-
- *parent = inode_ref (itable->root);
- }
-
- inode = itable->root;
-
- memset (&iabuf, 0, sizeof (iabuf));
- uuid_copy (iabuf.ia_gfid, inode->gfid);
- iabuf.ia_type = inode->ia_type;
-
- ret = posix_make_ancestral_node (priv_base_path, path, pathsize,
- head, "/", &iabuf, inode, type,
- xdata);
- return ret;
- }
-
- dir_handle = alloca (handle_size);
- linkname = alloca (PATH_MAX);
- snprintf (dir_handle, handle_size, "%s/%s/%02x/%02x/%s",
- priv_base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
- uuid_utoa (gfid));
-
- len = readlink (dir_handle, linkname, PATH_MAX);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR, "could not read the link "
- "from the gfid handle %s (%s)", dir_handle,
- strerror (errno));
+ char *linkname = NULL; /* "../../<gfid[0]>/<gfid[1]/"
+ "<gfidstr>/<NAME_MAX>" */
+ char *dir_handle = NULL;
+ char *pgfidstr = NULL;
+ char *saveptr = NULL;
+ ssize_t len = 0;
+ inode_t *inode = NULL;
+ struct iatt iabuf = {
+ 0,
+ };
+ int ret = -1;
+ uuid_t tmp_gfid = {
+ 0,
+ };
+ char *dir_stack[PATH_MAX / 2 + 1]; /* Since PATH_MAX/2 also gives
+ an upper bound on depth of
+ directories tree */
+ uuid_t gfid_stack[PATH_MAX / 2 + 1];
+
+ char *dir_name = NULL;
+ char *saved_dir = NULL;
+ int top = -1;
+
+ if (!path || !parent || !priv_base_path || gf_uuid_is_null(gfid)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ dir_handle = alloca(handle_size);
+ linkname = alloca(PATH_MAX);
+ gf_uuid_copy(tmp_gfid, gfid);
+
+ while (top < PATH_MAX / 2) {
+ gf_uuid_copy(gfid_stack[++top], tmp_gfid);
+ if (__is_root_gfid(tmp_gfid)) {
+ *parent = inode_ref(itable->root);
+
+ saved_dir = alloca(sizeof("/"));
+ strcpy(saved_dir, "/");
+ dir_stack[top] = saved_dir;
+ break;
+ } else {
+ snprintf(dir_handle, handle_size, "%s/%s/%02x/%02x/%s",
+ priv_base_path, GF_HIDDEN_PATH, tmp_gfid[0], tmp_gfid[1],
+ uuid_utoa(tmp_gfid));
+
+ len = sys_readlink(dir_handle, linkname, PATH_MAX);
+ if (len < 0) {
+ *op_errno = errno;
+ gf_msg(this->name,
+ (errno == ENOENT || errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ errno, P_MSG_READLINK_FAILED,
+ "could not read"
+ " the link from the gfid handle %s ",
+ dir_handle);
+ ret = -1;
goto out;
+ }
+
+ linkname[len] = '\0';
+
+ pgfidstr = strtok_r(linkname + SLEN("../../00/00/"), "/", &saveptr);
+ dir_name = strtok_r(NULL, "/", &saveptr);
+ saved_dir = alloca(strlen(dir_name) + 1);
+ gf_uuid_parse(pgfidstr, tmp_gfid);
+ strcpy(saved_dir, dir_name);
+ dir_stack[top] = saved_dir;
+ }
+ }
+ if (top == PATH_MAX / 2) {
+ gf_msg(this->name, GF_LOG_ERROR, P_MSG_ANCESTORY_FAILED, 0,
+ "build ancestry failed due to "
+ "deep directory hierarchy, depth: %d.",
+ top);
+ *op_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ while (top >= 0) {
+ if (!*parent) {
+ /* There's no real "root" cause for how we end up here,
+ * so for now let's log this and bail out to prevent
+ * crashes.
+ */
+ gf_msg(this->name, GF_LOG_WARNING, P_MSG_INODE_RESOLVE_FAILED, 0,
+ "OOPS: *parent is null (path: %s), bailing!", path);
+ goto out;
+ }
+
+ memset(&iabuf, 0, sizeof(iabuf));
+ inode = posix_resolve(this, itable, *parent, dir_stack[top], &iabuf);
+ if (inode == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, P_MSG_INODE_RESOLVE_FAILED, 0,
+ "posix resolve on the inode %s failed",
+ uuid_utoa(gfid_stack[top]));
+ *op_errno = ESTALE;
+ ret = -1;
+ goto out;
}
- linkname[len] = '\0';
-
- pgfidstr = strtok_r (linkname + SLEN("../../00/00/"), "/", &saveptr);
- dir_name = strtok_r (NULL, "/", &saveptr);
- strcat (dir_name, "/");
-
- uuid_parse (pgfidstr, tmp_gfid);
-
- ret = posix_make_ancestryfromgfid (this, path, pathsize, head, type,
- tmp_gfid, handle_size,
- priv_base_path, itable, parent,
- xdata);
+ ret = posix_make_ancestral_node(priv_base_path, path, pathsize, head,
+ dir_stack[top], &iabuf, inode, type,
+ xdata);
if (ret < 0) {
- goto out;
- }
-
- memset (&iabuf, 0, sizeof (iabuf));
-
- inode = posix_resolve (this, itable, *parent, dir_name, &iabuf);
-
- ret = posix_make_ancestral_node (priv_base_path, path, pathsize, head,
- dir_name, &iabuf, inode, type, xdata);
- if (*parent != NULL) {
- inode_unref (*parent);
+ *op_errno = ENOMEM;
+ goto out;
}
+ inode_unref(*parent);
*parent = inode;
-
+ top--;
+ }
out:
- return ret;
+ return ret;
}
int
-posix_handle_relpath (xlator_t *this, uuid_t gfid, const char *basename,
- char *buf, size_t buflen)
+posix_handle_relpath(xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t buflen)
{
- char *uuid_str = NULL;
- int len = 0;
-
- len = SLEN("../")
- + SLEN("../")
- + SLEN("00/")
- + SLEN("00/")
- + SLEN(UUID0_STR)
- + 1 /* '\0' */
- ;
-
- if (basename) {
- len += (strlen (basename) + 1);
- }
-
- if (buflen < len || !buf)
- return len;
+ char *uuid_str = NULL;
+ int len = 0;
- uuid_str = uuid_utoa (gfid);
+ len = POSIX_GFID_HANDLE_RELSIZE;
- if (basename) {
- len = snprintf (buf, buflen, "../../%02x/%02x/%s/%s",
- gfid[0], gfid[1], uuid_str, basename);
- } else {
- len = snprintf (buf, buflen, "../../%02x/%02x/%s",
- gfid[0], gfid[1], uuid_str);
- }
+ if (basename) {
+ len += (strlen(basename) + 1);
+ }
+ if (buflen < len || !buf)
return len;
-}
+ uuid_str = uuid_utoa(gfid);
+
+ if (basename) {
+ len = snprintf(buf, buflen, "../../%02x/%02x/%s/%s", gfid[0], gfid[1],
+ uuid_str, basename);
+ } else {
+ len = snprintf(buf, buflen, "../../%02x/%02x/%s", gfid[0], gfid[1],
+ uuid_str);
+ }
+
+ return len;
+}
/*
TODO: explain how this pump fixes ELOOP
*/
-int
-posix_handle_pump (xlator_t *this, char *buf, int len, int maxlen,
- char *base_str, int base_len, int pfx_len)
+gf_boolean_t
+posix_is_malformed_link(xlator_t *this, char *base_str, char *linkname,
+ size_t len)
{
- char linkname[512] = {0,}; /* "../../<gfid>/<NAME_MAX>" */
- int ret = 0;
- int blen = 0;
- int link_len = 0;
-
- /* is a directory's symlink-handle */
- ret = readlink (base_str, linkname, 512);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "internal readlink failed on %s (%s)",
- base_str, strerror (errno));
- goto err;
- }
-
- if (ret < 512)
- linkname[ret] = 0;
+ if ((len == 8) && strcmp(linkname, "../../..")) /*for root*/
+ goto err;
- link_len = ret;
-
- if ((ret == 8) && memcmp (linkname, "../../..", 8) == 0) {
- if (strcmp (base_str, buf) == 0) {
- strcpy (buf + pfx_len, "..");
- }
- goto out;
- }
-
- if (ret < 50 || ret >= 512) {
- gf_log (this->name, GF_LOG_ERROR,
- "malformed internal link %s for %s",
- linkname, base_str);
- goto err;
- }
+ if (len < 50 || len >= 512)
+ goto err;
- if (memcmp (linkname, "../../", 6) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "malformed internal link %s for %s",
- linkname, base_str);
- goto err;
- }
+ if (memcmp(linkname, "../../", 6) != 0)
+ goto err;
- if ((linkname[2] != '/') ||
- (linkname[5] != '/') ||
- (linkname[8] != '/') ||
- (linkname[11] != '/') ||
- (linkname[48] != '/')) {
- gf_log (this->name, GF_LOG_ERROR,
- "malformed internal link %s for %s",
- linkname, base_str);
- goto err;
- }
+ if ((linkname[2] != '/') || (linkname[5] != '/') || (linkname[8] != '/') ||
+ (linkname[11] != '/') || (linkname[48] != '/')) {
+ goto err;
+ }
- if ((linkname[20] != '-') ||
- (linkname[25] != '-') ||
- (linkname[30] != '-') ||
- (linkname[35] != '-')) {
- gf_log (this->name, GF_LOG_ERROR,
- "malformed internal link %s for %s",
- linkname, base_str);
- goto err;
- }
+ if ((linkname[20] != '-') || (linkname[25] != '-') ||
+ (linkname[30] != '-') || (linkname[35] != '-')) {
+ goto err;
+ }
- blen = link_len - 48;
- memmove (buf + base_len + blen, buf + base_len,
- (strlen (buf) - base_len) + 1);
+ return _gf_false;
- strncpy (base_str + pfx_len, linkname + 6, 42);
+err:
+ gf_log_callingfn(this->name, GF_LOG_ERROR,
+ "malformed internal link "
+ "%s for %s",
+ linkname, base_str);
+ return _gf_true;
+}
- if (len + blen < maxlen)
- strncpy (buf + pfx_len, linkname + 6, link_len - 6);
+int
+posix_handle_pump(xlator_t *this, char *buf, int len, int maxlen,
+ char *base_str, int base_len, int pfx_len)
+{
+ char linkname[512] = {
+ 0,
+ }; /* "../../<gfid>/<NAME_MAX>" */
+ int ret = 0;
+ int blen = 0;
+ int link_len = 0;
+ char tmpstr[POSIX_GFID_HASH2_LEN] = {
+ 0,
+ };
+ char d2[3] = {
+ 0,
+ };
+ int index = 0;
+ int dirfd = 0;
+ struct posix_private *priv = this->private;
+
+ strncpy(tmpstr, (base_str + pfx_len + 3), 40);
+ strncpy(d2, (base_str + pfx_len), 2);
+ index = strtoul(d2, NULL, 16);
+ dirfd = priv->arrdfd[index];
+
+ /* is a directory's symlink-handle */
+ ret = readlinkat(dirfd, tmpstr, linkname, 512);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED,
+ "internal readlink failed on %s ", base_str);
+ goto err;
+ }
+
+ if (ret < 512)
+ linkname[ret] = 0;
+
+ link_len = ret;
+
+ if ((ret == 8) && memcmp(linkname, "../../..", 8) == 0) {
+ if (strcmp(base_str, buf) == 0) {
+ strcpy(buf + pfx_len, "..");
+ }
+ goto out;
+ }
+
+ if (posix_is_malformed_link(this, base_str, linkname, ret))
+ goto err;
+
+ blen = link_len - 48;
+
+ if (len + blen >= maxlen) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLEPATH_FAILED,
+ "Unable to form handle path for %s (maxlen = %d)", buf, maxlen);
+ goto err;
+ }
+
+ memmove(buf + base_len + blen, buf + base_len,
+ (strlen(buf) - base_len) + 1);
+
+ strncpy(base_str + pfx_len, linkname + 6, 42);
+
+ strncpy(buf + pfx_len, linkname + 6, link_len - 6);
out:
- return len + blen;
+ return len + blen;
err:
- return -1;
+ return -1;
}
-
/*
posix_handle_path differs from posix_handle_gfid_path in the way that the
path filled in @buf by posix_handle_path will return type IA_IFDIR when
@@ -317,560 +399,622 @@ err:
*/
int
-posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename,
- char *ubuf, size_t size)
+posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf,
+ size_t size)
{
- struct posix_private *priv = NULL;
- char *uuid_str = NULL;
- int len = 0;
- int ret = -1;
- struct stat stat;
- char *base_str = NULL;
- int base_len = 0;
- int pfx_len;
- int maxlen;
- char *buf;
-
- priv = this->private;
-
- uuid_str = uuid_utoa (gfid);
-
- if (ubuf) {
- buf = ubuf;
- maxlen = size;
- } else {
- maxlen = PATH_MAX;
- buf = alloca (maxlen);
- }
-
- base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45);
- base_str = alloca (base_len + 1);
- base_len = snprintf (base_str, base_len + 1, "%s/%s/%02x/%02x/%s",
- priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
- uuid_str);
-
- pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1;
-
- if (basename) {
- len = snprintf (buf, maxlen, "%s/%s", base_str, basename);
- } else {
- len = snprintf (buf, maxlen, "%s", base_str);
- }
-
- ret = lstat (base_str, &stat);
-
- if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1))
- goto out;
-
- do {
- errno = 0;
- ret = posix_handle_pump (this, buf, len, maxlen,
- base_str, base_len, pfx_len);
- if (ret == -1)
- break;
-
- len = ret;
-
- ret = lstat (buf, &stat);
- } while ((ret == -1) && errno == ELOOP);
+ struct posix_private *priv = NULL;
+ char *uuid_str = NULL;
+ int len = 0;
+ int ret = -1;
+ struct stat stat;
+ char *base_str = NULL;
+ int base_len = 0;
+ int pfx_len;
+ int maxlen;
+ char *buf;
+ int index = 0;
+ int dfd = 0;
+ char newstr[POSIX_GFID_HASH2_LEN] = {
+ 0,
+ };
+
+ priv = this->private;
+
+ uuid_str = uuid_utoa(gfid);
+
+ if (ubuf) {
+ buf = ubuf;
+ maxlen = size;
+ } else {
+ maxlen = PATH_MAX;
+ buf = alloca(maxlen);
+ }
+
+ index = gfid[0];
+ dfd = priv->arrdfd[index];
+
+ base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45);
+ base_str = alloca(base_len + 1);
+ base_len = snprintf(base_str, base_len + 1, "%s/%s/%02x/%02x/%s",
+ priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
+ uuid_str);
+ pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1;
+
+ if (basename) {
+ len = snprintf(buf, maxlen, "%s/%s", base_str, basename);
+ } else {
+ len = snprintf(buf, maxlen, "%s", base_str);
+ }
+
+ snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_str);
+ ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW);
+
+ if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1))
+ goto out;
+
+ do {
+ errno = 0;
+ ret = posix_handle_pump(this, buf, len, maxlen, base_str, base_len,
+ pfx_len);
+ len = ret;
+
+ if (ret == -1)
+ break;
+ ret = sys_lstat(buf, &stat);
+ } while ((ret == -1) && errno == ELOOP);
out:
- return len + 1;
+ return len + 1;
}
-
int
-posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
- char *buf, size_t buflen)
+posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t buflen)
{
- struct posix_private *priv = NULL;
- char *uuid_str = NULL;
- int len = 0;
-
- priv = this->private;
-
- len = priv->base_path_length /* option directory "/export" */
- + SLEN("/")
- + SLEN(GF_HIDDEN_PATH)
- + SLEN("/")
- + SLEN("00/")
- + SLEN("00/")
- + SLEN(UUID0_STR)
- + 1 /* '\0' */
- ;
-
- if (basename) {
- len += (strlen (basename) + 1);
- } else {
- len += 256; /* worst-case for directory's symlink-handle expansion */
- }
+ struct posix_private *priv = NULL;
+ char *uuid_str = NULL;
+ int len = 0;
- if ((buflen < len) || !buf)
- return len;
+ priv = this->private;
- uuid_str = uuid_utoa (gfid);
+ len = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
- if (__is_root_gfid (gfid)) {
- if (basename) {
- len = snprintf (buf, buflen, "%s/%s", priv->base_path,
- basename);
- } else {
- strncpy (buf, priv->base_path, buflen);
- }
- goto out;
- }
+ len += 256; /* worst-case for directory's symlink-handle expansion */
- if (basename) {
- len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path,
- GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str, basename);
- } else {
- len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path,
- GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str);
- }
-out:
+ if ((buflen < len) || !buf)
return len;
-}
+ uuid_str = uuid_utoa(gfid);
+
+ if (__is_root_gfid(gfid)) {
+ len = snprintf(buf, buflen, "%s", priv->base_path);
+ } else {
+ len = snprintf(buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path,
+ GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str);
+ }
+
+ return len;
+}
int
-posix_handle_init (xlator_t *this)
+posix_handle_init(xlator_t *this)
{
- struct posix_private *priv = NULL;
- char *handle_pfx = NULL;
- int ret = 0;
- struct stat stbuf;
- struct stat rootbuf;
- struct stat exportbuf;
- char *rootstr = NULL;
- uuid_t gfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
-
- priv = this->private;
-
- ret = stat (priv->base_path, &exportbuf);
- if (ret || !S_ISDIR (exportbuf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Not a directory: %s", priv->base_path);
- return -1;
- }
+ struct posix_private *priv = NULL;
+ char *handle_pfx = NULL;
+ int ret = 0;
+ struct stat stbuf;
+ struct stat rootbuf;
+ struct stat exportbuf;
+ char *rootstr = NULL;
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ int dfd = 0;
+
+ priv = this->private;
+
+ ret = sys_stat(priv->base_path, &exportbuf);
+ if (ret || !S_ISDIR(exportbuf.st_mode)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+ "Not a directory: %s", priv->base_path);
+ return -1;
+ }
- handle_pfx = alloca (priv->base_path_length + 1 + strlen (GF_HIDDEN_PATH)
- + 1);
+ handle_pfx = alloca(priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1);
- sprintf (handle_pfx, "%s/%s", priv->base_path, GF_HIDDEN_PATH);
+ sprintf(handle_pfx, "%s/%s", priv->base_path, GF_HIDDEN_PATH);
- ret = stat (handle_pfx, &stbuf);
- switch (ret) {
+ ret = sys_stat(handle_pfx, &stbuf);
+ switch (ret) {
case -1:
- if (errno == ENOENT) {
- ret = mkdir (handle_pfx, 0600);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Creating directory %s failed: %s",
- handle_pfx, strerror (errno));
- return -1;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Checking for %s failed: %s",
- handle_pfx, strerror (errno));
- return -1;
+ if (errno == ENOENT) {
+ ret = sys_mkdir(handle_pfx, 0600);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "Creating directory %s failed", handle_pfx);
+ return -1;
}
- break;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "Checking for %s failed", handle_pfx);
+ return -1;
+ }
+ break;
case 0:
- if (!S_ISDIR (stbuf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Not a directory: %s",
- handle_pfx);
- return -1;
- }
- break;
+ if (!S_ISDIR(stbuf.st_mode)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+ "Not a directory: %s", handle_pfx);
+ return -1;
+ }
+ break;
default:
- break;
- }
+ break;
+ }
- stat (handle_pfx, &priv->handledir);
+ ret = sys_stat(handle_pfx, &priv->handledir);
- MAKE_HANDLE_ABSPATH(rootstr, this, gfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "stat for %s failed", handle_pfx);
+ return -1;
+ }
- ret = stat (rootstr, &rootbuf);
- switch (ret) {
+ MAKE_HANDLE_ABSPATH_FD(rootstr, this, gfid, dfd);
+ ret = sys_fstatat(dfd, rootstr, &rootbuf, 0);
+ switch (ret) {
case -1:
- if (errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", priv->base_path,
- strerror (errno));
- return -1;
- }
-
- ret = posix_handle_mkdir_hashes (this, rootstr);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "mkdir %s failed (%s)",
- rootstr, strerror (errno));
- return -1;
- }
+ if (errno != ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "%s", priv->base_path);
+ return -1;
+ }
+ ret = posix_handle_mkdir_hashes(this, dfd, gfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "mkdir %s failed", rootstr);
+ return -1;
+ }
- ret = symlink ("../../..", rootstr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "symlink %s creation failed (%s)",
- rootstr, strerror (errno));
- return -1;
- }
- break;
- case 0:
- if ((exportbuf.st_ino == rootbuf.st_ino) &&
- (exportbuf.st_dev == rootbuf.st_dev))
- return 0;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Different dirs %s (%lld/%lld) != %s (%lld/%lld)",
- priv->base_path, (long long) exportbuf.st_ino,
- (long long) exportbuf.st_dev, rootstr,
- (long long) rootbuf.st_ino, (long long) rootbuf.st_dev);
+ ret = sys_symlinkat("../../..", dfd, rootstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "symlink %s creation failed", rootstr);
return -1;
+ }
+ break;
+ case 0:
+ if ((exportbuf.st_ino == rootbuf.st_ino) &&
+ (exportbuf.st_dev == rootbuf.st_dev))
+ return 0;
- break;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE,
+ "Different dirs %s (%lld/%lld) != %s (%lld/%lld)",
+ priv->base_path, (long long)exportbuf.st_ino,
+ (long long)exportbuf.st_dev, rootstr,
+ (long long)rootbuf.st_ino, (long long)rootbuf.st_dev);
+ return -1;
+
+ break;
+ }
- return 0;
+ return 0;
}
gf_boolean_t
-posix_does_old_trash_exists (char *old_trash)
+posix_does_old_trash_exists(char *old_trash)
{
- uuid_t gfid = {0};
- gf_boolean_t exists = _gf_false;
- struct stat stbuf = {0};
- int ret = 0;
-
- ret = lstat (old_trash, &stbuf);
- if ((ret == 0) && S_ISDIR (stbuf.st_mode)) {
- ret = sys_lgetxattr (old_trash, "trusted.gfid", gfid, 16);
- if ((ret < 0) && (errno == ENODATA))
- exists = _gf_true;
- }
- return exists;
+ uuid_t gfid = {0};
+ gf_boolean_t exists = _gf_false;
+ struct stat stbuf = {0};
+ int ret = 0;
+
+ ret = sys_lstat(old_trash, &stbuf);
+ if ((ret == 0) && S_ISDIR(stbuf.st_mode)) {
+ ret = sys_lgetxattr(old_trash, "trusted.gfid", gfid, 16);
+ if ((ret < 0) && (errno == ENODATA || errno == ENOATTR))
+ exists = _gf_true;
+ }
+ return exists;
}
int
-posix_handle_new_trash_init (xlator_t *this, char *trash)
+posix_handle_new_trash_init(xlator_t *this, char *trash)
{
- int ret = 0;
- struct stat stbuf = {0};
+ int ret = 0;
+ struct stat stbuf = {0};
- ret = lstat (trash, &stbuf);
- switch (ret) {
+ ret = sys_lstat(trash, &stbuf);
+ switch (ret) {
case -1:
- if (errno == ENOENT) {
- ret = mkdir (trash, 0755);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Creating directory %s failed: %s",
- trash, strerror (errno));
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR, "Checking for %s "
- "failed: %s", trash, strerror (errno));
+ if (errno == ENOENT) {
+ ret = sys_mkdir(trash, 0755);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_HANDLE_TRASH_CREATE,
+ "Creating directory %s failed", trash);
}
- break;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_HANDLE_TRASH_CREATE, "Checking for %s failed",
+ trash);
+ }
+ break;
case 0:
- if (!S_ISDIR (stbuf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Not a directory: %s", trash);
- ret = -1;
- }
- break;
+ if (!S_ISDIR(stbuf.st_mode)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_HANDLE_TRASH_CREATE, "Not a directory: %s", trash);
+ ret = -1;
+ }
+ break;
default:
- break;
- }
- return ret;
+ break;
+ }
+ return ret;
}
int
-posix_mv_old_trash_into_new_trash (xlator_t *this, char *old, char *new)
+posix_mv_old_trash_into_new_trash(xlator_t *this, char *old, char *new)
{
- char dest_old[PATH_MAX] = {0};
- int ret = 0;
- uuid_t dest_name = {0};
-
- if (!posix_does_old_trash_exists (old))
- goto out;
- uuid_generate (dest_name);
- snprintf (dest_old, sizeof (dest_old), "%s/%s", new,
- uuid_utoa (dest_name));
- ret = rename (old, dest_old);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Not able to move "
- "%s -> %s (%s)", old, dest_old, strerror (errno));
- }
+ char dest_old[PATH_MAX] = {0};
+ int ret = 0;
+ uuid_t dest_name = {0};
+
+ if (!posix_does_old_trash_exists(old))
+ goto out;
+ gf_uuid_generate(dest_name);
+ snprintf(dest_old, sizeof(dest_old), "%s/%s", new, uuid_utoa(dest_name));
+ ret = sys_rename(old, dest_old);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_TRASH_CREATE,
+ "Not able to move %s -> %s ", old, dest_old);
+ }
out:
- return ret;
+ return ret;
}
int
-posix_handle_trash_init (xlator_t *this)
+posix_handle_trash_init(xlator_t *this)
{
- int ret = -1;
- struct posix_private *priv = NULL;
- char old_trash[PATH_MAX] = {0};
-
- priv = this->private;
-
- priv->trash_path = GF_CALLOC (1, priv->base_path_length + strlen ("/")
- + strlen (GF_HIDDEN_PATH) + strlen ("/")
- + strlen (TRASH_DIR) + 1,
- gf_posix_mt_trash_path);
-
- if (!priv->trash_path)
- goto out;
-
- strncpy (priv->trash_path, priv->base_path, priv->base_path_length);
- strcat (priv->trash_path, "/" GF_HIDDEN_PATH "/" TRASH_DIR);
- ret = posix_handle_new_trash_init (this, priv->trash_path);
- if (ret)
- goto out;
- snprintf (old_trash, sizeof (old_trash), "%s/.landfill",
- priv->base_path);
- ret = posix_mv_old_trash_into_new_trash (this, old_trash,
- priv->trash_path);
+ int ret = -1;
+ struct posix_private *priv = NULL;
+ char old_trash[PATH_MAX] = {0};
+
+ priv = this->private;
+
+ priv->trash_path = GF_MALLOC(priv->base_path_length + SLEN("/") +
+ SLEN(GF_HIDDEN_PATH) + SLEN("/") +
+ SLEN(TRASH_DIR) + 1,
+ gf_posix_mt_trash_path);
+
+ if (!priv->trash_path)
+ goto out;
+
+ snprintf(
+ priv->trash_path,
+ priv->base_path_length + SLEN(GF_HIDDEN_PATH) + SLEN(TRASH_DIR) + 3,
+ "%s/%s/%s", priv->base_path, GF_HIDDEN_PATH, TRASH_DIR);
+
+ ret = posix_handle_new_trash_init(this, priv->trash_path);
+ if (ret)
+ goto out;
+ snprintf(old_trash, sizeof(old_trash), "%s/.landfill", priv->base_path);
+ ret = posix_mv_old_trash_into_new_trash(this, old_trash, priv->trash_path);
out:
- return ret;
+ return ret;
}
int
-posix_handle_mkdir_hashes (xlator_t *this, const char *newpath)
+posix_handle_mkdir_hashes(xlator_t *this, int dirfd, uuid_t gfid)
{
- char *duppath = NULL;
- char *parpath = NULL;
- int ret = 0;
-
- duppath = strdupa (newpath);
- parpath = dirname (duppath);
- parpath = dirname (duppath);
-
- ret = mkdir (parpath, 0700);
- if (ret == -1 && errno != EEXIST) {
- gf_log (this->name, GF_LOG_ERROR,
- "error mkdir hash-1 %s (%s)",
- parpath, strerror (errno));
- return -1;
- }
-
- strcpy (duppath, newpath);
- parpath = dirname (duppath);
-
- ret = mkdir (parpath, 0700);
- if (ret == -1 && errno != EEXIST) {
- gf_log (this->name, GF_LOG_ERROR,
- "error mkdir hash-2 %s (%s)",
- parpath, strerror (errno));
- return -1;
- }
+ int ret = -1;
+ char d2[3] = {
+ 0,
+ };
+
+ snprintf(d2, sizeof(d2), "%02x", gfid[1]);
+ ret = sys_mkdirat(dirfd, d2, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "error mkdir hash-2 %s ", uuid_utoa(gfid));
+ return -1;
+ }
- return 0;
+ return 0;
}
-
int
-posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf)
+posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid,
+ struct stat *oldbuf)
{
- char *newpath = NULL;
- struct stat newbuf;
- int ret = -1;
-
-
- MAKE_HANDLE_ABSPATH (newpath, this, gfid);
+ struct stat newbuf;
+ struct stat hashbuf;
+ int ret = -1;
+ gf_boolean_t link_exists = _gf_false;
+ char d2[3] = {
+ 0,
+ };
+ int dfd = -1;
+ char *newstr = NULL;
+
+ MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd);
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
+
+ if (ret == -1 && errno != ENOENT) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s",
+ uuid_utoa(gfid));
+ return -1;
+ }
- ret = lstat (newpath, &newbuf);
- if (ret == -1 && errno != ENOENT) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: %s", newpath, strerror (errno));
+ if (ret == -1 && errno == ENOENT) {
+ snprintf(d2, sizeof(d2), "%02x", gfid[1]);
+ ret = sys_fstatat(dfd, d2, &hashbuf, 0);
+ if (ret) {
+ ret = posix_handle_mkdir_hashes(this, dfd, gfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "mkdir %s failed ", uuid_utoa(gfid));
return -1;
+ }
}
+ ret = sys_linkat(AT_FDCWD, oldpath, dfd, newstr);
- if (ret == -1 && errno == ENOENT) {
- ret = posix_handle_mkdir_hashes (this, newpath);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "mkdir %s failed (%s)",
- newpath, strerror (errno));
- return -1;
- }
-
- ret = sys_link (oldpath, newpath);
-
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "link %s -> %s failed (%s)",
- oldpath, newpath, strerror (errno));
- return -1;
- }
-
- ret = lstat (newpath, &newbuf);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "lstat on %s failed (%s)",
- newpath, strerror (errno));
- return -1;
- }
- }
-
- if (newbuf.st_ino != oldbuf->st_ino ||
- newbuf.st_dev != oldbuf->st_dev) {
- gf_log (this->name, GF_LOG_WARNING,
- "mismatching ino/dev between file %s (%lld/%lld) "
- "and handle %s (%lld/%lld)",
- oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev,
- newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev);
- ret = -1;
+ if (ret) {
+ if (errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "link %s -> %s"
+ "failed ",
+ oldpath, newstr);
+ return -1;
+ } else {
+ link_exists = _gf_true;
+ }
}
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
- return ret;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "lstat on %s failed", uuid_utoa(gfid));
+ return -1;
+ }
+ if ((link_exists) && (!S_ISREG(newbuf.st_mode))) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_HANDLE_CREATE,
+ "%s - Expected regular file", uuid_utoa(gfid));
+ return -1;
+ }
+ }
+
+ if (newbuf.st_ino != oldbuf->st_ino || newbuf.st_dev != oldbuf->st_dev) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_CREATE,
+ "mismatching ino/dev between file %s (%lld/%lld) "
+ "and handle %s (%lld/%lld)",
+ oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev,
+ uuid_utoa(gfid), (long long)newbuf.st_ino,
+ (long long)newbuf.st_dev);
+ ret = -1;
+ }
+
+ return ret;
}
-
int
-posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc,
- uuid_t gfid, struct stat *oldbuf)
+posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc,
+ uuid_t gfid, struct stat *oldbuf)
{
- char *oldpath = NULL;
- char *newpath = NULL;
- struct stat newbuf;
- int ret = -1;
-
- MAKE_HANDLE_ABSPATH (newpath, this, gfid);
- MAKE_HANDLE_RELPATH (oldpath, this, loc->pargfid, loc->name);
-
- ret = lstat (newpath, &newbuf);
- if (ret == -1 && errno != ENOENT) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: %s", newpath, strerror (errno));
- return -1;
- }
-
- if (ret == -1 && errno == ENOENT) {
- ret = posix_handle_mkdir_hashes (this, newpath);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "mkdir %s failed (%s)",
- newpath, strerror (errno));
- return -1;
- }
-
- ret = symlink (oldpath, newpath);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "symlink %s -> %s failed (%s)",
- oldpath, newpath, strerror (errno));
- return -1;
- }
+ char *oldpath = NULL;
+ char *newpath = NULL;
+ struct stat newbuf;
+ struct stat hashbuf;
+ int ret = -1;
+ char d2[3] = {
+ 0,
+ };
+ int dfd = -1;
+ char *newstr = NULL;
+
+ MAKE_HANDLE_ABSPATH(newpath, this, gfid);
+ MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd);
+ MAKE_HANDLE_RELPATH(oldpath, this, loc->pargfid, loc->name);
+
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
+
+ if (ret == -1 && errno != ENOENT) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s",
+ newstr);
+ return -1;
+ }
- ret = lstat (newpath, &newbuf);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "stat on %s failed (%s)",
- newpath, strerror (errno));
- return -1;
- }
+ if (ret == -1 && errno == ENOENT) {
+ if (posix_is_malformed_link(this, newpath, oldpath, strlen(oldpath))) {
+ GF_ASSERT(!"Malformed link");
+ errno = EINVAL;
+ return -1;
}
- ret = stat (real_path, &newbuf);
+ snprintf(d2, sizeof(d2), "%02x", gfid[1]);
+ ret = sys_fstatat(dfd, d2, &hashbuf, 0);
+
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "stat on %s failed (%s)", newpath, strerror (errno));
+ ret = posix_handle_mkdir_hashes(this, dfd, gfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "mkdir %s failed ", newstr);
return -1;
+ }
}
-
- if (!oldbuf)
- return ret;
-
- if (newbuf.st_ino != oldbuf->st_ino ||
- newbuf.st_dev != oldbuf->st_dev) {
- gf_log (this->name, GF_LOG_WARNING,
- "mismatching ino/dev between file %s (%lld/%lld) "
- "and handle %s (%lld/%lld)",
- oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev,
- newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev);
- ret = -1;
+ ret = sys_symlinkat(oldpath, dfd, newstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "symlink %s -> %s failed", oldpath, newstr);
+ return -1;
}
- return ret;
-}
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "stat on %s failed ", newstr);
+ return -1;
+ }
+ }
-static int
-posix_handle_unset_gfid (xlator_t *this, uuid_t gfid)
-{
- char *path = NULL;
- int ret = 0;
- struct stat stat;
+ ret = sys_stat(real_path, &newbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "stat on %s failed ", real_path);
+ return -1;
+ }
- MAKE_HANDLE_GFID_PATH (path, this, gfid, NULL);
+ if (!oldbuf)
+ return ret;
- ret = lstat (path, &stat);
+ if (newbuf.st_ino != oldbuf->st_ino || newbuf.st_dev != oldbuf->st_dev) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_CREATE,
+ "mismatching ino/dev between file %s (%lld/%lld) "
+ "and handle %s (%lld/%lld)",
+ oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev,
+ newpath, (long long)newbuf.st_ino, (long long)newbuf.st_dev);
+ ret = -1;
+ }
- if (ret == -1) {
- if (errno != ENOENT) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: %s", path, strerror (errno));
- }
- goto out;
- }
+ return ret;
+}
- ret = unlink (path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "unlink %s failed (%s)", path, strerror (errno));
- }
+int
+posix_handle_unset_gfid(xlator_t *this, uuid_t gfid)
+{
+ int ret = 0;
+ struct stat stat;
+ int index = 0;
+ int dfd = 0;
+ char newstr[POSIX_GFID_HASH2_LEN] = {
+ 0,
+ };
+ struct posix_private *priv = this->private;
+
+ index = gfid[0];
+ dfd = priv->arrdfd[index];
+
+ snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_utoa(gfid));
+ ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW);
+
+ if (ret == -1) {
+ if (errno != ENOENT) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s",
+ newstr);
+ }
+ goto out;
+ }
+
+ ret = sys_unlinkat(dfd, newstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE,
+ "unlink %s is failed", newstr);
+ }
out:
- return ret;
+ return ret;
}
-
int
-posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename)
+posix_handle_unset(xlator_t *this, uuid_t gfid, const char *basename)
{
- int ret;
- struct iatt stat;
- char *path = NULL;
-
+ int ret;
+ struct iatt stat;
+ char *path = NULL;
- if (!basename) {
- ret = posix_handle_unset_gfid (this, gfid);
- return ret;
- }
-
- MAKE_HANDLE_PATH (path, this, gfid, basename);
-
- ret = posix_istat (this, gfid, basename, &stat);
+ if (!basename) {
+ ret = posix_handle_unset_gfid(this, gfid);
+ return ret;
+ }
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: %s", path, strerror (errno));
- return -1;
- }
+ MAKE_HANDLE_PATH(path, this, gfid, basename);
+ if (!path) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_DELETE,
+ "Failed to create handle path for %s (%s)", basename,
+ uuid_utoa(gfid));
+ return -1;
+ }
+
+ /* stat is being used only for gfid, so passing a NULL inode
+ * doesn't fetch time attributes which is fine
+ */
+ ret = posix_istat(this, NULL, gfid, basename, &stat);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s",
+ path);
+ return -1;
+ }
- ret = posix_handle_unset_gfid (this, stat.ia_gfid);
+ ret = posix_handle_unset_gfid(this, stat.ia_gfid);
- return ret;
+ return ret;
}
-
int
-posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
- char *real_path)
+posix_create_link_if_gfid_exists(xlator_t *this, uuid_t gfid, char *real_path,
+ inode_table_t *itable)
{
- int ret = -1;
- struct stat stbuf = {0,};
- char *newpath = NULL;
-
- MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
- ret = lstat (newpath, &stbuf);
- if (!ret) {
- ret = sys_link (newpath, real_path);
- }
-
+ int ret = -1;
+ char *newpath = NULL;
+ char *unlink_path = NULL;
+ uint64_t ctx_int = 0;
+ inode_t *inode = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ posix_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
+
+ MAKE_HANDLE_PATH(newpath, this, gfid, NULL);
+ if (!newpath) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_CREATE,
+ "Failed to create handle path (%s)", uuid_utoa(gfid));
return ret;
+ }
+
+ ret = sys_lstat(newpath, &stbuf);
+ if (!ret) {
+ ret = sys_link(newpath, real_path);
+ } else {
+ inode = inode_find(itable, gfid);
+ if (!inode)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __posix_inode_ctx_get_all(inode, this, &ctx);
+ if (ret)
+ goto unlock;
+
+ if (ctx->unlink_flag != GF_UNLINK_TRUE) {
+ ret = -1;
+ goto unlock;
+ }
+
+ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, gfid, unlink_path);
+ ret = sys_link(unlink_path, real_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "Failed to link "
+ "%s with %s",
+ real_path, unlink_path);
+ goto unlock;
+ }
+ ret = sys_rename(unlink_path, newpath);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "Failed to link "
+ "%s with %s",
+ real_path, unlink_path);
+ goto unlock;
+ }
+ ctx_int = GF_UNLINK_FALSE;
+ ret = __posix_inode_ctx_set_unlink_flag(inode, this, ctx_int);
+ }
+ unlock:
+ UNLOCK(&inode->lock);
+
+ inode_unref(inode);
+ }
+
+ return ret;
}
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
index 0f596b6069e..f33ed92620d 100644
--- a/xlators/storage/posix/src/posix-handle.h
+++ b/xlators/storage/posix/src/posix-handle.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2011-2017 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -10,232 +10,212 @@
#ifndef _POSIX_HANDLE_H
#define _POSIX_HANDLE_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/types.h>
-#include "xlator.h"
-#include "gf-dirent.h"
-
-#define TRASH_DIR "landfill"
-
-#define UUID0_STR "00000000-0000-0000-0000-000000000000"
-#define SLEN(str) (sizeof(str) - 1)
-
-#define HANDLE_ABSPATH_LEN(this) (POSIX_BASE_PATH_LEN(this) + \
- SLEN("/" GF_HIDDEN_PATH "/00/00/" \
- UUID0_STR) + 1)
-
-#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/'))
-
-#define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) do { \
- var = alloca (strlen (prefix) + UUID_CANONICAL_FORM_LEN + 1); \
- strcpy (var, prefix); \
- strcat (var, uuid_utoa (pgfid)); \
- } while (0)
-
-#define SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
- value = hton32 (value); \
- op_ret = sys_lsetxattr (path, key, &value, sizeof (value), \
- flags); \
- if (op_ret == -1) { \
- op_errno = errno; \
- gf_log (this->name, GF_LOG_WARNING, \
- "setting xattr failed on %s: key = %s (%s)", \
- path, key, strerror (op_errno)); \
- goto label; \
- } \
- } while (0)
-
-
-#define REMOVE_PGFID_XATTR(path, key, op_ret, this, label) do { \
- op_ret = sys_lremovexattr (path, key); \
- if (op_ret == -1) { \
- op_errno = errno; \
- gf_log (this->name, GF_LOG_WARNING, "removing xattr " \
- "failed on %s: key = %s (%s)", path, key, \
- strerror (op_errno)); \
- goto label; \
- } \
- } while (0)
+#include "posix-inode-handle.h"
-/* should be invoked holding a lock */
-#define LINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
- op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
- if (op_ret == -1) { \
- op_errno = errno; \
- if (op_errno == ENOATTR) { \
- value = 1; \
- } else { \
- gf_log (this->name, GF_LOG_WARNING,"getting xattr " \
- "failed on %s: key = %s (%s)", path, key, \
- strerror (op_errno)); \
- goto label; \
- } \
- } else { \
- value = ntoh32 (value); \
- value++; \
- } \
- SET_PGFID_XATTR (path, key, value, flags, op_ret, this, label); \
- } while (0)
+/*
+ * Buffer size used for an absolute handle path: POSIX_BASE_PATH_LEN(this)
+ * plus the length of the literal "/" GF_HIDDEN_PATH "/00/00/" UUID0_STR
+ * (placeholder hash directories and a placeholder UUID string), plus one
+ * more byte -- presumably for the terminating NUL.
+ */
+#define HANDLE_ABSPATH_LEN(this) \
+ (POSIX_BASE_PATH_LEN(this) + \
+ SLEN("/" GF_HIDDEN_PATH "/00/00/" UUID0_STR) + 1)
+
+/*
+ * Build the xattr key "<prefix><uuid_utoa(pgfid)>" and assign it to var.
+ * The buffer comes from alloca(), so it lives on the caller's stack and must
+ * not be used after the calling function returns.
+ * NOTE(review): the size is computed with SLEN(prefix). If SLEN is
+ * sizeof-based (as in the definition this patch removes from this header),
+ * prefix must be a string literal or array, not a char pointer -- confirm at
+ * the call sites.
+ */
+#define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) \
+ do { \
+ var = alloca(SLEN(prefix) + UUID_CANONICAL_FORM_LEN + 1); \
+ strcpy(var, prefix); \
+ strcat(var, uuid_utoa(pgfid)); \
+ } while (0)
+
+/*
+ * Write `value` to the xattr `key` on `path` with sys_lsetxattr().
+ *
+ * `value` is converted in place with hton32() before the write, so the
+ * caller's variable holds the converted value afterwards.
+ * On failure the macro stores errno in `op_errno` (which must exist in the
+ * expanding scope), logs a warning and jumps to `label`.
+ * The log call reports the saved op_errno rather than re-reading errno, so
+ * the reported code cannot be disturbed by evaluating the other arguments.
+ */
+#define SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label) \
+    do { \
+        value = hton32(value); \
+        op_ret = sys_lsetxattr(path, key, &value, sizeof(value), flags); \
+        if (op_ret == -1) { \
+            op_errno = errno; \
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \
+                   "setting xattr failed on %s: key = %s ", path, key); \
+            goto label; \
+        } \
+    } while (0)
+
+/*
+ * Create the xattr `key` on `path` with a value of 1, but only when the
+ * xattr does not exist yet.
+ *
+ * The xattr is read first with sys_lgetxattr(). If the read succeeds nothing
+ * is written. If it fails because the attribute is missing (ENOATTR or
+ * ENODATA, the same pair LINK_MODIFY_PGFID_XATTR accepts), `value` is set to
+ * 1 and stored through SET_PGFID_XATTR, which jumps to `label` on failure.
+ * Any other read error is only logged: control does NOT jump to `label`, and
+ * op_ret is left at -1 with op_errno set.
+ * `op_errno` must exist in the expanding scope.
+ */
+#define SET_PGFID_XATTR_IF_ABSENT(path, key, value, flags, op_ret, this, \
+                                  label) \
+    do { \
+        op_ret = sys_lgetxattr(path, key, &value, sizeof(value)); \
+        if (op_ret == -1) { \
+            op_errno = errno; \
+            if (op_errno == ENOATTR || op_errno == ENODATA) { \
+                value = 1; \
+                SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label); \
+            } else { \
+                gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \
+                       "getting xattr " \
+                       "failed on %s: key = %s ", \
+                       path, key); \
+            } \
+        } \
+    } while (0)
+
+/*
+ * Remove the xattr `key` from `path` with sys_lremovexattr().
+ * On failure the macro stores errno in `op_errno` (which must exist in the
+ * expanding scope), logs a warning and jumps to `label`.
+ */
+#define REMOVE_PGFID_XATTR(path, key, op_ret, this, label) \
+    do { \
+        op_ret = sys_lremovexattr(path, key); \
+        if (op_ret == -1) { \
+            op_errno = errno; \
+            /* The two literals are concatenated by the compiler, so the \
+             * first one needs its trailing space ("failed on", not \
+             * "failedon"). */ \
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \
+                   "removing xattr failed " \
+                   "on %s: key = %s", \
+                   path, key); \
+            goto label; \
+        } \
+    } while (0)
/* should be invoked holding a lock */
-#define UNLINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \
- op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \
- if (op_ret == -1) { \
- op_errno = errno; \
- gf_log (this->name, GF_LOG_WARNING, "getting xattr failed on " \
- "%s: key = %s (%s)", path, key, strerror (op_errno)); \
- goto label; \
- } else { \
- value = ntoh32 (value); \
- value--; \
- if (value > 0) { \
- SET_PGFID_XATTR (path, key, value, flags, op_ret, \
- this, label); \
- } else { \
- REMOVE_PGFID_XATTR (path, key, op_ret, this, label); \
- } \
- } \
+/*
+ * Increment the counter stored in the xattr `key` on `path`.
+ *
+ * The current value is read with sys_lgetxattr(). If the attribute is
+ * missing (ENOATTR or ENODATA) the counter starts at 1; on any other read
+ * error the macro logs a warning and jumps to `label`. Otherwise the value
+ * is converted with ntoh32() and incremented. The result is written back
+ * through SET_PGFID_XATTR, which also jumps to `label` on failure.
+ * `op_errno` must exist in the expanding scope; the log call reports that
+ * saved value rather than re-reading errno.
+ */
+#define LINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) \
+    do { \
+        op_ret = sys_lgetxattr(path, key, &value, sizeof(value)); \
+        if (op_ret == -1) { \
+            op_errno = errno; \
+            if (op_errno == ENOATTR || op_errno == ENODATA) { \
+                value = 1; \
+            } else { \
+                gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \
+                       "getting xattr " \
+                       "failed on %s: key = %s ", \
+                       path, key); \
+                goto label; \
+            } \
+        } else { \
+            value = ntoh32(value); \
+            value++; \
+        } \
+        SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label); \
} while (0)
-#define MAKE_REAL_PATH(var, this, path) do { \
- var = alloca (strlen (path) + POSIX_BASE_PATH_LEN(this) + 2); \
- strcpy (var, POSIX_BASE_PATH(this)); \
- strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \
- } while (0)
-
-#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \
- int __len; \
- __len = posix_handle_path (this, gfid, base, NULL, 0); \
- if (__len <= 0) \
- break; \
- var = alloca (__len); \
- __len = posix_handle_path (this, gfid, base, var, __len); \
- } while (0)
-
-
-#define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) do { \
- int __len = 0; \
- __len = posix_handle_gfid_path (this, gfid, base, NULL, 0); \
- if (__len <= 0) \
- break; \
- var = alloca (__len); \
- __len = posix_handle_gfid_path (this, gfid, base, var, __len); \
- } while (0)
-
-
-#define MAKE_HANDLE_RELPATH(var, this, gfid, base) do { \
- int __len; \
- __len = posix_handle_relpath (this, gfid, base, NULL, 0); \
- if (__len <= 0) \
- break; \
- var = alloca (__len); \
- __len = posix_handle_relpath (this, gfid, base, var, __len); \
- } while (0)
-
-
-#define MAKE_HANDLE_ABSPATH(var, this, gfid) do { \
- struct posix_private * __priv = this->private; \
- int __len = HANDLE_ABSPATH_LEN(this); \
- var = alloca(__len); \
- snprintf(var, __len, "%s/" GF_HIDDEN_PATH "/%02x/%02x/%s", \
- __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \
- } while (0)
-
-
-#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \
- if (uuid_is_null (loc->gfid)) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "null gfid for path %s", (loc)->path); \
- break; \
- } \
- if (LOC_HAS_ABSPATH (loc)) { \
- MAKE_REAL_PATH (rpath, this, (loc)->path); \
- op_ret = posix_pstat (this, (loc)->gfid, rpath, iatt_p); \
- break; \
- } \
- errno = 0; \
- op_ret = posix_istat (this, loc->gfid, NULL, iatt_p); \
- if (errno != ELOOP) { \
- MAKE_HANDLE_PATH (rpath, this, loc->gfid, NULL); \
- break; \
- } \
- /* __ret == -1 && errno == ELOOP */ \
- } while (0)
-
-
-#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) do { \
- char *__parp; \
- \
- if (uuid_is_null (loc->pargfid) || !loc->name) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "null pargfid/name for path %s", loc->path); \
- break; \
- } \
- \
- if (LOC_HAS_ABSPATH (loc)) { \
- MAKE_REAL_PATH (entp, this, loc->path); \
- __parp = strdupa (entp); \
- parp = dirname (__parp); \
- op_ret = posix_pstat (this, NULL, entp, ent_p); \
- break; \
- } \
- errno = 0; \
- op_ret = posix_istat (this, loc->pargfid, loc->name, ent_p); \
- if (errno != ELOOP) { \
- MAKE_HANDLE_PATH (parp, this, loc->pargfid, NULL); \
- MAKE_HANDLE_PATH (entp, this, loc->pargfid, loc->name); \
- break; \
- } \
- /* __ret == -1 && errno == ELOOP */ \
- /* expand ELOOP */ \
- } while (0)
-
-
-#define POSIX_ANCESTRY_PATH (1 << 0)
-#define POSIX_ANCESTRY_DENTRY (1 << 1)
+/*
+ * Decrement the counter stored in the xattr `key` on `path`.
+ * Should be invoked holding a lock.
+ *
+ * The current value is read with sys_lgetxattr(); on failure the macro logs
+ * a warning and jumps to `label`. Otherwise the value is converted with
+ * ntoh32() and decremented: a result greater than zero is written back via
+ * SET_PGFID_XATTR, anything else removes the xattr via REMOVE_PGFID_XATTR.
+ * Both helpers jump to `label` on failure.
+ * `op_errno` must exist in the expanding scope; the log call reports that
+ * saved value rather than re-reading errno.
+ * NOTE(review): what happens when the stored count is already 0 depends on
+ * the signedness of the caller's `value` variable -- confirm at call sites.
+ */
+#define UNLINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, \
+                                  label) \
+    do { \
+        op_ret = sys_lgetxattr(path, key, &value, sizeof(value)); \
+        if (op_ret == -1) { \
+            op_errno = errno; \
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \
+                   "getting xattr failed on " \
+                   "%s: key = %s ", \
+                   path, key); \
+            goto label; \
+        } else { \
+            value = ntoh32(value); \
+            value--; \
+            if (value > 0) { \
+                SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label); \
+            } else { \
+                REMOVE_PGFID_XATTR(path, key, op_ret, this, label); \
+            } \
+        } \
+    } while (0)
-int
-posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf,
- size_t len);
+/*
+ * Allocate a stack buffer with alloca(), assign it to var, and let
+ * posix_handle_gfid_path() fill it for `gfid`.
+ * The buffer size is POSIX_GFID_HANDLE_SIZE(base_path_length) plus 256 bytes.
+ * The value returned by posix_handle_gfid_path() is stored in the local
+ * __len but is not checked or exposed to the caller.
+ * NOTE(review): the reason for the extra 256 bytes is not visible here.
+ */
+#define MAKE_HANDLE_GFID_PATH(var, this, gfid) \
+ do { \
+ int __len = 0; \
+ struct posix_private *__priv = this->private; \
+ __len = POSIX_GFID_HANDLE_SIZE(__priv->base_path_length); \
+ __len += 256; \
+ var = alloca(__len); \
+ __len = posix_handle_gfid_path(this, gfid, var, __len); \
+ } while (0)
-int
-posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize,
- gf_dirent_t *head, int type, uuid_t gfid,
- const size_t handle_size,
- const char *priv_base_path,
- inode_table_t *table, inode_t **parent,
- dict_t *xdata);
-int
-posix_handle_path_safe (xlator_t *this, uuid_t gfid, const char *basename,
- char *buf, size_t len);
+/*
+ * Allocate a stack buffer with alloca(), assign it to var, and let
+ * posix_handle_relpath() fill it for `gfid` and the optional `base`.
+ * The buffer size is POSIX_GFID_HANDLE_RELSIZE, plus strlen(base) + 1 when
+ * base is non-NULL.
+ * The value returned by posix_handle_relpath() is stored in the local __len
+ * but is not checked or exposed to the caller.
+ */
+#define MAKE_HANDLE_RELPATH(var, this, gfid, base) \
+ do { \
+ int __len; \
+ __len = POSIX_GFID_HANDLE_RELSIZE; \
+ if (base) { \
+ __len += (strlen(base) + 1); \
+ } \
+ var = alloca(__len); \
+ __len = posix_handle_relpath(this, gfid, base, var, __len); \
+ } while (0)
-int
-posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
- char *buf, size_t len);
+/*
+ * Format the absolute handle path
+ *   <base_path>/<GF_HIDDEN_PATH>/<gfid[0] as 2 hex digits>/<gfid[1] as 2 hex
+ *   digits>/<uuid_utoa(gfid)>
+ * into an alloca()'d buffer of HANDLE_ABSPATH_LEN(this) bytes and assign it
+ * to var. The snprintf() return value is not checked.
+ */
+#define MAKE_HANDLE_ABSPATH(var, this, gfid) \
+ do { \
+ struct posix_private *__priv = this->private; \
+ int __len = HANDLE_ABSPATH_LEN(this); \
+ var = alloca(__len); \
+ snprintf(var, __len, "%s/" GF_HIDDEN_PATH "/%02x/%02x/%s", \
+ __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \
+ } while (0)
-int
-posix_handle_hard (xlator_t *this, const char *path, uuid_t gfid,
- struct stat *buf);
+/*
+ * Produce the pieces needed to address a handle relative to a directory fd:
+ *   - var: "<gfid[1] as 2 hex digits>/<uuid_utoa(gfid)>", formatted into an
+ *          alloca()'d buffer of POSIX_GFID_HASH2_LEN bytes;
+ *   - dfd: the entry of priv->arrdfd[] indexed by gfid[0].
+ * The macro-local index is named __findex (like __priv and __len) so that it
+ * cannot shadow a caller variable that appears in the `dfd` or `var`
+ * argument expressions.
+ */
+#define MAKE_HANDLE_ABSPATH_FD(var, this, gfid, dfd) \
+    do { \
+        struct posix_private *__priv = this->private; \
+        int __findex = gfid[0]; \
+        int __len = POSIX_GFID_HASH2_LEN; \
+        var = alloca(__len); \
+        snprintf(var, __len, "%02x/%s", gfid[1], uuid_utoa(gfid)); \
+        dfd = __priv->arrdfd[__findex]; \
+    } while (0)
+/*
+ * Resolve the on-disk paths for a directory entry described by `loc`:
+ * entp receives the entry path, parp the parent path, and ent_p is filled by
+ * the stat helper. `op_ret` must exist in the expanding scope.
+ *
+ * Steps, in order:
+ *   - a null loc->pargfid or NULL loc->name is logged and the macro stops
+ *     without assigning op_ret, entp or parp;
+ *   - a '/' inside loc->name is logged, op_ret is set to -1 and the macro
+ *     stops;
+ *   - if LOC_HAS_ABSPATH(loc), entp is built with MAKE_REAL_PATH, parp is
+ *     dirname() of a strdupa() copy of entp, and op_ret comes from
+ *     posix_pstat();
+ *   - otherwise op_ret comes from posix_istat(); unless errno is ELOOP
+ *     afterwards, parp and entp are built with MAKE_HANDLE_PATH and a failure
+ *     to build either one is logged (op_ret is not changed by that failure).
+ * When errno is ELOOP the macro falls through without building the paths.
+ * Buffers come from alloca()/strdupa(), so they live on the caller's stack.
+ */
+#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) \
+ do { \
+ char *__parp; \
+ \
+ if (gf_uuid_is_null(loc->pargfid) || !loc->name) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_ENTRY_HANDLE_CREATE, \
+ "null pargfid/name for path %s", loc->path); \
+ break; \
+ } \
+ \
+ if (strchr(loc->name, '/')) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_ENTRY_HANDLE_CREATE, \
+ "'/' in name not allowed: (%s)", loc->name); \
+ op_ret = -1; \
+ break; \
+ } \
+ if (LOC_HAS_ABSPATH(loc)) { \
+ MAKE_REAL_PATH(entp, this, loc->path); \
+ __parp = strdupa(entp); \
+ parp = dirname(__parp); \
+ op_ret = posix_pstat(this, loc->inode, NULL, entp, ent_p, \
+ _gf_false); \
+ break; \
+ } \
+ errno = 0; \
+ op_ret = posix_istat(this, loc->inode, loc->pargfid, loc->name, \
+ ent_p); \
+ if (errno != ELOOP) { \
+ MAKE_HANDLE_PATH(parp, this, loc->pargfid, NULL); \
+ MAKE_HANDLE_PATH(entp, this, loc->pargfid, loc->name); \
+ if (!parp || !entp) { \
+ gf_msg(this->name, GF_LOG_ERROR, errno, \
+ P_MSG_ENTRY_HANDLE_CREATE, \
+ "Failed to create entry handle " \
+ "for path %s", \
+ loc->path); \
+ } \
+ break; \
+ } \
+ /* reached only when errno == ELOOP after posix_istat() */ \
+ /* expand ELOOP */ \
+ } while (0)
+#define POSIX_GFID_HASH2_LEN 45
int
-posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc,
- uuid_t gfid, struct stat *buf);
+posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t len);
int
-posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename);
+posix_handle_hard(xlator_t *this, const char *path, uuid_t gfid,
+ struct stat *buf);
-int posix_handle_mkdir_hashes (xlator_t *this, const char *newpath);
+int
+posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc,
+ uuid_t gfid, struct stat *buf);
-int posix_handle_init (xlator_t *this);
+int
+posix_handle_unset(xlator_t *this, uuid_t gfid, const char *basename);
-int posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
- char *real_path);
+int
+posix_create_link_if_gfid_exists(xlator_t *this, uuid_t gfid, char *real_path,
+ inode_table_t *itable);
int
-posix_handle_trash_init (xlator_t *this);
+posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata);
+
+void
+posix_disk_space_check(xlator_t *this);
#endif /* !_POSIX_HANDLE_H */
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 11e423f6bf0..67db3324083 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -7,11 +7,6 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#define __XOPEN_SOURCE 500
#include <stdint.h>
@@ -23,1603 +18,3649 @@
#include <ftw.h>
#include <sys/stat.h>
#include <signal.h>
+#include <aio.h>
+
+#ifdef HAVE_SYS_ACL_H
+#ifdef HAVE_ACL_LIBACL_H /* for acl_to_any_text() */
+#include <acl/libacl.h>
+#else /* FreeBSD and others */
+#include <sys/acl.h>
+#endif
+#endif
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
#endif /* GF_BSD_HOST_OS */
-#include "glusterfs.h"
-#include "checksum.h"
-#include "dict.h"
-#include "logging.h"
+#include <fnmatch.h>
#include "posix.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "syscall.h"
-#include "statedump.h"
-#include "locking.h"
-#include "timer.h"
+#include "posix-messages.h"
+#include "posix-metadata.h"
+#include "posix-handle.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
#include "glusterfs3-xdr.h"
-#include "hashfn.h"
-#include "glusterfs-acl.h"
-#include <fnmatch.h>
+#include <glusterfs/glusterfs-acl.h>
+#include "posix-gfid-path.h"
+#include <glusterfs/events.h>
+#include "glusterfs/syncop.h"
+#include "timer-wheel.h"
+#include <sys/types.h>
char *marker_xattrs[] = {"trusted.glusterfs.quota.*",
- "trusted.glusterfs.*.xtime",
- NULL};
-
-char *marker_contri_key = "trusted.*.*.contri";
-
-static char* posix_ignore_xattrs[] = {
- "gfid-req",
- GLUSTERFS_ENTRYLK_COUNT,
- GLUSTERFS_INODELK_COUNT,
- GLUSTERFS_POSIXLK_COUNT,
- GLUSTERFS_PARENT_ENTRYLK,
- GF_GFIDLESS_LOOKUP,
- NULL
-};
-
-static char* list_xattr_ignore_xattrs[] = {
- GF_SELINUX_XATTR_KEY,
- GF_XATTR_VOL_ID_KEY,
- GFID_XATTR_KEY,
- NULL
-};
+ "trusted.glusterfs.*.xtime", NULL};
+
+static char *marker_contri_key = "trusted.*.*.contri";
+
+static char *posix_ignore_xattrs[] = {"gfid-req",
+ GLUSTERFS_INTERNAL_FOP_KEY,
+ GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GF_GFIDLESS_LOOKUP,
+ GLUSTERFS_INODELK_DOM_COUNT,
+ NULL};
+
+static char *list_xattr_ignore_xattrs[] = {GFID_XATTR_KEY, GF_XATTR_VOL_ID_KEY,
+ GF_SELINUX_XATTR_KEY, NULL};
+
gf_boolean_t
-posix_special_xattr (char **pattern, char *key)
+posix_special_xattr(char **pattern, char *key)
{
- int i = 0;
- gf_boolean_t flag = _gf_false;
+ int i = 0;
+ gf_boolean_t flag = _gf_false;
- GF_VALIDATE_OR_GOTO ("posix", pattern, out);
- GF_VALIDATE_OR_GOTO ("posix", key, out);
+ GF_VALIDATE_OR_GOTO("posix", pattern, out);
+ GF_VALIDATE_OR_GOTO("posix", key, out);
- for (i = 0; pattern[i]; i++) {
- if (!fnmatch (pattern[i], key, 0)) {
- flag = _gf_true;
- break;
- }
+ for (i = 0; pattern[i]; i++) {
+ if (!fnmatch(pattern[i], key, 0)) {
+ flag = _gf_true;
+ break;
}
+ }
out:
- return flag;
+ return flag;
}
-static gf_boolean_t
-_is_in_array (char **str_array, char *str)
+int
+posix_handle_mdata_xattr(call_frame_t *frame, const char *name, int *op_errno)
{
- int i = 0;
-
- for (i = 0; str_array[i]; i++) {
- if (strcmp (str, str_array[i]) == 0)
- return _gf_true;
+ int i = 0;
+ int ret = 0;
+ int pid = 1;
+ static const char *const internal_xattr[] = {GF_XATTR_MDATA_KEY, NULL};
+ if (frame && frame->root) {
+ pid = frame->root->pid;
+ }
+
+ if (!name || pid < GF_CLIENT_PID_MAX) {
+ /* No need to do anything here */
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; internal_xattr[i]; i++) {
+ if (fnmatch(internal_xattr[i], name, FNM_PERIOD) == 0) {
+ ret = -1;
+ if (op_errno) {
+ *op_errno = ENOATTR;
+ }
+
+ gf_msg_debug("posix", ENOATTR,
+ "Ignoring the key %s as an internal "
+ "xattrs.",
+ name);
+ goto out;
}
- return _gf_false;
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-static gf_boolean_t
-posix_xattr_ignorable (char *key, posix_xattr_filler_t *filler)
+int
+posix_handle_georep_xattrs(call_frame_t *frame, const char *name, int *op_errno,
+ gf_boolean_t is_getxattr)
{
- gf_boolean_t ignore = _gf_false;
-
- GF_ASSERT (key);
- if (!key)
- goto out;
-
- ignore = _is_in_array (posix_ignore_xattrs, key);
- if (ignore)
- goto out;
+ int i = 0;
+ int ret = 0;
+ int pid = 1;
+ gf_boolean_t filter_xattr = _gf_true;
+ static const char *georep_xattr[] = {
+ "*.glusterfs.*.stime", "*.glusterfs.*.xtime",
+ "*.glusterfs.*.entry_stime", "*.glusterfs.volume-mark.*", NULL};
+
+ if (!name) {
+ /* No need to do anything here */
+ ret = 0;
+ goto out;
+ }
+
+ if (frame && frame->root) {
+ pid = frame->root->pid;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD && is_getxattr) {
+ filter_xattr = _gf_false;
+
+ /* getxattr from gsyncd process should return all the
+ * internal xattr. In other cases ignore such xattrs
+ */
+ }
+
+ for (i = 0; filter_xattr && georep_xattr[i]; i++) {
+ if (fnmatch(georep_xattr[i], name, FNM_PERIOD) == 0) {
+ ret = -1;
+ if (op_errno)
+ *op_errno = ENOATTR;
+
+ gf_msg_debug("posix", ENOATTR,
+ "Ignoring the key %s as an internal "
+ "xattrs.",
+ name);
+ goto out;
+ }
+ }
- if ((!strcmp (key, GF_CONTENT_KEY))
- && (!IA_ISREG (filler->stbuf->ia_type)))
- ignore = _gf_true;
+ ret = 0;
out:
- return ignore;
+ return ret;
}
-static int
-_posix_xattr_get_set_from_backend (posix_xattr_filler_t *filler, char *key)
-{
- ssize_t xattr_size = -1;
- int ret = 0;
- char *value = NULL;
-
- xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0);
-
- if (xattr_size > 0) {
- value = GF_CALLOC (1, xattr_size + 1,
- gf_posix_mt_char);
- if (!value)
- goto out;
-
- xattr_size = sys_lgetxattr (filler->real_path, key, value,
- xattr_size);
- if (xattr_size <= 0) {
- gf_log (filler->this->name, GF_LOG_WARNING,
- "getxattr failed. path: %s, key: %s",
- filler->real_path, key);
- GF_FREE (value);
- goto out;
- }
+int32_t
+posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict, struct iatt *in_stbuf)
+{
+ int ret = -1;
+ mode_t mode = 0;
- value[xattr_size] = '\0';
- ret = dict_set_bin (filler->xattr, key,
- value, xattr_size);
- if (ret < 0) {
- gf_log (filler->this->name, GF_LOG_DEBUG,
- "dict set failed. path: %s, key: %s",
- filler->real_path, key);
- GF_FREE (value);
- goto out;
- }
- }
+ if ((!in_dict) || (!in_stbuf) || (!out_dict)) {
+ goto out;
+ }
+
+ /* We need this only for files */
+ if (!(IA_ISREG(in_stbuf->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Nobody asked for this */
+ if (!dict_get(in_dict, DHT_MODE_IN_XDATA_KEY)) {
ret = 0;
+ goto out;
+ }
+ mode = st_mode_from_ia(in_stbuf->ia_prot, in_stbuf->ia_type);
+
+ ret = dict_set_int32(out_dict, DHT_MODE_IN_XDATA_KEY, mode);
+
out:
- return ret;
+ return ret;
}
-static int gf_posix_xattr_enotsup_log;
+/*
+ * Tell whether `key` appears in posix_ignore_xattrs.
+ * Returns _gf_true when gf_get_index_by_elem() finds it (index >= 0),
+ * _gf_false otherwise.
+ */
+static gf_boolean_t
+posix_xattr_ignorable(char *key)
+{
+    if (gf_get_index_by_elem(posix_ignore_xattrs, key) < 0) {
+        return _gf_false;
+    }
+
+    return _gf_true;
+}
+/*
+ * Read the xattr `key` from the backend and store it in filler->xattr.
+ *
+ * The xattr is read from filler->real_path when that is set, otherwise from
+ * the descriptor filler->fdnum. A 256-byte buffer is tried first; only when
+ * that attempt fails with ERANGE is the real size queried and the read
+ * repeated with a buffer of that size.
+ *
+ * Returns 0 on success and a negative value on failure (invalid xattr
+ * namespace, allocation failure, getxattr failure or dict_set_bin failure).
+ *
+ * errno is captured into saved_errno before any GF_FREE() call, because
+ * releasing memory is not guaranteed to leave errno untouched and the value
+ * is needed afterwards (ERANGE test, warning log).
+ */
static int
-_posix_get_marker_all_contributions (posix_xattr_filler_t *filler)
+_posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key)
{
- ssize_t size = -1, remaining_size = -1, list_offset = 0;
- int ret = -1;
- char *list = NULL, key[4096] = {0, };
-
- size = sys_llistxattr (filler->real_path, NULL, 0);
- if (size == -1) {
- if ((errno == ENOTSUP) || (errno == ENOSYS)) {
- GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
- THIS->name, GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting brick"
- " with 'user_xattr' flag)");
-
- } else {
- gf_log (THIS->name, GF_LOG_WARNING,
- "listxattr failed on %s: %s",
- filler->real_path, strerror (errno));
-
- }
-
- goto out;
+    ssize_t xattr_size = 256; /* guesstimated initial size of xattr */
+    int ret = -1;
+    int saved_errno = 0;
+    char *value = NULL;
+
+    if (!gf_is_valid_xattr_namespace(key)) {
+        goto out;
+    }
+
+    /* Most of the gluster internal xattrs don't exceed 256 bytes. So try
+     * getxattr with ~256 bytes. If it gives ERANGE then go the old way
+     * of getxattr with NULL buf to find the length and then getxattr with
+     * allocated buf to fill the data. This way we reduce lot of getxattrs.
+     */
+
+    value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char);
+    if (!value) {
+        goto out;
+    }
+
+    if (filler->real_path)
+        xattr_size = sys_lgetxattr(filler->real_path, key, value, xattr_size);
+    else
+        xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size);
+
+    if (xattr_size == -1) {
+        /* Remember why getxattr failed before freeing the buffer. */
+        saved_errno = errno;
+        if (value) {
+            GF_FREE(value);
+            value = NULL;
}
-
- if (size == 0) {
- ret = 0;
- goto out;
+        /* xattr_size == -1 - failed to fetch the xattr with
+         * current settings.
+         * If it was not because value was too small, abort
+         */
+        if (saved_errno != ERANGE) {
+            goto out;
}
- list = alloca (size);
- if (!list) {
- goto out;
+        /* Get the real length needed */
+        if (filler->real_path) {
+            xattr_size = sys_lgetxattr(filler->real_path, key, NULL, 0);
+        } else {
+            xattr_size = sys_fgetxattr(filler->fdnum, key, NULL, 0);
+        }
+        if (xattr_size == -1) {
+            goto out;
}
- size = sys_llistxattr (filler->real_path, list, size);
- if (size <= 0) {
- ret = size;
- goto out;
+        value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char);
+        if (!value) {
+            goto out;
}
- remaining_size = size;
- list_offset = 0;
+        if (filler->real_path) {
+            xattr_size = sys_lgetxattr(filler->real_path, key, value,
+                                       xattr_size);
+        } else {
+            xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size);
+        }
+        if (xattr_size == -1) {
+            /* Capture errno first so the warning reports the real cause. */
+            saved_errno = errno;
+            GF_FREE(value);
+            value = NULL;
+            if (filler->real_path)
+                gf_msg(filler->this->name, GF_LOG_WARNING, saved_errno,
+                       P_MSG_XATTR_FAILED, "getxattr failed. path: %s, key: %s",
+                       filler->real_path, key);
+            else
+                gf_msg(filler->this->name, GF_LOG_WARNING, saved_errno,
+                       P_MSG_XATTR_FAILED, "getxattr failed. gfid: %s, key: %s",
+                       uuid_utoa(filler->fd->inode->gfid), key);
+            goto out;
+        }
+    }
+
+    /* The buffer was allocated with one spare byte for this terminator. */
+    value[xattr_size] = '\0';
+    ret = dict_set_bin(filler->xattr, key, value, xattr_size);
+
+    if (ret < 0) {
+        if (value)
+            GF_FREE(value);
+        if (filler->real_path)
+            gf_msg_debug(filler->this->name, 0,
+                         "dict set failed. path: %s, key: %s",
+                         filler->real_path, key);
+        else
+            gf_msg_debug(filler->this->name, 0,
+                         "dict set failed. gfid: %s, key: %s",
+                         uuid_utoa(filler->fd->inode->gfid), key);
+        goto out;
+    }
+    ret = 0;
+out:
+    return ret;
+}
- while (remaining_size > 0) {
- strcpy (key, list + list_offset);
- if (fnmatch (marker_contri_key, key, 0) == 0) {
- ret = _posix_xattr_get_set_from_backend (filler, key);
- }
+static int gf_posix_xattr_enotsup_log;
- remaining_size -= strlen (key) + 1;
- list_offset += strlen (key) + 1;
+static int
+_posix_get_marker_all_contributions(posix_xattr_filler_t *filler)
+{
+ ssize_t size = -1, remaining_size = -1, list_offset = 0;
+ int ret = -1;
+ int len;
+ char *list = NULL, key[4096] = {
+ 0,
+ };
+
+ if (filler->real_path)
+ size = sys_llistxattr(filler->real_path, NULL, 0);
+ else
+ size = sys_flistxattr(filler->fdnum, NULL, 0);
+ if (size == -1) {
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, THIS->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting brick"
+ " with 'user_xattr' flag)");
+ } else {
+ if (filler->real_path)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed on %s", filler->real_path);
+ else
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed on %s",
+ uuid_utoa(filler->fd->inode->gfid));
}
+ goto out;
+ }
+ if (size == 0) {
ret = 0;
+ goto out;
+ }
+
+ list = alloca(size);
+ if (!list) {
+ goto out;
+ }
+
+ if (filler->real_path)
+ size = sys_llistxattr(filler->real_path, list, size);
+ else
+ size = sys_flistxattr(filler->fdnum, list, size);
+ if (size <= 0) {
+ ret = size;
+ goto out;
+ }
+
+ remaining_size = size;
+ list_offset = 0;
+
+ while (remaining_size > 0) {
+ len = snprintf(key, sizeof(key), "%s", list + list_offset);
+ if (fnmatch(marker_contri_key, key, 0) == 0) {
+ (void)_posix_xattr_get_set_from_backend(filler, key);
+ }
+ remaining_size -= (len + 1);
+ list_offset += (len + 1);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
+/*
+ * Serve a quota marker xattr request for `key`.
+ *
+ * The key is split on '.' and its fourth component is examined: when that
+ * component starts with "contri", every contribution xattr is collected via
+ * _posix_get_marker_all_contributions(); otherwise only `key` itself is
+ * fetched via _posix_xattr_get_set_from_backend().
+ *
+ * A key with fewer than four components leaves no fourth token; it is
+ * treated as a plain key instead of passing a NULL token to strncmp().
+ *
+ * Returns -1 when the key cannot be duplicated, otherwise the result of the
+ * helper that was called.
+ */
static int
-_posix_get_marker_quota_contributions (posix_xattr_filler_t *filler, char *key)
+_posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key)
{
- char *saveptr = NULL, *token = NULL, *tmp_key = NULL;
- char *ptr = NULL;
- int i = 0, ret = 0;
+    char *saveptr = NULL, *token = NULL, *tmp_key = NULL;
+    char *ptr = NULL;
+    int i = 0, ret = 0;
- tmp_key = ptr = gf_strdup (key);
- for (i = 0; i < 4; i++) {
- token = strtok_r (tmp_key, ".", &saveptr);
- tmp_key = NULL;
- }
+    /* strtok_r() modifies its input, so tokenize a private copy. */
+    tmp_key = ptr = gf_strdup(key);
+    if (tmp_key == NULL) {
+        return -1;
+    }
+    for (i = 0; i < 4; i++) {
+        token = strtok_r(tmp_key, ".", &saveptr);
+        if (token == NULL) {
+            /* Fewer than four components: nothing more to tokenize. */
+            break;
+        }
+        tmp_key = NULL;
+    }
- if (strncmp (token, "contri", strlen ("contri")) == 0) {
- ret = _posix_get_marker_all_contributions (filler);
- } else {
- ret = _posix_xattr_get_set_from_backend (filler, key);
- }
+    if (token != NULL && strncmp(token, "contri", SLEN("contri")) == 0) {
+        ret = _posix_get_marker_all_contributions(filler);
+    } else {
+        ret = _posix_xattr_get_set_from_backend(filler, key);
+    }
- GF_FREE (ptr);
+    GF_FREE(ptr);
- return ret;
+    return ret;
+}
+
+/*
+ * Pick the inode a filler refers to: the inode of filler->fd when an fd is
+ * present, otherwise the inode of filler->loc. Returns NULL when neither
+ * source yields an inode.
+ */
+static inode_t *
+_get_filler_inode(posix_xattr_filler_t *filler)
+{
+    inode_t *found = NULL;
+
+    if (filler->fd) {
+        found = filler->fd->inode;
+    } else if (filler->loc) {
+        /* Stays NULL when the loc carries no inode. */
+        found = filler->loc->inode;
+    }
+
+    return found;
+}
static int
-_posix_xattr_get_set (dict_t *xattr_req,
- char *key,
- data_t *data,
- void *xattrargs)
+_posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data,
+ void *xattrargs)
{
- posix_xattr_filler_t *filler = xattrargs;
- int ret = -1;
- char *databuf = NULL;
- int _fd = -1;
- loc_t *loc = NULL;
- ssize_t req_size = 0;
+ posix_xattr_filler_t *filler = xattrargs;
+ int ret = -1;
+ int len = 0;
+ char *databuf = NULL;
+ int _fd = -1;
+ ssize_t req_size = 0;
+ int32_t list_offset = 0;
+ ssize_t remaining_size = 0;
+ char *xattr = NULL;
+ inode_t *inode = NULL;
+ char *value = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ if (posix_xattr_ignorable(key))
+ goto out;
+
+ len = strlen(key);
+ /* should size be put into the data_t ? */
+ if ((filler->stbuf != NULL && IA_ISREG(filler->stbuf->ia_type)) &&
+ (len == SLEN(GF_CONTENT_KEY) && !strcmp(key, GF_CONTENT_KEY))) {
+ if (!filler->real_path)
+ goto out;
+
+ /* file content request */
+ req_size = data_to_uint64(data);
+ if (req_size >= filler->stbuf->ia_size) {
+ _fd = open(filler->real_path, O_RDONLY);
+ if (_fd == -1) {
+ gf_msg(filler->this->name, GF_LOG_ERROR, errno,
+ P_MSG_XDATA_GETXATTR, "Opening file %s failed",
+ filler->real_path);
+ goto err;
+ }
+
+ /*
+ * There could be a situation where the ia_size is
+ * zero. GF_CALLOC will return a pointer to the
+ * memory initialized by gf_mem_set_acct_info.
+ * This function adds a header and a footer to
+ * the allocated memory. The returned pointer
+ * points to the memory just after the header, but
+ * when size is zero, there is no space for user
+ * data. The memory can be freed by calling GF_FREE.
+ */
+ databuf = GF_CALLOC(1, filler->stbuf->ia_size, gf_posix_mt_char);
+ if (!databuf) {
+ goto err;
+ }
+
+ ret = sys_read(_fd, databuf, filler->stbuf->ia_size);
+ if (ret == -1) {
+ gf_msg(filler->this->name, GF_LOG_ERROR, errno,
+ P_MSG_XDATA_GETXATTR, "Read on file %s failed",
+ filler->real_path);
+ goto err;
+ }
+
+ ret = sys_close(_fd);
+ _fd = -1;
+ if (ret == -1) {
+ gf_msg(filler->this->name, GF_LOG_ERROR, errno,
+ P_MSG_XDATA_GETXATTR, "Close on file %s failed",
+ filler->real_path);
+ goto err;
+ }
+
+ ret = dict_set_bin(filler->xattr, key, databuf,
+ filler->stbuf->ia_size);
+ if (ret < 0) {
+ gf_msg(filler->this->name, GF_LOG_ERROR, 0,
+ P_MSG_XDATA_GETXATTR,
+ "failed to set dict value. key: %s,"
+ "path: %s",
+ key, filler->real_path);
+ goto err;
+ }
+
+ /* To avoid double free in cleanup below */
+ databuf = NULL;
+ err:
+ if (_fd != -1)
+ sys_close(_fd);
+ GF_FREE(databuf);
+ }
+ } else if (len == SLEN(GLUSTERFS_OPEN_FD_COUNT) &&
+ !strcmp(key, GLUSTERFS_OPEN_FD_COUNT)) {
+ inode = _get_filler_inode(filler);
+ if (!inode || gf_uuid_is_null(inode->gfid))
+ goto out;
+ ret = dict_set_uint32(filler->xattr, key, inode->fd_count);
+ if (ret < 0) {
+ gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value for %s", key);
+ }
+ } else if (len == SLEN(GLUSTERFS_ACTIVE_FD_COUNT) &&
+ !strcmp(key, GLUSTERFS_ACTIVE_FD_COUNT)) {
+ inode = _get_filler_inode(filler);
+ if (!inode || gf_uuid_is_null(inode->gfid))
+ goto out;
+ ret = dict_set_uint32(filler->xattr, key, inode->active_fd_count);
+ if (ret < 0) {
+ gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value for %s", key);
+ }
+ } else if (len == SLEN(GET_ANCESTRY_PATH_KEY) &&
+ !strcmp(key, GET_ANCESTRY_PATH_KEY)) {
+ /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
+ * fetching it via path-based fops. Hence, leaving it as it is
+ * for now.
+ */
+ if (!filler->real_path)
+ goto out;
+ char *path = NULL;
+ ret = posix_get_ancestry(filler->this, filler->loc->inode, NULL, &path,
+ POSIX_ANCESTRY_PATH, &filler->op_errno,
+ xattr_req);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_set_dynstr_sizen(filler->xattr, GET_ANCESTRY_PATH_KEY, path);
+ if (ret < 0) {
+ GF_FREE(path);
+ goto out;
+ }
- if (posix_xattr_ignorable (key, filler))
- goto out;
- /* should size be put into the data_t ? */
- if (!strcmp (key, GF_CONTENT_KEY)
- && IA_ISREG (filler->stbuf->ia_type)) {
-
- /* file content request */
- req_size = data_to_uint64 (data);
- if (req_size >= filler->stbuf->ia_size) {
- _fd = open (filler->real_path, O_RDONLY);
- if (_fd == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Opening file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- /*
- * There could be a situation where the ia_size is
- * zero. GF_CALLOC will return a pointer to the
- * memory initialized by gf_mem_set_acct_info.
- * This function adds a header and a footer to
- * the allocated memory. The returned pointer
- * points to the memory just after the header, but
- * when size is zero, there is no space for user
- * data. The memory can be freed by calling GF_FREE.
- */
- databuf = GF_CALLOC (1, filler->stbuf->ia_size,
- gf_posix_mt_char);
- if (!databuf) {
- goto err;
- }
-
- ret = read (_fd, databuf, filler->stbuf->ia_size);
- if (ret == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Read on file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- ret = close (_fd);
- _fd = -1;
- if (ret == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Close on file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- ret = dict_set_bin (filler->xattr, key,
- databuf, filler->stbuf->ia_size);
- if (ret < 0) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "failed to set dict value. key: %s, path: %s",
- key, filler->real_path);
- goto err;
- }
-
- /* To avoid double free in cleanup below */
- databuf = NULL;
- err:
- if (_fd != -1)
- close (_fd);
- GF_FREE (databuf);
- }
- } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
- loc = filler->loc;
- if (loc) {
- ret = dict_set_uint32 (filler->xattr, key,
- loc->inode->fd_count);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- key);
- }
- } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
- char *path = NULL;
- ret = posix_get_ancestry (filler->this, filler->loc->inode,
- NULL, &path, POSIX_ANCESTRY_PATH,
- &filler->op_errno, xattr_req);
- if (ret < 0) {
- goto out;
- }
+ } else if (fnmatch(marker_contri_key, key, 0) == 0) {
+ ret = _posix_get_marker_quota_contributions(filler, key);
+ } else if (len == SLEN(GF_REQUEST_LINK_COUNT_XDATA) &&
+ strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) {
+ ret = dict_set_sizen(filler->xattr, GF_REQUEST_LINK_COUNT_XDATA, data);
+ } else if (len == SLEN(GF_GET_SIZE) && strcmp(key, GF_GET_SIZE) == 0) {
+ if (filler->stbuf && IA_ISREG(filler->stbuf->ia_type)) {
+ ret = dict_set_uint64(filler->xattr, GF_GET_SIZE,
+ filler->stbuf->ia_size);
+ }
+ } else if (GF_POSIX_ACL_REQUEST(key)) {
+ if (filler->real_path)
+ ret = posix_pstat(filler->this, NULL, NULL, filler->real_path,
+ &stbuf, _gf_false);
+ else
+ ret = posix_fdstat(filler->this, filler->fd->inode, filler->fdnum,
+ &stbuf);
+ if (ret < 0) {
+ gf_msg(filler->this->name, GF_LOG_ERROR, errno,
+ P_MSG_XDATA_GETXATTR, "lstat on %s failed",
+ filler->real_path ?: uuid_utoa(filler->fd->inode->gfid));
+ goto out;
+ }
- ret = dict_set_dynstr (filler->xattr, GET_ANCESTRY_PATH_KEY,
- path);
- if (ret < 0) {
- GF_FREE (path);
- goto out;
- }
+ /* Avoid link follow in virt_pacl_get, donot fill acl for symlink.*/
+ if (IA_ISLNK(stbuf.ia_type))
+ goto out;
+
+ /* ACL_TYPE_DEFAULT is not supported for non-directory, skip */
+ if (!IA_ISDIR(stbuf.ia_type) &&
+ !strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT)))
+ goto out;
+
+ ret = posix_pacl_get(filler->real_path, filler->fdnum, key, &value);
+ if (ret || !value) {
+ gf_msg(filler->this->name, GF_LOG_ERROR, errno,
+ P_MSG_XDATA_GETXATTR, "could not get acl (%s) for %s, %d",
+ key, filler->real_path ?: uuid_utoa(filler->fd->inode->gfid),
+ ret);
+ goto out;
+ }
- } else if (fnmatch (marker_contri_key, key, 0) == 0) {
- ret = _posix_get_marker_quota_contributions (filler, key);
- } else {
- ret = _posix_xattr_get_set_from_backend (filler, key);
+ ret = dict_set_dynstrn(filler->xattr, (char *)key, len, value);
+ if (ret < 0) {
+ GF_FREE(value);
+ gf_msg(filler->this->name, GF_LOG_ERROR, errno,
+ P_MSG_XDATA_GETXATTR,
+ "could not set acl (%s) for %s in dictionary", key,
+ filler->real_path ?: uuid_utoa(filler->fd->inode->gfid));
+ goto out;
+ }
+ } else {
+ remaining_size = filler->list_size;
+ while (remaining_size > 0) {
+ xattr = filler->list + list_offset;
+ if (fnmatch(key, xattr, 0) == 0)
+ ret = _posix_xattr_get_set_from_backend(filler, xattr);
+ len = strlen(xattr);
+ remaining_size -= (len + 1);
+ list_offset += (len + 1);
}
+ }
out:
- return 0;
+ return 0;
}
-
int
-posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt)
+posix_fill_gfid_path(xlator_t *this, const char *path, struct iatt *iatt)
{
- int ret = 0;
- ssize_t size = 0;
+ int ret = 0;
+ ssize_t size = 0;
- if (!iatt)
- return 0;
+ if (!iatt)
+ return 0;
- size = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
- /* Return value of getxattr */
- if ((size == 16) || (size == -1))
- ret = 0;
- else
- ret = size;
+ size = sys_lgetxattr(path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+ /* Return value of getxattr */
+ if ((size == 16) || (size == -1))
+ ret = 0;
+ else
+ ret = size;
- return ret;
+ return ret;
}
-
int
-posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt)
+posix_fill_gfid_fd(xlator_t *this, int fd, struct iatt *iatt)
{
- int ret = 0;
- ssize_t size = 0;
+ int ret = 0;
+ ssize_t size = 0;
- if (!iatt)
- return 0;
+ if (!iatt)
+ return 0;
- size = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
- /* Return value of getxattr */
- if ((size == 16) || (size == -1))
- ret = 0;
- else
- ret = size;
+ size = sys_fgetxattr(fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+ /* Return value of getxattr */
+ if ((size == 16) || (size == -1))
+ ret = 0;
+ else
+ ret = size;
- return ret;
+ return ret;
}
void
-posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf)
+posix_fill_ino_from_gfid(xlator_t *this, struct iatt *buf)
{
- uint64_t temp_ino = 0;
- int j = 0;
- int i = 0;
-
- /* consider least significant 8 bytes of value out of gfid */
- if (uuid_is_null (buf->ia_gfid)) {
- buf->ia_ino = -1;
- goto out;
- }
- for (i = 15; i > (15 - 8); i--) {
- temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
- j += 8;
- }
- buf->ia_ino = temp_ino;
+ /* consider least significant 8 bytes of value out of gfid */
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ buf->ia_ino = -1;
+ goto out;
+ }
+ buf->ia_ino = gfid_to_ino(buf->ia_gfid);
+ buf->ia_flags |= IATT_INO;
out:
- return;
+ return;
}
int
-posix_fdstat (xlator_t *this, int fd, struct iatt *stbuf_p)
+posix_fdstat(xlator_t *this, inode_t *inode, int fd, struct iatt *stbuf_p)
{
- int ret = 0;
- struct stat fstatbuf = {0, };
- struct iatt stbuf = {0, };
+ int ret = 0;
+ struct stat fstatbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
- ret = fstat (fd, &fstatbuf);
- if (ret == -1)
- goto out;
+ priv = this->private;
+
+ ret = sys_fstat(fd, &fstatbuf);
+ if (ret == -1)
+ goto out;
- if (fstatbuf.st_nlink && !S_ISDIR (fstatbuf.st_mode))
- fstatbuf.st_nlink--;
+ if (fstatbuf.st_nlink && !S_ISDIR(fstatbuf.st_mode))
+ fstatbuf.st_nlink--;
- iatt_from_stat (&stbuf, &fstatbuf);
+ iatt_from_stat(&stbuf, &fstatbuf);
- ret = posix_fill_gfid_fd (this, fd, &stbuf);
- if (ret)
- gf_log_callingfn (this->name, GF_LOG_DEBUG, "failed to get gfid");
+ if (inode && priv->ctime) {
+ ret = posix_get_mdata_xattr(this, NULL, fd, inode, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+ "posix get mdata failed on gfid: %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+ ret = posix_fill_gfid_fd(this, fd, &stbuf);
+ stbuf.ia_flags |= IATT_GFID;
- posix_fill_ino_from_gfid (this, &stbuf);
+ posix_fill_ino_from_gfid(this, &stbuf);
- if (stbuf_p)
- *stbuf_p = stbuf;
+ if (stbuf_p)
+ *stbuf_p = stbuf;
out:
- return ret;
+ return ret;
}
-
+/* The inode here is expected to be used for updating the posix_mdata stored
+ * on disk. Don't use it as a general-purpose inode, and don't expect it to
+ * always exist (it may be NULL).
+ */
int
-posix_istat (xlator_t *this, uuid_t gfid, const char *basename,
- struct iatt *buf_p)
+posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename,
+ struct iatt *buf_p)
{
- char *real_path = NULL;
- struct stat lstatbuf = {0, };
- struct iatt stbuf = {0, };
- int ret = 0;
- struct posix_private *priv = NULL;
-
-
- priv = this->private;
+ char *real_path = NULL;
+ struct stat lstatbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = 0;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ MAKE_HANDLE_PATH(real_path, this, gfid, basename);
+ if (!real_path) {
+ gf_msg(this->name, GF_LOG_ERROR, ESTALE, P_MSG_HANDLE_PATH_CREATE,
+ "Failed to create handle path for %s/%s", uuid_utoa(gfid),
+ basename ? basename : "");
+ errno = ESTALE;
+ ret = -1;
+ goto out;
+ }
+
+ ret = sys_lstat(real_path, &lstatbuf);
+
+ if (ret != 0) {
+ if (ret == -1) {
+ if (errno != ENOENT && errno != ELOOP)
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s", real_path);
+ } else {
+ // may be some backend filesystem issue
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s and return value is %d "
+ "instead of -1. Please see dmesg output to "
+ "check whether the failure is due to backend "
+ "filesystem issue",
+ real_path, ret);
+ ret = -1;
+ }
+ goto out;
+ }
- MAKE_HANDLE_PATH (real_path, this, gfid, basename);
+ if ((lstatbuf.st_ino == priv->handledir.st_ino) &&
+ (lstatbuf.st_dev == priv->handledir.st_dev)) {
+ errno = ENOENT;
+ return -1;
+ }
- ret = lstat (real_path, &lstatbuf);
+ if (!S_ISDIR(lstatbuf.st_mode))
+ lstatbuf.st_nlink--;
- if (ret != 0) {
- if (ret == -1) {
- if (errno != ENOENT && errno != ELOOP)
- gf_log (this->name, GF_LOG_WARNING,
- "lstat failed on %s (%s)",
- real_path, strerror (errno));
- } else {
- // may be some backend filesystem issue
- gf_log (this->name, GF_LOG_ERROR, "lstat failed on "
- "%s and return value is %d instead of -1. "
- "Please see dmesg output to check whether the "
- "failure is due to backend filesystem issue",
- real_path, ret);
- ret = -1;
- }
- goto out;
- }
+ iatt_from_stat(&stbuf, &lstatbuf);
- if ((lstatbuf.st_ino == priv->handledir.st_ino) &&
- (lstatbuf.st_dev == priv->handledir.st_dev)) {
- errno = ENOENT;
- return -1;
+ if (inode && priv->ctime) {
+ ret = posix_get_mdata_xattr(this, real_path, -1, inode, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+ "posix get mdata failed on %s", real_path);
+ goto out;
}
+ }
- if (!S_ISDIR (lstatbuf.st_mode))
- lstatbuf.st_nlink --;
-
- iatt_from_stat (&stbuf, &lstatbuf);
-
- if (basename)
- posix_fill_gfid_path (this, real_path, &stbuf);
- else
- uuid_copy (stbuf.ia_gfid, gfid);
+ if (basename)
+ posix_fill_gfid_path(this, real_path, &stbuf);
+ else
+ gf_uuid_copy(stbuf.ia_gfid, gfid);
+ stbuf.ia_flags |= IATT_GFID;
- posix_fill_ino_from_gfid (this, &stbuf);
+ posix_fill_ino_from_gfid(this, &stbuf);
- if (buf_p)
- *buf_p = stbuf;
+ if (buf_p)
+ *buf_p = stbuf;
out:
- return ret;
+ return ret;
}
-
-
int
-posix_pstat (xlator_t *this, uuid_t gfid, const char *path,
- struct iatt *buf_p)
+posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path,
+ struct iatt *buf_p, gf_boolean_t inode_locked)
{
- struct stat lstatbuf = {0, };
- struct iatt stbuf = {0, };
- int ret = 0;
- struct posix_private *priv = NULL;
+ struct stat lstatbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = 0;
+ int op_errno = 0;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ if (gfid && !gf_uuid_is_null(gfid))
+ gf_uuid_copy(stbuf.ia_gfid, gfid);
+ else
+ posix_fill_gfid_path(this, path, &stbuf);
+ stbuf.ia_flags |= IATT_GFID;
+
+ ret = sys_lstat(path, &lstatbuf);
+ if (ret == -1) {
+ if (errno != ENOENT) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s", path);
+ errno = op_errno; /*gf_msg could have changed errno*/
+ } else {
+ op_errno = errno;
+ gf_msg_debug(this->name, 0, "lstat failed on %s (%s)", path,
+ strerror(errno));
+ errno = op_errno; /*gf_msg could have changed errno*/
+ }
+ goto out;
+ }
+ if ((lstatbuf.st_ino == priv->handledir.st_ino) &&
+ (lstatbuf.st_dev == priv->handledir.st_dev)) {
+ errno = ENOENT;
+ return -1;
+ }
+
+ if (!S_ISDIR(lstatbuf.st_mode))
+ lstatbuf.st_nlink--;
+
+ iatt_from_stat(&stbuf, &lstatbuf);
+
+ if (priv->ctime) {
+ if (inode) {
+ if (!inode_locked) {
+ ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
+ } else {
+ ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+ "posix get mdata failed on gfid: %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ } else {
+ ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+ "posix get mdata failed on path: %s", path);
+ goto out;
+ }
+ }
+ }
- priv = this->private;
+ posix_fill_ino_from_gfid(this, &stbuf);
- ret = lstat (path, &lstatbuf);
+ if (buf_p)
+ *buf_p = stbuf;
+out:
+ return ret;
+}
- if (ret != 0) {
- if (ret == -1) {
- if (errno != ENOENT)
- gf_log (this->name, GF_LOG_WARNING,
- "lstat failed on %s (%s)",
- path, strerror (errno));
- } else {
- // may be some backend filesytem issue
- gf_log (this->name, GF_LOG_ERROR, "lstat failed on "
- "%s and return value is %d instead of -1. "
- "Please see dmesg output to check whether the "
- "failure is due to backend filesystem issue",
- path, ret);
- ret = -1;
- }
- goto out;
- }
+static void
+_get_list_xattr(posix_xattr_filler_t *filler)
+{
+ ssize_t size = 0;
- if ((lstatbuf.st_ino == priv->handledir.st_ino) &&
- (lstatbuf.st_dev == priv->handledir.st_dev)) {
- errno = ENOENT;
- return -1;
- }
+ if ((!filler) || ((!filler->real_path) && (filler->fdnum < 0)))
+ goto out;
- if (!S_ISDIR (lstatbuf.st_mode))
- lstatbuf.st_nlink --;
+ if (filler->real_path)
+ size = sys_llistxattr(filler->real_path, NULL, 0);
+ else
+ size = sys_flistxattr(filler->fdnum, NULL, 0);
- iatt_from_stat (&stbuf, &lstatbuf);
+ if (size <= 0)
+ goto out;
- if (gfid && !uuid_is_null (gfid))
- uuid_copy (stbuf.ia_gfid, gfid);
- else
- posix_fill_gfid_path (this, path, &stbuf);
+ filler->list = GF_CALLOC(1, size, gf_posix_mt_char);
+ if (!filler->list)
+ goto out;
- posix_fill_ino_from_gfid (this, &stbuf);
+ if (filler->real_path)
+ size = sys_llistxattr(filler->real_path, filler->list, size);
+ else
+ size = sys_flistxattr(filler->fdnum, filler->list, size);
- if (buf_p)
- *buf_p = stbuf;
+ filler->list_size = size;
out:
- return ret;
+ return;
}
static void
-_handle_list_xattr (dict_t *xattr_req, const char *real_path,
- posix_xattr_filler_t *filler)
+_handle_list_xattr(posix_xattr_filler_t *filler)
{
- int ret = -1;
- ssize_t size = 0;
- char *list = NULL;
- int32_t list_offset = 0;
- size_t remaining_size = 0;
- char *key = NULL;
+ int32_t list_offset = 0;
+ ssize_t remaining_size = 0;
+ char *key = NULL;
+ int len;
- if (!real_path)
- goto out;
-
- size = sys_llistxattr (real_path, NULL, 0);
- if (size <= 0)
- goto out;
-
- list = alloca (size);
- if (!list)
- goto out;
+ remaining_size = filler->list_size;
+ while (remaining_size > 0) {
+ key = filler->list + list_offset;
+ len = strlen(key);
- size = sys_llistxattr (real_path, list, size);
- if (size <= 0)
- goto out;
+ if (gf_get_index_by_elem(list_xattr_ignore_xattrs, key) >= 0)
+ goto next;
- remaining_size = size;
- list_offset = 0;
- while (remaining_size > 0) {
- key = list + list_offset;
+ if (posix_special_xattr(marker_xattrs, key))
+ goto next;
- if (_is_in_array (list_xattr_ignore_xattrs, key))
- goto next;
+ if (posix_handle_georep_xattrs(NULL, key, NULL, _gf_false))
+ goto next;
- if (posix_special_xattr (marker_xattrs, key))
- goto next;
+ if (posix_is_gfid2path_xattr(key))
+ goto next;
- if (dict_get (filler->xattr, key))
- goto next;
+ if (dict_getn(filler->xattr, key, len))
+ goto next;
- ret = _posix_xattr_get_set_from_backend (filler, key);
-next:
- remaining_size -= strlen (key) + 1;
- list_offset += strlen (key) + 1;
+ (void)_posix_xattr_get_set_from_backend(filler, key);
+ next:
+ remaining_size -= (len + 1);
+ list_offset += (len + 1);
- } /* while (remaining_size > 0) */
-out:
- return;
+ } /* while (remaining_size > 0) */
+ return;
}
dict_t *
-posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc,
- dict_t *xattr_req, struct iatt *buf)
+posix_xattr_fill(xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd,
+ int fdnum, dict_t *xattr_req, struct iatt *buf)
{
- dict_t *xattr = NULL;
- posix_xattr_filler_t filler = {0, };
- gf_boolean_t list = _gf_false;
-
- if (dict_get (xattr_req, "list-xattr")) {
- dict_del (xattr_req, "list-xattr");
- list = _gf_true;
- }
+ dict_t *xattr = NULL;
+ posix_xattr_filler_t filler = {
+ 0,
+ };
+ gf_boolean_t list = _gf_false;
+
+ if (dict_get_sizen(xattr_req, "list-xattr")) {
+ dict_del_sizen(xattr_req, "list-xattr");
+ list = _gf_true;
+ }
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto out;
+ }
+
+ filler.this = this;
+ filler.real_path = real_path;
+ filler.xattr = xattr;
+ filler.stbuf = buf;
+ filler.loc = loc;
+ filler.fd = fd;
+ filler.fdnum = fdnum;
+
+ _get_list_xattr(&filler);
+ dict_foreach(xattr_req, _posix_xattr_get_set, &filler);
+ if (list)
+ _handle_list_xattr(&filler);
+
+ GF_FREE(filler.list);
+out:
+ return xattr;
+}
- xattr = get_new_dict();
- if (!xattr) {
- goto out;
- }
+void
+posix_gfid_unset(xlator_t *this, dict_t *xdata)
+{
+ uuid_t uuid = {
+ 0,
+ };
+ int ret = 0;
- filler.this = this;
- filler.real_path = real_path;
- filler.xattr = xattr;
- filler.stbuf = buf;
- filler.loc = loc;
+ if (xdata == NULL)
+ goto out;
- dict_foreach (xattr_req, _posix_xattr_get_set, &filler);
- if (list)
- _handle_list_xattr (xattr_req, real_path, &filler);
+ ret = dict_get_gfuuid(xdata, "gfid-req", &uuid);
+ if (ret) {
+ goto out;
+ }
+ posix_handle_unset(this, uuid, NULL);
out:
- return xattr;
+ return;
}
-
+/*
+ * Make sure the entry at @path carries a GFID xattr and a matching handle.
+ *
+ * If the GFID xattr is already present (16 bytes) it is reused; otherwise
+ * the "gfid-req" value from @xattr_req is written with XATTR_CREATE. A hard
+ * handle is then created for non-directories and a soft handle for
+ * directories.
+ *
+ * A NULL @xattr_req is an error (EINVAL) unless @pid is GF_SERVER_PID_TRASH,
+ * in which case the function returns 0 without doing anything.
+ *
+ * Returns 0 on success; on failure returns non-zero with *op_errno set.
+ */
int
-posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
+posix_gfid_set(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req,
+ pid_t pid, int *op_errno)
{
- void *uuid_req = NULL;
- uuid_t uuid_curr;
- int ret = 0;
- ssize_t size = 0;
- struct stat stat = {0, };
-
-
- if (!xattr_req)
- goto out;
-
- if (sys_lstat (path, &stat) != 0)
- goto out;
-
- size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
- if (size == 16) {
- ret = 0;
- goto verify_handle;
+ uuid_t uuid_req;
+ uuid_t uuid_curr;
+ int ret = 0;
+ ssize_t size = 0;
+ struct stat stat = {
+ 0,
+ };
+
+ *op_errno = 0;
+
+ if (!xattr_req) {
+ if (pid != GF_SERVER_PID_TRASH) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+ "xattr_req is null");
+ *op_errno = EINVAL;
+ ret = -1;
 }
-
- ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get the gfid from dict for %s",
- loc->path);
- goto out;
- }
-
- ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "setting GFID on %s failed (%s)", path,
- strerror (errno));
- goto out;
- }
- uuid_copy (uuid_curr, uuid_req);
+ goto out;
+ }
+
+ if (sys_lstat(path, &stat) != 0) {
+ ret = -1;
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", path);
+ goto out;
+ }
+
+ size = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (size == 16) {
+ ret = 0;
+ goto verify_handle;
+ }
+
+ ret = dict_get_gfuuid(xattr_req, "gfid-req", &uuid_req);
+ if (ret) {
+ /* loc may be NULL (the null-gfid message below guards it too) */
+ gf_msg_debug(this->name, 0, "failed to get the gfid from dict for %s",
+ loc ? loc->path : "");
+ *op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ if (gf_uuid_is_null(uuid_req)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID,
+ "gfid is null for %s", loc ? loc->path : "");
+ ret = -1;
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = sys_lsetxattr(path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE);
+ if (ret == -1) {
+ /* record errno before logging: gf_msg could change it */
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_FAILED,
+ "setting GFID on %s failed ", path);
+ goto out;
+ }
+ gf_uuid_copy(uuid_curr, uuid_req);
verify_handle:
- if (!S_ISDIR (stat.st_mode))
- ret = posix_handle_hard (this, path, uuid_curr, &stat);
- else
- ret = posix_handle_soft (this, path, loc, uuid_curr, &stat);
+ if (!S_ISDIR(stat.st_mode))
+ ret = posix_handle_hard(this, path, uuid_curr, &stat);
+ else
+ ret = posix_handle_soft(this, path, loc, uuid_curr, &stat);
out:
- return ret;
+ if (ret && !(*op_errno))
+ *op_errno = errno;
+ return ret;
}
-
-int
-posix_set_file_contents (xlator_t *this, const char *path, char *keyp,
- data_t *value, int flags)
+#ifdef HAVE_SYS_ACL_H
+/*
+ * Apply the textual ACL @acl_s to a file.
+ *
+ * The ACL type is derived from @key. When @path is given the ACL is set by
+ * path; otherwise it is set on descriptor @fdnum, which is only possible
+ * for ACL_TYPE_ACCESS.
+ *
+ * Returns 0 on success. Argument and parse failures return -1 (invalid
+ * arguments set errno to EINVAL); a failure of the acl_set_*() call itself
+ * is returned as -errno, which is what posix_handle_pair expects.
+ */
+static int
+posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s)
{
- char * key = NULL;
- char real_path[PATH_MAX];
- int32_t file_fd = -1;
- int op_ret = 0;
- int ret = -1;
-
+ int ret = -1;
+ acl_t acl = NULL;
+ acl_type_t type = 0;
- /* XXX: does not handle assigning GFID to created files */
+ if ((!path) && (fdnum < 0)) {
+ errno = EINVAL;
return -1;
+ }
- key = &(keyp[15]);
- sprintf (real_path, "%s/%s", path, key);
-
- if (flags & XATTR_REPLACE) {
- /* if file exists, replace it
- * else, error out */
- file_fd = open (real_path, O_TRUNC|O_WRONLY);
-
- if (file_fd == -1) {
- goto create;
- }
-
- if (value->len) {
- ret = write (file_fd, value->data, value->len);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "write failed while doing setxattr "
- "for key %s on path %s: %s",
- key, real_path, strerror (errno));
- goto out;
- }
-
- ret = close (file_fd);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "close failed on %s: %s",
- real_path, strerror (errno));
- goto out;
- }
- }
-
- create: /* we know file doesn't exist, create it */
+ type = gf_posix_acl_get_type(key);
+ if (!type)
+ return -1;
- file_fd = open (real_path, O_CREAT|O_WRONLY, 0644);
+ acl = acl_from_text(acl_s);
+ if (!acl)
+ return -1;
- if (file_fd == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s with O_CREAT: %s",
- key, strerror (errno));
- goto out;
- }
+ if (path)
+ ret = acl_set_file(path, type, acl);
+ else if (type == ACL_TYPE_ACCESS)
+ ret = acl_set_fd(fdnum, acl);
+ else {
+ /* release the parsed ACL before bailing out; set errno afterwards
+ * so the free cannot disturb it */
+ acl_free(acl);
+ errno = EINVAL;
+ return -1;
+ }
- ret = write (file_fd, value->data, value->len);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "write failed on %s while setxattr with "
- "key %s: %s",
- real_path, key, strerror (errno));
- goto out;
- }
+ if (ret)
+ /* posix_handle_pair expects ret to be the errno */
+ ret = -errno;
- ret = close (file_fd);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "close failed on %s while setxattr with "
- "key %s: %s",
- real_path, key, strerror (errno));
- goto out;
- }
- }
+ acl_free(acl);
-out:
- return op_ret;
+ return ret;
}
-
+/*
+ * Read the ACL selected by @key and return it as text in *@acl_s.
+ *
+ * When @path is given the ACL is read by path; otherwise it is read from
+ * descriptor @fdnum, which is only possible for ACL_TYPE_ACCESS.
+ *
+ * Returns 0 on success, with *@acl_s holding a gf_strdup()'d copy of the
+ * ACL text; returns -1 on failure (invalid arguments set errno to EINVAL).
+ */
int
-posix_get_file_contents (xlator_t *this, uuid_t pargfid,
- const char *name, char **contents)
+posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s)
{
- char *real_path = NULL;
- int32_t file_fd = -1;
- struct iatt stbuf = {0,};
- int op_ret = 0;
- int ret = -1;
-
+ int ret = -1;
+ acl_t acl = NULL;
+ acl_type_t type = 0;
+ char *acl_tmp = NULL;
- MAKE_HANDLE_PATH (real_path, this, pargfid, name);
-
- op_ret = posix_istat (this, pargfid, name, &stbuf);
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s",
- real_path, strerror (errno));
- goto out;
- }
+ if ((!path) && (fdnum < 0)) {
+ errno = EINVAL;
+ return -1;
+ }
- file_fd = open (real_path, O_RDONLY);
+ type = gf_posix_acl_get_type(key);
+ if (!type)
+ return -1;
- if (file_fd == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s",
- real_path, strerror (errno));
- goto out;
- }
+ if (path)
+ acl = acl_get_file(path, type);
+ else if (type == ACL_TYPE_ACCESS)
+ acl = acl_get_fd(fdnum);
+ else {
+ errno = EINVAL;
+ return -1;
+ }
- *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char),
- gf_posix_mt_char);
- if (! *contents) {
- op_ret = -errno;
- goto out;
- }
+ if (!acl)
+ return -1;
- ret = read (file_fd, *contents, stbuf.ia_size);
- if (ret <= 0) {
- op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s",
- real_path, strerror (errno));
- goto out;
- }
+#ifdef HAVE_ACL_LIBACL_H
+ acl_tmp = acl_to_any_text(acl, NULL, ',',
+ TEXT_ABBREVIATE | TEXT_NUMERIC_IDS);
+#else /* FreeBSD and the like */
+ acl_tmp = acl_to_text_np(acl, NULL, ACL_TEXT_NUMERIC_IDS);
+#endif
+ if (!acl_tmp)
+ goto free_acl;
- *contents[stbuf.ia_size] = '\0';
+ *acl_s = gf_strdup(acl_tmp);
+ if (*acl_s)
+ ret = 0;
- op_ret = close (file_fd);
- file_fd = -1;
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s",
- real_path, strerror (errno));
- goto out;
- }
+ acl_free(acl_tmp);
+free_acl:
+ acl_free(acl);
-out:
- if (op_ret < 0) {
- GF_FREE (*contents);
- if (file_fd != -1)
- close (file_fd);
- }
+ return ret;
+}
+#else /* !HAVE_SYS_ACL_H (NetBSD) */
+int
+posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s)
+{
+ errno = ENOTSUP;
+ return -1;
+}
- return op_ret;
+int
+posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s)
+{
+ errno = ENOTSUP;
+ return -1;
}
+#endif
#ifdef GF_DARWIN_HOST_OS
-static
-void posix_dump_buffer (xlator_t *this, const char *real_path, const char *key,
- data_t *value, int flags)
-{
- char buffer[3*value->len+1];
- int index = 0;
- buffer[0] = 0;
- gf_loglevel_t log_level = gf_log_get_loglevel ();
- if (log_level == GF_LOG_TRACE) {
- char *data = (char *) value->data;
- for (index = 0; index < value->len; index++)
- sprintf(buffer+3*index, " %02x", data[index]);
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "Dump %s: key:%s flags: %u length:%u data:%s ",
- real_path, key, flags, value->len,
- (log_level == GF_LOG_TRACE ? buffer : "<skipped in DEBUG>"));
+/*
+ * Debug-log an xattr key/value pair. At TRACE log level the value bytes are
+ * also rendered as " xx" hex triplets; at any other level the data is
+ * reported as skipped.
+ */
+static void
+posix_dump_buffer(xlator_t *this, const char *real_path, const char *key,
+ data_t *value, int flags)
+{
+ /* three characters (" xx") per byte plus the terminating NUL */
+ char buffer[3 * value->len + 1];
+ int index = 0;
+ buffer[0] = 0;
+ gf_loglevel_t log_level = gf_log_get_loglevel();
+ if (log_level == GF_LOG_TRACE) {
+ /* Read the bytes as unsigned: a negative char would be promoted to
+ * a negative int and printed by %02x as eight hex digits, writing
+ * past the three-character slot reserved for each byte. */
+ unsigned char *data = (unsigned char *)value->data;
+ for (index = 0; index < value->len; index++)
+ sprintf(buffer + 3 * index, " %02x", data[index]);
+ }
+ gf_msg_debug(this->name, 0, "Dump %s: key:%s flags: %u length:%u data:%s ",
+ real_path, key, flags, value->len,
+ (log_level == GF_LOG_TRACE ? buffer : "<skipped in DEBUG>"));
}
#endif
-static int gf_xattr_enotsup_log;
-
int
-posix_handle_pair (xlator_t *this, const char *real_path,
- char *key, data_t *value, int flags)
+posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+ data_t *value, int flags, struct iatt *stbuf)
{
- int sys_ret = -1;
- int ret = 0;
-
- if (XATTR_IS_PATHINFO (key)) {
- ret = -EACCES;
- goto out;
- } else if (ZR_FILE_CONTENT_REQUEST(key)) {
- ret = posix_set_file_contents (this, real_path, key, value,
- flags);
- } else {
- sys_ret = sys_lsetxattr (real_path, key, value->data,
- value->len, flags);
+ int sys_ret = -1;
+ int ret = 0;
+ int op_errno = 0;
+ struct mdata_iatt mdata_iatt = {
+ 0,
+ };
#ifdef GF_DARWIN_HOST_OS
- posix_dump_buffer(this, real_path, key, value, flags);
+ const int error_code = EINVAL;
+#else
+ const int error_code = EEXIST;
#endif
- if (sys_ret < 0) {
- ret = -errno;
- if (errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting "
- "brick with 'user_xattr' "
- "flag)");
- } else if (errno == ENOENT) {
- if (!posix_special_xattr (marker_xattrs,
- key)) {
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr on %s failed: %s",
- real_path, strerror (errno));
- }
- } else {
+ if (XATTR_IS_PATHINFO(key)) {
+ ret = -EACCES;
+ goto out;
+ } else if (posix_is_gfid2path_xattr(key)) {
+ ret = -ENOTSUP;
+ goto out;
+ } else if (GF_POSIX_ACL_REQUEST(key)) {
+ if (stbuf && IS_DHT_LINKFILE_MODE(stbuf))
+ goto out;
+ ret = posix_pacl_set(real_path, -1, key, value->data);
+ } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR,
+ SLEN(POSIX_ACL_ACCESS_XATTR)) &&
+ stbuf && IS_DHT_LINKFILE_MODE(stbuf)) {
+ goto out;
+ } else if (!strncmp(key, GF_INTERNAL_CTX_KEY, SLEN(GF_INTERNAL_CTX_KEY))) {
+ /* ignore this key value pair */
+ ret = 0;
+ goto out;
+ } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) {
+ /* This key is set either by rebalance or by self-heal. Create the
+ * xattr if it is not present; if it is already present, compare the
+ * values and update to the larger one.
+ */
+ if (loc == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+ posix_mdata_iatt_from_disk(&mdata_iatt,
+ (posix_mdata_disk_t *)value->data);
+ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
+ &mdata_iatt, &op_errno);
+ if (ret != 0) {
+ ret = -op_errno;
+ }
+ goto out;
+ } else {
+ sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags);
#ifdef GF_DARWIN_HOST_OS
- gf_log (this->name,
- ((errno == EINVAL) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "%s: key:%s flags: %u length:%d error:%s",
- real_path, key, flags, value->len,
- strerror (errno));
-#else /* ! DARWIN */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: key:%s flags: %u length:%d error:%s",
- real_path, key, flags, value->len,
- strerror (errno));
-#endif /* DARWIN */
- }
-
- goto out;
+ posix_dump_buffer(this, real_path, key, value, flags);
+#endif
+ if (sys_ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ if (!posix_special_xattr(marker_xattrs, key)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "setxattr on %s failed", real_path);
+ }
+ } else {
+ if (errno == error_code) {
+ gf_msg_debug(this->name, 0,
+ "%s: key:%s"
+ "flags: %u length:%d",
+ real_path, key, flags, value->len);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "%s: key:%s"
+ "flags: %u length:%d",
+ real_path, key, flags, value->len);
}
+ }
+
+ goto out;
}
+ }
out:
- return ret;
+ return ret;
}
int
-posix_fhandle_pair (xlator_t *this, int fd,
- char *key, data_t *value, int flags)
+posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key,
+ data_t *value, int flags, struct iatt *stbuf, fd_t *_fd)
{
- int sys_ret = -1;
- int ret = 0;
-
- if (XATTR_IS_PATHINFO (key)) {
- ret = -EACCES;
- goto out;
+ int sys_ret = -1;
+ int ret = 0;
+
+ if (XATTR_IS_PATHINFO(key)) {
+ ret = -EACCES;
+ goto out;
+ } else if (posix_is_gfid2path_xattr(key)) {
+ ret = -ENOTSUP;
+ goto out;
+ } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR,
+ SLEN(POSIX_ACL_ACCESS_XATTR)) &&
+ stbuf && IS_DHT_LINKFILE_MODE(stbuf)) {
+ goto out;
+ }
+
+ sys_ret = sys_fsetxattr(fd, key, value->data, value->len, flags);
+
+ if (sys_ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fsetxattr on fd=%d"
+ " failed",
+ fd);
+ } else {
+#ifdef GF_DARWIN_HOST_OS
+ if (errno == EINVAL) {
+ gf_msg_debug(this->name, 0,
+ "fd=%d: key:%s "
+ "error:%s",
+ fd, key, strerror(errno));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fd=%d: key:%s", fd, key);
+ }
+
+#else /* ! DARWIN */
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fd=%d: key:%s", fd, key);
+#endif /* DARWIN */
}
- sys_ret = sys_fsetxattr (fd, key, value->data,
- value->len, flags);
-
- if (sys_ret < 0) {
- ret = -errno;
- if (errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting "
- "brick with 'user_xattr' "
- "flag)");
- } else if (errno == ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr on fd=%d failed: %s", fd,
- strerror (errno));
- } else {
+ goto out;
+ } else if (_fd) {
+ posix_set_ctime(frame, this, NULL, fd, _fd->inode, NULL);
+ }
-#ifdef GF_DARWIN_HOST_OS
- gf_log (this->name,
- ((errno == EINVAL) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "fd=%d: key:%s error:%s",
- fd, key, strerror (errno));
-#else /* ! DARWIN */
- gf_log (this->name, GF_LOG_ERROR,
- "fd=%d: key:%s error:%s",
- fd, key, strerror (errno));
-#endif /* DARWIN */
- }
+out:
+ return ret;
+}
- goto out;
+static void
+del_stale_dir_handle(xlator_t *this, uuid_t gfid)
+{
+ char newpath[PATH_MAX] = {
+ 0,
+ };
+ uuid_t gfid_curr = {
+ 0,
+ };
+ ssize_t size = -1;
+ gf_boolean_t stale = _gf_false;
+ char *hpath = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ struct iatt iabuf = {
+ 0,
+ };
+
+ MAKE_HANDLE_GFID_PATH(hpath, this, gfid);
+
+ /* check that it is a valid directory handle */
+ size = sys_lstat(hpath, &stbuf);
+ if (size < 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: Handle stat failed: "
+ "%s",
+ hpath, strerror(errno));
+ goto out;
+ }
+
+ iatt_from_stat(&iabuf, &stbuf);
+ if (iabuf.ia_nlink != 1 || !IA_ISLNK(iabuf.ia_type)) {
+ gf_msg_debug(this->name, 0, "%s: Handle nlink %d %d", hpath,
+ iabuf.ia_nlink, IA_ISLNK(iabuf.ia_type));
+ goto out;
+ }
+
+ size = posix_handle_path(this, gfid, NULL, newpath, sizeof(newpath));
+ if (size <= 0) {
+ if (errno == ENOENT) {
+ gf_msg_debug(this->name, 0, "%s: %s", newpath, strerror(ENOENT));
+ stale = _gf_true;
}
+ goto out;
+ }
+
+ size = sys_lgetxattr(newpath, GFID_XATTR_KEY, gfid_curr, 16);
+ if (size < 0 && errno == ENOENT) {
+ gf_msg_debug(this->name, 0, "%s: %s", newpath, strerror(ENOENT));
+ stale = _gf_true;
+ } else if (size == 16 && gf_uuid_compare(gfid, gfid_curr)) {
+ gf_msg_debug(this->name, 0,
+ "%s: mismatching gfid: %s, "
+ "at %s",
+ hpath, uuid_utoa(gfid_curr), newpath);
+ stale = _gf_true;
+ }
out:
- return ret;
+ if (stale) {
+ size = sys_unlink(hpath);
+ if (size < 0 && errno != ENOENT)
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ P_MSG_STALE_HANDLE_REMOVE_FAILED,
+ "%s: Failed"
+ "to remove handle to %s",
+ hpath, newpath);
+ } else if (size == 16) {
+ gf_msg_debug(this->name, 0,
+ "%s: Fresh handle for "
+ "%s with gfid %s",
+ hpath, newpath, uuid_utoa(gfid_curr));
+ }
+ return;
}
-
static int
-janitor_walker (const char *fpath, const struct stat *sb,
- int typeflag, struct FTW *ftwbuf)
+janitor_walker(const char *fpath, const struct stat *sb, int typeflag,
+ struct FTW *ftwbuf)
{
- struct iatt stbuf = {0, };
- xlator_t *this = NULL;
-
- this = THIS;
- posix_pstat (this, NULL, fpath, &stbuf);
- switch (sb->st_mode & S_IFMT) {
+ struct iatt stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ /* posix_mdata_t is not filled, no time or size attributes
+ * are being used, so fine.
+ */
+ posix_pstat(this, NULL, NULL, fpath, &stbuf, _gf_false);
+ switch (sb->st_mode & S_IFMT) {
case S_IFREG:
case S_IFBLK:
case S_IFLNK:
case S_IFCHR:
case S_IFIFO:
case S_IFSOCK:
- gf_log (THIS->name, GF_LOG_TRACE,
- "unlinking %s", fpath);
- unlink (fpath);
- if (stbuf.ia_nlink == 1)
- posix_handle_unset (this, stbuf.ia_gfid, NULL);
- break;
+ gf_msg_trace(THIS->name, 0, "unlinking %s", fpath);
+ sys_unlink(fpath);
+ if (stbuf.ia_nlink == 1)
+ posix_handle_unset(this, stbuf.ia_gfid, NULL);
+ break;
case S_IFDIR:
- if (ftwbuf->level) { /* don't remove top level dir */
- gf_log (THIS->name, GF_LOG_TRACE,
- "removing directory %s", fpath);
+ if (ftwbuf->level) { /* don't remove top level dir */
+ gf_msg_debug(THIS->name, 0, "removing directory %s", fpath);
- rmdir (fpath);
- posix_handle_unset (this, stbuf.ia_gfid, NULL);
- }
- break;
- }
+ sys_rmdir(fpath);
+ del_stale_dir_handle(this, stbuf.ia_gfid);
+ }
+ break;
+ }
- return 0; /* 0 = FTW_CONTINUE */
+ return 0; /* 0 = FTW_CONTINUE */
}
+void
+__posix_janitor_timer_start(xlator_t *this);
-static struct posix_fd *
-janitor_get_next_fd (xlator_t *this)
+static int
+posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
{
- struct posix_private *priv = NULL;
- struct posix_fd *pfd = NULL;
-
- struct timespec timeout;
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+
+ this = data;
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->janitor_mutex);
+ {
+ if (priv->janitor_task_stop) {
+ priv->janitor_task_stop = _gf_false;
+ pthread_cond_signal(&priv->janitor_cond);
+ pthread_mutex_unlock(&priv->janitor_mutex);
+ goto out;
+ }
+ }
+ pthread_mutex_unlock(&priv->janitor_mutex);
- priv = this->private;
+ LOCK(&priv->lock);
+ {
+ __posix_janitor_timer_start(this);
+ }
+ UNLOCK(&priv->lock);
- pthread_mutex_lock (&priv->janitor_lock);
- {
- if (list_empty (&priv->janitor_fds)) {
- time (&timeout.tv_sec);
- timeout.tv_sec += priv->janitor_sleep_duration;
- timeout.tv_nsec = 0;
-
- pthread_cond_timedwait (&priv->janitor_cond,
- &priv->janitor_lock,
- &timeout);
- goto unlock;
- }
+out:
+ return 0;
+}
- pfd = list_entry (priv->janitor_fds.next, struct posix_fd,
- list);
+static int
+posix_janitor_task(void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ xlator_t *old_this = NULL;
+
+ time_t now;
+
+ this = data;
+ priv = this->private;
+ /* We need THIS to be set for janitor_walker */
+ old_this = THIS;
+ THIS = this;
+
+ if (!priv)
+ goto out;
+
+ now = gf_time();
+ if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
+ if (priv->disable_landfill_purge) {
+ gf_msg_debug(this->name, 0,
+ "Janitor would have "
+ "cleaned out %s, but purge"
+ "is disabled.",
+ priv->trash_path);
+ } else {
+ gf_msg_trace(this->name, 0, "janitor cleaning out %s",
+ priv->trash_path);
- list_del (priv->janitor_fds.next);
+ nftw(priv->trash_path, janitor_walker, 32, FTW_DEPTH | FTW_PHYS);
}
-unlock:
- pthread_mutex_unlock (&priv->janitor_lock);
+ priv->last_landfill_check = now;
+ }
- return pfd;
-}
+ THIS = old_this;
+out:
+ return 0;
+}
-static void *
-posix_janitor_thread_proc (void *data)
+static void
+posix_janitor_task_initator(struct gf_tw_timer_list *timer, void *data,
+ unsigned long calltime)
{
- xlator_t * this = NULL;
- struct posix_private *priv = NULL;
- struct posix_fd *pfd;
+ xlator_t *this = NULL;
+ int ret = 0;
- time_t now;
+ this = data;
- this = data;
- priv = this->private;
+ ret = synctask_new(this->ctx->env, posix_janitor_task,
+ posix_janitor_task_done, NULL, this);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+ "spawning janitor "
+ "thread failed");
+ }
- THIS = this;
+ return;
+}
- while (1) {
- time (&now);
- if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor cleaning out %s", priv->trash_path);
+void
+__posix_janitor_timer_start(xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ struct gf_tw_timer_list *timer = NULL;
- nftw (priv->trash_path,
- janitor_walker,
- 32,
- FTW_DEPTH | FTW_PHYS);
+ priv = this->private;
+ timer = priv->janitor;
- priv->last_landfill_check = now;
- }
+ INIT_LIST_HEAD(&timer->entry);
+ timer->expires = priv->janitor_sleep_duration;
+ timer->function = posix_janitor_task_initator;
+ timer->data = this;
+ gf_tw_add_timer(glusterfs_ctx_tw_get(this->ctx), timer);
- pfd = janitor_get_next_fd (this);
- if (pfd) {
- if (pfd->dir == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor: closing file fd=%d", pfd->fd);
- close (pfd->fd);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor: closing dir fd=%p", pfd->dir);
- closedir (pfd->dir);
- }
-
- GF_FREE (pfd);
- }
+ return;
+}
+
+void
+posix_janitor_timer_start(xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ struct gf_tw_timer_list *timer = NULL;
+
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->janitor) {
+ timer = GF_CALLOC(1, sizeof(struct gf_tw_timer_list),
+ gf_common_mt_tw_timer_list);
+ if (!timer) {
+ goto unlock;
+ }
+ priv->janitor = timer;
+ __posix_janitor_timer_start(this);
}
+ }
+unlock:
+ UNLOCK(&priv->lock);
- return NULL;
+ return;
}
+static struct posix_fd *
+janitor_get_next_fd(glusterfs_ctx_t *ctx)
+{
+ struct posix_fd *pfd = NULL;
-void
-posix_spawn_janitor_thread (xlator_t *this)
+ while (list_empty(&ctx->janitor_fds)) {
+ if (ctx->pxl_count == 0) {
+ return NULL;
+ }
+
+ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock);
+ }
+
+ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
+ list_del_init(&pfd->list);
+
+ return pfd;
+}
+
+static void
+posix_close_pfd(xlator_t *xl, struct posix_fd *pfd)
{
- struct posix_private *priv = NULL;
- int ret = 0;
+ THIS = xl;
- priv = this->private;
+ if (pfd->dir == NULL) {
+ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
+ sys_close(pfd->fd);
+ } else {
+ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+ sys_closedir(pfd->dir);
+ }
- LOCK (&priv->lock);
- {
- if (!priv->janitor_present) {
- ret = gf_thread_create (&priv->janitor, NULL,
- posix_janitor_thread_proc, this);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "spawning janitor thread failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- priv->janitor_present = _gf_true;
- }
- }
-unlock:
- UNLOCK (&priv->lock);
+ GF_FREE(pfd);
}
-static int
-is_fresh_file (struct stat *stat)
+static void *
+posix_ctx_janitor_thread_proc(void *data)
{
- struct timeval tv;
+ xlator_t *xl;
+ struct posix_fd *pfd;
+ glusterfs_ctx_t *ctx = NULL;
+ struct posix_private *priv_fd;
- gettimeofday (&tv, NULL);
+ ctx = data;
- if ((stat->st_ctime >= (tv.tv_sec - 1))
- && (stat->st_ctime <= tv.tv_sec))
- return 1;
+ pthread_mutex_lock(&ctx->fd_lock);
- return 0;
-}
+ while ((pfd = janitor_get_next_fd(ctx)) != NULL) {
+ pthread_mutex_unlock(&ctx->fd_lock);
+ xl = pfd->xl;
+ posix_close_pfd(xl, pfd);
-int
-posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
-{
- /* The purpose of this function is to prevent a race
- where an inode creation FOP (like mkdir/mknod/create etc)
- races with lookup in the following way:
-
- {create thread} | {lookup thread}
- |
- t0
- mkdir ("name") |
- t1
- | posix_gfid_set ("name", 2);
- t2
- posix_gfid_set ("name", 1); |
- t3
- lstat ("name"); | lstat ("name");
-
- In the above case mkdir FOP would have resulted with GFID 2 while
- it should have been GFID 1. It matters in the case where GFID would
- have gotten set to 1 on other subvolumes of replciate/distribute
-
- The "solution" here is that, if we detect lookup is attempting to
- set a GFID on a file which is created very recently, but does not
- yet have a GFID (i.e, between t1 and t2), then "fake" it as though
- posix_gfid_heal was called at t0 instead.
- */
-
- uuid_t uuid_curr;
- int ret = 0;
- struct stat stat = {0, };
-
- if (!xattr_req)
- goto out;
+ pthread_mutex_lock(&ctx->fd_lock);
- if (sys_lstat (path, &stat) != 0)
- goto out;
+ priv_fd = xl->private;
+ priv_fd->rel_fdcount--;
+ if (!priv_fd->rel_fdcount)
+ pthread_cond_signal(&priv_fd->fd_cond);
+ }
- ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
- if (ret != 16) {
- if (is_fresh_file (&stat)) {
- ret = -1;
- errno = ENOENT;
- goto out;
- }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return NULL;
+}
+
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this)
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ if (ctx->pxl_count++ == 0) {
+ ret = gf_thread_create(&ctx->janitor, NULL,
+ posix_ctx_janitor_thread_proc, ctx,
+ "posixctxjan");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+ "spawning janitor thread failed");
+ ctx->pxl_count--;
+ }
}
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
- ret = posix_gfid_set (this, path, loc, xattr_req);
-out:
- return ret;
+ return ret;
}
+static int
+is_fresh_file(struct timespec *ts)
+{
+ struct timespec now;
+ int64_t elapsed;
+
+ timespec_now_realtime(&now);
+ elapsed = (int64_t)gf_tsdiff(ts, &now);
+
+ if (elapsed < 0) {
+ /* The file's timestamp is in the future.
+ * Is it fresh? The previous implementation treated this as a
+ * non-fresh file, so the same behavior is maintained. */
+ return 0;
+ }
+
+ /* If the file is newer than a second, we consider it fresh. */
+ return elapsed < 1000000;
+}
int
-posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req)
+posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
{
- int ret = 0;
- data_t *data = NULL;
- struct stat stat = {0, };
+ /* The purpose of this function is to prevent a race
+ where an inode creation FOP (like mkdir/mknod/create etc)
+ races with lookup in the following way:
+
+ {create thread} | {lookup thread}
+ |
+ t0
+ mkdir ("name") |
+ t1
+ | posix_gfid_set ("name", 2);
+ t2
+ posix_gfid_set ("name", 1); |
+ t3
+ lstat ("name"); | lstat ("name");
+
+ In the above case mkdir FOP would have resulted with GFID 2 while
+ it should have been GFID 1. It matters in the case where GFID would
+ have gotten set to 1 on other subvolumes of replicate/distribute
+
+ The "solution" here is that, if we detect lookup is attempting to
+ set a GFID on a file which is created very recently, but does not
+ yet have a GFID (i.e, between t1 and t2), then "fake" it as though
+ posix_gfid_heal was called at t0 instead.
+ */
+
+ uuid_t uuid_curr;
+ int ret = 0;
+ struct stat stat = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ if (!xattr_req)
+ return 0;
- if (!xattr_req)
- goto out;
+ if (loc->inode && priv->ctime) {
+ if (sys_lstat(path, &stat) != 0) {
+ return -errno;
+ }
+ /* stbuf is only to compare ctime, don't use it to access
+ * other fields as they are zero. */
+ ret = posix_get_mdata_xattr(this, path, -1, loc->inode, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
+ "posix get mdata failed on gfid: %s",
+ uuid_utoa(loc->inode->gfid));
+ return -ENOENT;
+ }
+ ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (ret != 16) {
+ /* TODO: This is a very hacky way of doing this, and very prone to
+ * errors and unexpected behavior. This should be changed. */
+ struct timespec ts = {.tv_sec = stbuf.ia_ctime,
+ .tv_nsec = stbuf.ia_ctime_nsec};
+ if (is_fresh_file(&ts)) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
+ "Fresh file: %s", path);
+ return -ENOENT;
+ }
+ }
+ } else {
+ if (sys_lstat(path, &stat) != 0) {
+ return -errno;
+ }
+ ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (ret != 16) {
+ /* TODO: This is a very hacky way of doing this, and very prone to
+ * errors and unexpected behavior. This should be changed. */
+ if (is_fresh_file(&stat.st_ctim)) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
+ "Fresh file: %s", path);
+ return -ENOENT;
+ }
+ }
+ }
- if (sys_lstat (path, &stat) != 0)
- goto out;
+ (void)posix_gfid_set(this, path, loc, xattr_req, GF_CLIENT_PID_MAX, &ret);
+ return 0;
+}
- data = dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR);
- if (data) {
- ret = sys_lsetxattr (path, POSIX_ACL_ACCESS_XATTR,
- data->data, data->len, 0);
- if (ret != 0)
- goto out;
+int
+posix_acl_xattr_set(xlator_t *this, const char *path, dict_t *xattr_req)
+{
+ int ret = 0;
+ data_t *data = NULL;
+ struct stat stat = {
+ 0,
+ };
+
+ if (!xattr_req)
+ goto out;
+
+ if (sys_lstat(path, &stat) != 0)
+ goto out;
+
+ data = dict_get(xattr_req, POSIX_ACL_ACCESS_XATTR);
+ if (data) {
+ ret = sys_lsetxattr(path, POSIX_ACL_ACCESS_XATTR, data->data, data->len,
+ 0);
+#ifdef __FreeBSD__
+ if (ret != -1) {
+ ret = 0;
}
-
- data = dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR);
- if (data) {
- ret = sys_lsetxattr (path, POSIX_ACL_DEFAULT_XATTR,
- data->data, data->len, 0);
- if (ret != 0)
- goto out;
+#endif /* __FreeBSD__ */
+ if (ret != 0)
+ goto out;
+ }
+
+ data = dict_get(xattr_req, POSIX_ACL_DEFAULT_XATTR);
+ if (data) {
+ ret = sys_lsetxattr(path, POSIX_ACL_DEFAULT_XATTR, data->data,
+ data->len, 0);
+#ifdef __FreeBSD__
+ if (ret != -1) {
+ ret = 0;
}
+#endif /* __FreeBSD__ */
+ if (ret != 0)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
static int
-_handle_entry_create_keyvalue_pair (dict_t *d, char *k, data_t *v,
- void *tmp)
+_handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
{
- int ret = -1;
- posix_xattr_filler_t *filler = NULL;
-
- filler = tmp;
+ int ret = -1;
+ posix_xattr_filler_t *filler = NULL;
- if (!strcmp (GFID_XATTR_KEY, k) ||
- !strcmp ("gfid-req", k) ||
- !strcmp (POSIX_ACL_DEFAULT_XATTR, k) ||
- !strcmp (POSIX_ACL_ACCESS_XATTR, k) ||
- ZR_FILE_CONTENT_REQUEST(k)) {
- return 0;
- }
+ filler = tmp;
- ret = posix_handle_pair (filler->this, filler->real_path, k, v,
- XATTR_CREATE);
- if (ret < 0) {
- errno = -ret;
- return -1;
- }
+ if (!strcmp(GFID_XATTR_KEY, k) || !strcmp("gfid-req", k) ||
+ !strcmp(POSIX_ACL_DEFAULT_XATTR, k) ||
+ !strcmp(POSIX_ACL_ACCESS_XATTR, k) || posix_xattr_ignorable(k)) {
return 0;
+ }
+
+ ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
+ XATTR_CREATE, filler->stbuf);
+ if (ret < 0) {
+ errno = -ret;
+ return -1;
+ }
+ return 0;
}
int
-posix_entry_create_xattr_set (xlator_t *this, const char *path,
+posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
dict_t *dict)
{
- int ret = -1;
+ int ret = -1;
- posix_xattr_filler_t filler = {0,};
+ posix_xattr_filler_t filler = {
+ 0,
+ };
- if (!dict)
- goto out;
+ if (!dict)
+ goto out;
- filler.this = this;
- filler.real_path = path;
+ filler.this = this;
+ filler.real_path = path;
+ filler.stbuf = NULL;
+ filler.loc = loc;
- ret = dict_foreach (dict, _handle_entry_create_keyvalue_pair, &filler);
+ ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler);
out:
- return ret;
+ return ret;
}
-
static int
-__posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p)
-{
- uint64_t tmp_pfd = 0;
- struct posix_fd *pfd = NULL;
- int ret = -1;
- char *real_path = NULL;
- int _fd = -1;
- DIR *dir = NULL;
-
- ret = __fd_ctx_get (fd, this, &tmp_pfd);
- if (ret == 0) {
- pfd = (void *)(long) tmp_pfd;
- ret = 0;
- goto out;
+__posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd_p,
+ int *op_errno_p)
+{
+ uint64_t tmp_pfd = 0;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ char *real_path = NULL;
+ char *unlink_path = NULL;
+ int _fd = -1;
+ int op_errno = 0;
+ DIR *dir = NULL;
+
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ ret = __fd_ctx_get(fd, this, &tmp_pfd);
+ if (ret == 0) {
+ pfd = (void *)(long)tmp_pfd;
+ goto out;
+ }
+ if (!fd_is_anonymous(fd)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READ_FAILED,
+ "Failed to get fd context for a non-anonymous fd, "
+ "gfid: %s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ MAKE_HANDLE_PATH(real_path, this, fd->inode->gfid, NULL);
+ if (!real_path) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READ_FAILED,
+ "Failed to create handle path (%s)", uuid_utoa(fd->inode->gfid));
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
+ if (!pfd) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ pfd->fd = -1;
+
+ if (fd->inode->ia_type == IA_IFDIR) {
+ dir = sys_opendir(real_path);
+ if (!dir) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_READ_FAILED,
+ "Failed to get anonymous fd for "
+ "real_path: %s.",
+ real_path);
+ GF_FREE(pfd);
+ pfd = NULL;
+ goto out;
+ }
+ _fd = dirfd(dir);
+ }
+
+ /* Using fd->flags in case we choose to have anonymous
+ * fds with different flags some day. As of today it
+ * would be GF_ANON_FD_FLAGS and nothing else.
+ */
+ if (fd->inode->ia_type == IA_IFREG) {
+ _fd = open(real_path, fd->flags);
+ if ((_fd == -1) && (errno == ENOENT)) {
+ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, fd->inode->gfid,
+ unlink_path);
+ _fd = open(unlink_path, fd->flags);
+ }
+ if (_fd == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_READ_FAILED,
+ "Failed to get anonymous fd for "
+ "real_path: %s.",
+ real_path);
+ GF_FREE(pfd);
+ pfd = NULL;
+ goto out;
}
+ }
+
+ pfd->fd = _fd;
+ pfd->dir = dir;
+ pfd->flags = fd->flags;
+
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ if (_fd != -1)
+ sys_close(_fd);
+ if (dir)
+ sys_closedir(dir);
+ GF_FREE(pfd);
+ pfd = NULL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0 && op_errno_p)
+ *op_errno_p = op_errno;
- if (!fd_is_anonymous(fd))
- /* anonymous fd */
- goto out;
+ if (pfd_p)
+ *pfd_p = pfd;
+ return ret;
+}
+
+int
+posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, int *op_errno)
+{
+ int ret;
- MAKE_HANDLE_PATH (real_path, this, fd->inode->gfid, NULL);
+ LOCK(&fd->inode->lock);
+ {
+ ret = __posix_fd_ctx_get(fd, this, pfd, op_errno);
+ }
+ UNLOCK(&fd->inode->lock);
- pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
- if (!pfd) {
- goto out;
+ return ret;
+}
+
+static int
+posix_fs_health_check(xlator_t *this, char *file_path)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+ char timestamp[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ int fd = -1;
+ int timelen = -1;
+ time_t time_sec = {
+ 0,
+ };
+ char buff[256] = {0};
+ char *op = NULL;
+ int op_errno = 0;
+ int cnt;
+ int timeout = 0;
+ struct aiocb aiocb;
+
+ priv = this->private;
+
+ timeout = priv->health_check_timeout;
+
+ fd = open(file_path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (fd == -1) {
+ op_errno = errno;
+ op = "open_for_write";
+ goto out;
+ }
+
+ time_sec = gf_time();
+ gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT);
+ timelen = strlen(timestamp);
+
+ memset(&aiocb, 0, sizeof(struct aiocb));
+ aiocb.aio_fildes = fd;
+ aiocb.aio_buf = timestamp;
+ aiocb.aio_nbytes = timelen;
+ aiocb.aio_sigevent.sigev_notify = SIGEV_NONE;
+ if (aio_write(&aiocb) == -1) {
+ op_errno = errno;
+ op = "aio_write";
+ goto out;
+ }
+
+ cnt = 0;
+ /* Wait until write completion */
+ while ((aio_error(&aiocb) == EINPROGRESS) && (++cnt <= timeout))
+ sleep(1);
+
+ ret = aio_error(&aiocb);
+ if (ret != 0) {
+ op_errno = errno;
+ op = "aio_write_error";
+ goto out;
+ }
+
+ ret = aio_return(&aiocb);
+ if (ret != timelen) {
+ op_errno = errno;
+ op = "aio_write_buf";
+ ret = -1;
+ goto out;
+ }
+
+ sys_close(fd);
+
+ fd = open(file_path, O_RDONLY);
+ if (fd == -1) {
+ op_errno = errno;
+ op = "open_for_read";
+ goto out;
+ }
+
+ memset(&aiocb, 0, sizeof(struct aiocb));
+ aiocb.aio_fildes = fd;
+ aiocb.aio_buf = buff;
+ aiocb.aio_nbytes = sizeof(buff);
+ if (aio_read(&aiocb) == -1) {
+ op_errno = errno;
+ op = "aio_read";
+ goto out;
+ }
+ cnt = 0;
+ /* Wait until read completion */
+ while ((aio_error(&aiocb) == EINPROGRESS) && (++cnt <= timeout))
+ sleep(1);
+
+ ret = aio_error(&aiocb);
+ if (ret != 0) {
+ op_errno = errno;
+ op = "aio_read_error";
+ goto out;
+ }
+
+ ret = aio_return(&aiocb);
+ if (ret != timelen) {
+ op_errno = errno;
+ op = "aio_read_buf";
+ ret = -1;
+ goto out;
+ }
+
+ if (memcmp(timestamp, buff, ret)) {
+ op_errno = EUCLEAN;
+ op = "aio_read_cmp_buf";
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (fd != -1) {
+ sys_close(fd);
+ }
+
+ if (ret && file_path[0]) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+ "%s() on %s returned ret is %d error is %s", op, file_path, ret,
+ ret != -1 ? strerror(ret) : strerror(op_errno));
+
+ if ((op_errno == EAGAIN) || (ret == EAGAIN)) {
+ ret = 0;
+ } else {
+ gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
+ "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+ file_path, strerror(op_errno), priv->hostname,
+ priv->base_path, timeout);
}
- pfd->fd = -1;
+ }
+ return ret;
+}
- if (fd->inode->ia_type == IA_IFDIR) {
- dir = opendir (real_path);
- if (!dir) {
- GF_FREE (pfd);
- pfd = NULL;
- goto out;
- }
- _fd = dirfd (dir);
+static void *
+posix_health_check_thread_proc(void *data)
+{
+ xlator_t *this = data;
+ struct posix_private *priv = this->private;
+ uint32_t interval = priv->health_check_interval;
+ int ret = -1;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ xlator_list_t **trav_p = NULL;
+ int count = 0;
+ gf_boolean_t victim_found = _gf_false;
+ glusterfs_ctx_t *ctx = THIS->ctx;
+ char file_path[PATH_MAX];
+
+ /* prevent races when the interval is updated */
+ if (interval == 0)
+ goto out;
+
+ snprintf(file_path, sizeof(file_path) - 1, "%s/%s/health_check",
+ priv->base_path, GF_HIDDEN_PATH);
+
+ gf_msg_debug(this->name, 0,
+ "health-check thread started, "
+ "on path %s, "
+ "interval = %d seconds",
+ file_path, interval);
+ while (1) {
+ /* aborting sleep() is a request to exit this thread, sleep()
+ * will normally not return when cancelled */
+ ret = sleep(interval);
+ if (ret > 0)
+ break;
+ /* prevent thread errors while doing the health-check(s) */
+ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+
+ /* Do the health-check.*/
+ ret = posix_fs_health_check(this, file_path);
+ if (ret < 0 && priv->health_check_active)
+ goto abort;
+ if (!priv->health_check_active)
+ goto out;
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "health-check thread exiting");
+
+ LOCK(&priv->lock);
+ {
+ priv->health_check_active = _gf_false;
+ }
+ UNLOCK(&priv->lock);
+
+ return NULL;
+
+abort:
+ LOCK(&priv->lock);
+ {
+ priv->health_check_active = _gf_false;
+ }
+ UNLOCK(&priv->lock);
+
+ /* health-check failed */
+ gf_msg(this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
+ "health-check failed, going down");
+
+ xlator_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+
+ /* The code below checks whether brick multiplexing is enabled: a
+ count greater than 1 means brick mux is enabled.
+ */
+ if (this->ctx->active) {
+ top = this->ctx->active->first;
+ LOCK(&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ count++;
+ }
+ UNLOCK(&ctx->volfile_lock);
+ }
+
+ if (count == 1) {
+ gf_msg(this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
+ "still alive! -> SIGTERM");
+ ret = sleep(30);
+
+ /* Kill the process only when brick mux is not enabled.
+ */
+ if (ret == 0)
+ kill(getpid(), SIGTERM);
+
+ ret = sleep(30);
+ gf_msg(this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
+ "still alive! -> SIGKILL");
+ if (ret == 0)
+ kill(getpid(), SIGKILL);
+
+ } else if (top) {
+ LOCK(&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (!victim->call_cleanup &&
+ strcmp(victim->name, priv->base_path) == 0) {
+ victim_found = _gf_true;
+ break;
+ }
}
+ UNLOCK(&ctx->volfile_lock);
+ if (victim_found && !victim->cleanup_starting) {
+ gf_log(THIS->name, GF_LOG_INFO,
+ "detaching not-only "
+ " child %s",
+ priv->base_path);
+ victim->cleanup_starting = 1;
+ top->notify(top, GF_EVENT_CLEANUP, victim);
+ }
+ }
- if (fd->inode->ia_type == IA_IFREG) {
- _fd = open (real_path, O_RDWR|O_LARGEFILE);
- if (_fd == -1) {
- GF_FREE (pfd);
- pfd = NULL;
- goto out;
- }
+ return NULL;
+}
+
+int
+posix_spawn_health_check_thread(xlator_t *xl)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = xl->private;
+
+ LOCK(&priv->lock);
+ {
+ /* cancel the running thread */
+ if (priv->health_check_active == _gf_true) {
+ pthread_cancel(priv->health_check);
+ priv->health_check_active = _gf_false;
}
- pfd->fd = _fd;
- pfd->dir = dir;
+ /* prevent scheduling a check in a tight loop */
+ if (priv->health_check_interval == 0)
+ goto unlock;
- ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd);
- if (ret != 0) {
- if (_fd != -1)
- close (_fd);
- if (dir)
- closedir (dir);
- GF_FREE (pfd);
- pfd = NULL;
- goto out;
+ ret = gf_thread_create(&priv->health_check, NULL,
+ posix_health_check_thread_proc, xl, "posixhc");
+ if (ret) {
+ priv->health_check_interval = 0;
+ priv->health_check_active = _gf_false;
+ gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_HEALTHCHECK_FAILED,
+ "unable to setup health-check thread");
+ goto unlock;
}
- ret = 0;
+ priv->health_check_active = _gf_true;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+ return ret;
+}
+
+void
+posix_disk_space_check(xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ char *subvol_path = NULL;
+ int op_ret = 0;
+ double size = 0;
+ double percent = 0;
+ struct statvfs buf = {0};
+ double totsz = 0;
+ double freesz = 0;
+
+ GF_VALIDATE_OR_GOTO("posix-helpers", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ subvol_path = priv->base_path;
+
+ op_ret = sys_statvfs(subvol_path, &buf);
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ "statvfs failed on %s", subvol_path);
+ goto out;
+ }
+
+ if (priv->disk_unit == 'p') {
+ percent = priv->disk_reserve;
+ totsz = (buf.f_blocks * buf.f_bsize);
+ size = ((totsz * percent) / 100);
+ } else {
+ size = priv->disk_reserve;
+ }
+
+ freesz = (buf.f_bfree * buf.f_bsize);
+ if (freesz <= size) {
+ priv->disk_space_full = 1;
+ } else {
+ priv->disk_space_full = 0;
+ }
out:
- if (pfd_p)
- *pfd_p = pfd;
- return ret;
+ return;
}
+/*
+ * Thread body for the periodic disk-reserve check. Every 5 seconds it
+ * calls posix_disk_space_check(); it leaves the loop when sleep() returns
+ * early or when priv->disk_space_check_active has been cleared, and marks
+ * the check as inactive (under priv->lock) before returning.
+ */
+static void *
+posix_disk_space_check_thread_proc(void *data)
+{
+    xlator_t *this = data;
+    struct posix_private *priv = this->private;
+    uint32_t interval = 5;
+    int remaining = -1;
+
+    gf_msg_debug(this->name, 0,
+                 "disk-space thread started, interval = %d seconds",
+                 interval);
+
+    for (;;) {
+        /* a sleep() that comes back early is the request to stop; a
+         * cancelled sleep() normally does not return at all */
+        remaining = sleep(interval);
+        if (remaining > 0)
+            break;
+
+        /* keep the thread from being cancelled in the middle of a check */
+        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+
+        posix_disk_space_check(this);
+        if (!priv->disk_space_check_active)
+            break;
+
+        pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+    }
+
+    gf_msg_debug(this->name, 0, "disk space check thread exiting");
+
+    LOCK(&priv->lock);
+    {
+        priv->disk_space_check_active = _gf_false;
+    }
+    UNLOCK(&priv->lock);
+
+    return NULL;
+}
int
-posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd)
+posix_spawn_disk_space_check_thread(xlator_t *xl)
{
- int ret;
+ struct posix_private *priv = NULL;
+ int ret = -1;
- LOCK (&fd->inode->lock);
- {
- ret = __posix_fd_ctx_get (fd, this, pfd);
+ priv = xl->private;
+
+ LOCK(&priv->lock);
+ {
+ /* cancel the running thread */
+ if (priv->disk_space_check_active == _gf_true) {
+ pthread_cancel(priv->disk_space_check);
+ priv->disk_space_check_active = _gf_false;
}
- UNLOCK (&fd->inode->lock);
- return ret;
+ ret = gf_thread_create(&priv->disk_space_check, NULL,
+ posix_disk_space_check_thread_proc, xl,
+ "posixrsv");
+ if (ret) {
+ priv->disk_space_check_active = _gf_false;
+ gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED,
+ "unable to setup disk space check thread");
+ goto unlock;
+ }
+
+ priv->disk_space_check_active = _gf_true;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+ return ret;
}
-static void *
-posix_health_check_thread_proc (void *data)
+int
+posix_fsyncer_pick(xlator_t *this, struct list_head *head)
{
- xlator_t *this = NULL;
- struct posix_private *priv = NULL;
- uint32_t interval = 0;
- int ret = -1;
- struct stat sb = {0, };
+ struct posix_private *priv = NULL;
+ int count = 0;
+
+ priv = this->private;
+ pthread_mutex_lock(&priv->fsync_mutex);
+ {
+ while (list_empty(&priv->fsyncs))
+ pthread_cond_wait(&priv->fsync_cond, &priv->fsync_mutex);
+
+ count = priv->fsync_queue_count;
+ priv->fsync_queue_count = 0;
+ list_splice_init(&priv->fsyncs, head);
+ }
+ pthread_mutex_unlock(&priv->fsync_mutex);
+
+ return count;
+}
- this = data;
- priv = this->private;
+void
+posix_fsyncer_process(xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync)
+{
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GET_FDCTX_FAILED,
+ "could not get fdctx for fd(%s)",
+ uuid_utoa(stub->args.fd->inode->gfid));
+ call_unwind_error(stub, -1, op_errno);
+ return;
+ }
- /* prevent races when the interval is updated */
- interval = priv->health_check_interval;
- if (interval == 0)
- goto out;
+ if (do_fsync && pfd) {
+ if (stub->args.datasync)
+ ret = sys_fdatasync(pfd->fd);
+ else
+ ret = sys_fsync(pfd->fd);
+ } else {
+ ret = 0;
+ }
- gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, "
- "interval = %d seconds", interval);
-
- while (1) {
- /* aborting sleep() is a request to exit this thread, sleep()
- * will normally not return when cancelled */
- ret = sleep (interval);
- if (ret > 0)
- break;
-
- /* prevent thread errors while doing the health-check(s) */
- pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
-
- /* Do the health-check, it should be moved to its own function
- * in case it gets more complex. */
- ret = stat (priv->base_path, &sb);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "stat() on %s returned: %s", priv->base_path,
- strerror (errno));
- goto abort;
- }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "could not fstat fd(%s)", uuid_utoa(stub->args.fd->inode->gfid));
+ call_unwind_error(stub, -1, errno);
+ return;
+ }
+
+ call_unwind_error(stub, 0, 0);
+}
- pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+/*
+ * Issue a single gf_syncfs() on the fd belonging to the stub at the tail
+ * of @head. Nothing is done when the fd context cannot be obtained.
+ */
+static void
+posix_fsyncer_syncfs(xlator_t *this, struct list_head *head)
+{
+    struct posix_fd *pfd = NULL;
+    call_stub_t *last = list_entry(head->prev, call_stub_t, list);
+
+    if (posix_fd_ctx_get(last->args.fd, this, &pfd, NULL) != 0)
+        return;
+
+    (void)gf_syncfs(pfd->fd);
+}
+
+void *
+posix_fsyncer(void *d)
+{
+ xlator_t *this = d;
+ struct posix_private *priv = NULL;
+ call_stub_t *stub = NULL;
+ call_stub_t *tmp = NULL;
+ struct list_head list;
+ int count = 0;
+ gf_boolean_t do_fsync = _gf_true;
+
+ priv = this->private;
+
+ for (;;) {
+ INIT_LIST_HEAD(&list);
+
+ count = posix_fsyncer_pick(this, &list);
+
+ gf_nanosleep(priv->batch_fsync_delay_usec * GF_US_IN_NS);
+
+ gf_msg_debug(this->name, 0, "picked %d fsyncs", count);
+
+ switch (priv->batch_fsync_mode) {
+ case BATCH_NONE:
+ case BATCH_REVERSE_FSYNC:
+ break;
+ case BATCH_SYNCFS:
+ case BATCH_SYNCFS_SINGLE_FSYNC:
+ case BATCH_SYNCFS_REVERSE_FSYNC:
+ posix_fsyncer_syncfs(this, &list);
+ break;
}
-out:
- gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting");
+ if (priv->batch_fsync_mode == BATCH_SYNCFS)
+ do_fsync = _gf_false;
+ else
+ do_fsync = _gf_true;
- LOCK (&priv->lock);
+ list_for_each_entry_safe_reverse(stub, tmp, &list, list)
{
- priv->health_check_active = _gf_false;
+ list_del_init(&stub->list);
+
+ posix_fsyncer_process(this, stub, do_fsync);
+
+ if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC)
+ do_fsync = _gf_false;
}
- UNLOCK (&priv->lock);
+ }
+}
- return NULL;
+/**
+ * TODO: move fd/inode interfaces into a single routine..
+ */
+static int32_t
+posix_fetch_signature_xattr(char *real_path, const char *key, dict_t *xattr,
+ size_t *xsize)
+{
+ int32_t ret = 0;
+ char *memptr = NULL;
+ ssize_t xattrsize = 0;
+ char val_buf[2048] = {
+ 0,
+ };
+ gf_boolean_t have_val = _gf_false;
+
+ xattrsize = sys_lgetxattr(real_path, key, val_buf, sizeof(val_buf) - 1);
+ if (xattrsize >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE)
+ xattrsize = sys_lgetxattr(real_path, key, NULL, 0);
+ if ((errno == ENOATTR) || (errno == ENODATA))
+ return 0;
+ if (xattrsize == -1)
+ goto error_return;
+ }
+ memptr = GF_MALLOC(xattrsize + 1, gf_posix_mt_char);
+ if (!memptr)
+ goto error_return;
+ if (have_val) {
+ memcpy(memptr, val_buf, xattrsize);
+ memptr[xattrsize] = '\0';
+ } else {
+ bzero(memptr, xattrsize + 1);
+ ret = sys_lgetxattr(real_path, key, memptr, xattrsize);
+ if (ret == -1)
+ goto freemem;
+ }
+ ret = dict_set_dynptr(xattr, (char *)key, memptr, xattrsize);
+ if (ret)
+ goto freemem;
-abort:
- /* health-check failed */
- gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down");
- xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+ if (xsize)
+ *xsize = xattrsize;
+
+ return 0;
+
+freemem:
+ GF_FREE(memptr);
+error_return:
+ return -1;
+}
+
+static int32_t
+posix_fd_fetch_signature_xattr(int fd, const char *key, dict_t *xattr,
+ size_t *xsize)
+{
+ int32_t ret = 0;
+ char *memptr = NULL;
+ ssize_t xattrsize = 0;
+
+ xattrsize = sys_fgetxattr(fd, key, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA)))
+ return 0;
+ if (xattrsize == -1)
+ goto error_return;
+
+ memptr = GF_CALLOC(xattrsize + 1, sizeof(char), gf_posix_mt_char);
+ if (!memptr)
+ goto error_return;
+ ret = sys_fgetxattr(fd, key, memptr, xattrsize);
+ if (ret == -1)
+ goto freemem;
+
+ ret = dict_set_dynptr(xattr, (char *)key, memptr, xattrsize);
+ if (ret)
+ goto freemem;
- ret = sleep (30);
- if (ret == 0) {
- gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGTERM");
- kill (getpid(), SIGTERM);
+ if (xsize)
+ *xsize = xattrsize;
+
+ return 0;
+
+freemem:
+ GF_FREE(memptr);
+error_return:
+ return -1;
+}
+
+/**
+ * Load the on-disk bitrot "current version" and "signing version" xattrs
+ * of @real_path into @xattr, plus the size of the signing xattr under
+ * BITROT_SIGNING_XATTR_SIZE_KEY.
+ *
+ * A missing xattr is tolerated by the fetch helper (only *absence*; any
+ * other failure is reported); interpreting the values is left to the
+ * higher layer (br-stub).
+ *
+ * Returns 0 on success. On failure every key added so far is removed from
+ * @xattr again and -EINVAL is returned.
+ */
+int32_t
+posix_get_objectsignature(char *real_path, dict_t *xattr)
+{
+    size_t signsize = 0;
+
+    if (posix_fetch_signature_xattr(real_path, BITROT_CURRENT_VERSION_KEY,
+                                    xattr, NULL))
+        return -EINVAL;
+
+    if (posix_fetch_signature_xattr(real_path, BITROT_SIGNING_VERSION_KEY,
+                                    xattr, &signsize)) {
+        dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
+        return -EINVAL;
+    }
+
+    if (dict_set_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+                        (uint32_t)signsize)) {
+        /* roll back in reverse order of insertion */
+        dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
+        dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * fd-based counterpart of posix_get_objectsignature(): loads the bitrot
+ * current-version and signing-version xattrs of @fd into @xattr and
+ * records the signing xattr size under BITROT_SIGNING_XATTR_SIZE_KEY.
+ *
+ * Returns 0 on success; on failure the keys added so far are deleted from
+ * @xattr and -EINVAL is returned.
+ */
+int32_t
+posix_fdget_objectsignature(int fd, dict_t *xattr)
+{
+    size_t signsize = 0;
+
+    if (posix_fd_fetch_signature_xattr(fd, BITROT_CURRENT_VERSION_KEY, xattr,
+                                       NULL))
+        return -EINVAL;
+
+    if (posix_fd_fetch_signature_xattr(fd, BITROT_SIGNING_VERSION_KEY, xattr,
+                                       &signsize)) {
+        dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
+        return -EINVAL;
+    }
+
+    if (dict_set_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+                        (uint32_t)signsize)) {
+        /* roll back in reverse order of insertion */
+        dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
+        dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * posix_resolve_dirgfid_to_path:
+ * Converts the given directory gfid to a path by repeatedly reading the
+ * gfid handle symlinks under <brick_path>/.glusterfs until the root gfid
+ * is reached. If bname is given it is appended to the directory path to
+ * form the complete path. If bname is NULL and dirgfid is ROOT, the
+ * result is "/".
+ *
+ * On success 0 is returned and *path points to a newly allocated string
+ * that the caller must free. -1 is returned when path is NULL, dirgfid is
+ * null, a handle cannot be read or is malformed, a path component does
+ * not fit in PATH_MAX, or memory allocation fails.
+ **/
+
+int32_t
+posix_resolve_dirgfid_to_path(const uuid_t dirgfid, const char *brick_path,
+                              const char *bname, char **path)
+{
+    char *linkname = NULL;
+    char *dir_handle = NULL;
+    char *pgfidstr = NULL;
+    char *saveptr = NULL;
+    ssize_t len = 0;
+    int ret = 0;
+    uuid_t tmp_gfid = {
+        0,
+    };
+    uuid_t pargfid = {
+        0,
+    };
+    char gpath[PATH_MAX] = {
+        0,
+    };
+    char result[PATH_MAX] = {
+        0,
+    };
+    char result1[PATH_MAX] = {
+        0,
+    };
+    char *dir_name = NULL;
+    char pre_dir_name[PATH_MAX] = {
+        0,
+    };
+    xlator_t *this = NULL;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    gf_uuid_copy(pargfid, dirgfid);
+    if (!path || gf_uuid_is_null(pargfid)) {
+        ret = -1;
+        goto out;
+    }
+
+    if (__is_root_gfid(pargfid)) {
+        if (bname) {
+            snprintf(result, PATH_MAX, "/%s", bname);
+            *path = gf_strdup(result);
+        } else {
+            *path = gf_strdup("/");
+        }
+        /* do not report success with a NULL path */
+        if (*path == NULL)
+            ret = -1;
+        return ret;
+    }
+
+    dir_handle = alloca(PATH_MAX);
+    linkname = alloca(PATH_MAX);
+    (void)snprintf(gpath, PATH_MAX, "%s/.glusterfs/", brick_path);
+
+    while (!(__is_root_gfid(pargfid))) {
+        len = snprintf(dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath,
+                       pargfid[0], pargfid[1], uuid_utoa(pargfid));
+        if ((len < 0) || (len >= PATH_MAX)) {
+            ret = -1;
+            goto out;
 }
- ret = sleep (30);
- if (ret == 0) {
- gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGKILL");
- kill (getpid(), SIGKILL);
+        /* readlink() does not NUL-terminate and may fill the whole buffer,
+         * so keep one byte in reserve for the terminator written below */
+        len = sys_readlink(dir_handle, linkname, PATH_MAX - 1);
+        if (len < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED,
+                   "could not read the "
+                   "link from the gfid handle %s",
+                   dir_handle);
+            ret = -1;
+            goto out;
 }
+        linkname[len] = '\0';
+
+        /* the link must at least carry the "../../00/00/" prefix, else
+         * the offset below would point past the terminator */
+        if ((size_t)len <= SLEN("../../00/00/")) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READLINK_FAILED,
+                   "malformed link %s for the gfid handle %s", linkname,
+                   dir_handle);
+            ret = -1;
+            goto out;
+        }
+
+        /* expected form: ../../00/00/<parent-gfid>/<dir-name> */
+        pgfidstr = strtok_r(linkname + SLEN("../../00/00/"), "/", &saveptr);
+        dir_name = strtok_r(NULL, "/", &saveptr);
+        if (!pgfidstr || !dir_name) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READLINK_FAILED,
+                   "malformed link for the gfid handle %s", dir_handle);
+            ret = -1;
+            goto out;
+        }
+
+        /* prepend this component to what has been resolved so far */
+        if (pre_dir_name[0] != '\0') {
+            len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name);
+        } else {
+            len = snprintf(result, PATH_MAX, "%s", dir_name);
+        }
+        if ((len < 0) || (len >= PATH_MAX)) {
+            ret = -1;
+            goto out;
+        }
+
+        snprintf(pre_dir_name, sizeof(pre_dir_name), "%s", result);
+
+        /* continue the walk from the parent directory */
+        gf_uuid_parse(pgfidstr, tmp_gfid);
+        gf_uuid_copy(pargfid, tmp_gfid);
+    }
+
+    if (bname) {
+        len = snprintf(result1, PATH_MAX, "/%s/%s", result, bname);
+    } else {
+        len = snprintf(result1, PATH_MAX, "/%s", result);
+    }
+    if ((len < 0) || (len >= PATH_MAX)) {
+        ret = -1;
+        goto out;
+    }
+
+    *path = gf_strdup(result1);
+    if (*path == NULL) {
+        ret = -1;
+        goto out;
+    }
+
+out:
+    return ret;
+}
+
+/*
+ * Return the posix context stored on @inode for @this, creating and
+ * registering a zeroed one (with its three mutexes initialised) when none
+ * exists yet. Returns NULL when allocation or registration fails.
+ * Uses the __inode_ctx_* variants, so presumably the caller already holds
+ * inode->lock -- confirm against callers.
+ */
+posix_inode_ctx_t *
+__posix_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+    uint64_t raw = 0;
+    posix_inode_ctx_t *fresh = NULL;
+
+    /* an existing context is returned as is */
+    if (__inode_ctx_get(inode, this, &raw) == 0)
+        return (posix_inode_ctx_t *)(uintptr_t)raw;
+
+    fresh = GF_CALLOC(1, sizeof(*fresh), gf_posix_mt_inode_ctx_t);
+    if (fresh == NULL)
+        return NULL;
+
+    pthread_mutex_init(&fresh->xattrop_lock, NULL);
+    pthread_mutex_init(&fresh->write_atomic_lock, NULL);
+    pthread_mutex_init(&fresh->pgfid_lock, NULL);
+
+    raw = (uint64_t)(uintptr_t)fresh;
+    if (__inode_ctx_set(inode, this, &raw) >= 0)
+        return fresh;
+
+    /* registration failed: undo everything done above */
+    pthread_mutex_destroy(&fresh->xattrop_lock);
+    pthread_mutex_destroy(&fresh->write_atomic_lock);
+    pthread_mutex_destroy(&fresh->pgfid_lock);
+    GF_FREE(fresh);
     return NULL;
 }
-void
-posix_spawn_health_check_thread (xlator_t *xl)
+int
+__posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx)
{
- struct posix_private *priv = NULL;
- int ret = -1;
+ posix_inode_ctx_t *ctx_p = NULL;
- priv = xl->private;
+ ctx_p = __posix_inode_ctx_get(inode, this);
+ if (ctx_p == NULL)
+ return -1;
- LOCK (&priv->lock);
- {
- /* cancel the running thread */
- if (priv->health_check_active == _gf_true) {
- pthread_cancel (priv->health_check);
- priv->health_check_active = _gf_false;
- }
+ ctx_p->unlink_flag = ctx;
- /* prevent scheduling a check in a tight loop */
- if (priv->health_check_interval == 0)
- goto unlock;
-
- ret = gf_thread_create (&priv->health_check, NULL,
- posix_health_check_thread_proc, xl);
- if (ret < 0) {
- priv->health_check_interval = 0;
- priv->health_check_active = _gf_false;
- gf_log (xl->name, GF_LOG_ERROR,
- "unable to setup health-check thread: %s",
- strerror (errno));
- goto unlock;
- }
+ return 0;
+}
- /* run the thread detached, resources will be freed on exit */
- pthread_detach (priv->health_check);
- priv->health_check_active = _gf_true;
- }
-unlock:
- UNLOCK (&priv->lock);
+int
+posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __posix_inode_ctx_set_unlink_flag(inode, this, ctx);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
int
-posix_fsyncer_pick (xlator_t *this, struct list_head *head)
+__posix_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+ posix_inode_ctx_t **ctx)
{
- struct posix_private *priv = NULL;
- int count = 0;
+ posix_inode_ctx_t *ctx_p = NULL;
- priv = this->private;
- pthread_mutex_lock (&priv->fsync_mutex);
- {
- while (list_empty (&priv->fsyncs))
- pthread_cond_wait (&priv->fsync_cond,
- &priv->fsync_mutex);
+ ctx_p = __posix_inode_ctx_get(inode, this);
+ if (ctx_p == NULL)
+ return -1;
- count = priv->fsync_queue_count;
- priv->fsync_queue_count = 0;
- list_splice_init (&priv->fsyncs, head);
- }
- pthread_mutex_unlock (&priv->fsync_mutex);
+ *ctx = ctx_p;
- return count;
+ return 0;
}
+int
+posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, posix_inode_ctx_t **ctx)
+{
+ int ret = 0;
-void
-posix_fsyncer_process (xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync)
+ LOCK(&inode->lock);
+ {
+ ret = __posix_inode_ctx_get_all(inode, this, ctx);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+gf_boolean_t
+posix_is_bulk_removexattr(char *name, dict_t *xdata)
{
- struct posix_fd *pfd = NULL;
- int ret = -1;
- struct posix_private *priv = NULL;
+ if (name && (name[0] == '\0') && xdata)
+ return _gf_true;
+ return _gf_false;
+}
- priv = this->private;
+int32_t
+posix_set_iatt_in_dict(dict_t *dict, struct iatt *preop, struct iatt *postop)
+{
+ int ret = -1;
+ struct iatt *stbuf = NULL;
+ int32_t len = sizeof(struct iatt);
+ struct iatt *prebuf = NULL;
+ struct iatt *postbuf = NULL;
- ret = posix_fd_ctx_get (stub->args.fd, this, &pfd);
+ if (!dict)
+ return ret;
+
+ if (postop) {
+ stbuf = GF_MALLOC(len, gf_common_mt_char);
+ if (!stbuf)
+ goto out;
+ memcpy(stbuf, postop, len);
+ ret = dict_set_iatt(dict, DHT_IATT_IN_XDATA_KEY, stbuf, false);
+ if (ret < 0) {
+ GF_FREE(stbuf);
+ goto out;
+ }
+ }
+
+ if (preop) {
+ prebuf = GF_MALLOC(len, gf_common_mt_char);
+ if (!prebuf)
+ goto out;
+ memcpy(prebuf, preop, len);
+ ret = dict_set_iatt(dict, GF_PRESTAT, prebuf, false);
+ if (ret < 0) {
+ GF_FREE(prebuf);
+ goto out;
+ }
+ }
+
+ if (postop) {
+ postbuf = GF_MALLOC(len, gf_common_mt_char);
+ if (!postbuf)
+ goto out;
+ memcpy(postbuf, postop, len);
+ ret = dict_set_iatt(dict, GF_POSTSTAT, postbuf, false);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fdctx for fd(%s)",
- uuid_utoa (stub->args.fd->inode->gfid));
- call_unwind_error (stub, -1, EINVAL);
- return;
+ GF_FREE(postbuf);
+ goto out;
}
+ }
- if (do_fsync) {
- if (stub->args.datasync)
- ret = sys_fdatasync (pfd->fd);
- else
- ret = sys_fsync (pfd->fd);
- } else {
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Replace the low nine bits of @mode (the three rwx octal digits) with
+ * @mode_bit and return the result; the higher bits are kept. The value is
+ * logged at debug level before and after the change.
+ */
+mode_t
+posix_override_umask(mode_t mode, mode_t mode_bit)
+{
+    mode_t upper = 0;
+    mode_t overridden = 0;
+
+    gf_msg_debug("posix", 0, "The value of mode is %u", mode);
+
+    /* drop 3x3 bits (one group per octal digit), then make room again */
+    upper = mode >> 9;
+    overridden = (upper << 9) | mode_bit;
+
+    gf_msg_debug("posix", 0, "The value of mode is %u", overridden);
+    return overridden;
+}
+
+/*
+ * Handle the two write-protection requests that may be carried in @xdata,
+ * with fd->inode->lock held for the whole operation:
+ *
+ *  - GF_PROTECT_FROM_EXTERNAL_WRITES present: store its value as an xattr
+ *    of the same name on @sysfd and return the result of that setxattr
+ *    (failure is logged).
+ *  - otherwise, GF_AVOID_OVERWRITE present: return 0 only when the
+ *    GF_PROTECT_FROM_EXTERNAL_WRITES xattr is absent from @sysfd
+ *    (ENOATTR/ENODATA); return -1 when it exists or cannot be probed.
+ *
+ * Returns 0 when @xdata is NULL or carries neither key.
+ */
+int
+posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata)
+{
+    int ret = 0;
+    /* signed, so the -1 failure value of fgetxattr is compared as such */
+    ssize_t xattrsize = 0;
+    data_t *val = NULL;
+
+    if (!xdata)
+        return 0;
+
+    LOCK(&fd->inode->lock);
+    {
+        val = dict_get_sizen(xdata, GF_PROTECT_FROM_EXTERNAL_WRITES);
+        if (val) {
+            ret = sys_fsetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES,
+                                val->data, val->len, 0);
+            if (ret == -1) {
+                /* gf_msg takes the errno first and the message id second */
+                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                       "setxattr failed key %s",
+                       GF_PROTECT_FROM_EXTERNAL_WRITES);
+            }
+
+            goto out;
+        }
+
+        if (dict_get_sizen(xdata, GF_AVOID_OVERWRITE)) {
+            xattrsize = sys_fgetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES,
+                                      NULL, 0);
+            if ((xattrsize == -1) &&
+                ((errno == ENOATTR) || (errno == ENODATA))) {
                 ret = 0;
+            } else {
+                ret = -1;
+            }
+        }
+    }
+out:
+    UNLOCK(&fd->inode->lock);
+    return ret;
+}
+
+gf_cs_obj_state
+posix_cs_heal_state(xlator_t *this, const char *realpath, int *fd,
+ struct iatt *buf)
+{
+ gf_boolean_t remote = _gf_false;
+ gf_boolean_t downloading = _gf_false;
+ int ret = 0;
+ gf_cs_obj_state state = GF_CS_ERROR;
+ size_t xattrsize = 0;
+
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ if (fd) {
+ xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ remote = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "fgetxattr"
+ " failed");
+ state = GF_CS_ERROR;
+ goto out;
+ } else {
+ remote = _gf_true;
+ }
+
+ xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_DOWNLOADING, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ downloading = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "fgetxattr"
+ " failed");
+ state = GF_CS_ERROR;
+ goto out;
+ } else {
+ downloading = _gf_true;
+ }
+ } else {
+ xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_REMOTE, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ remote = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "getxattr"
+ " failed");
+ state = GF_CS_ERROR;
+ goto out;
+ } else {
+ remote = _gf_true;
+ }
+
+ xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_DOWNLOADING, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ downloading = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "getxattr"
+ " failed");
+ state = GF_CS_ERROR;
+ goto out;
+ } else {
+ downloading = _gf_true;
+ }
+ }
+
+ if (remote && downloading) {
+ if (fd) {
+ ret = sys_fremovexattr(*fd, GF_CS_OBJECT_DOWNLOADING);
+ } else {
+ ret = sys_lremovexattr(realpath, GF_CS_OBJECT_DOWNLOADING);
}
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not fstat fd(%s)",
- uuid_utoa (stub->args.fd->inode->gfid));
- call_unwind_error (stub, -1, errno);
- return;
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "failed to remove xattr, repair failed");
+ state = GF_CS_ERROR;
+ goto out;
}
- call_unwind_error (stub, 0, 0);
-}
+ if (buf->ia_size) {
+ if (fd) {
+ ret = sys_ftruncate(*fd, 0);
+ } else {
+ ret = sys_truncate(realpath, 0);
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "truncate failed. File is in inconsistent"
+ " state");
+ state = GF_CS_ERROR;
+ goto out;
+ }
+ }
+ state = GF_CS_REMOTE;
+ goto out;
+
+ } else if (remote) {
+ if (buf->ia_size) {
+ if (fd) {
+ ret = sys_ftruncate(*fd, 0);
+ } else {
+ ret = sys_truncate(realpath, 0);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "truncate failed. File is in inconsistent"
+ " state");
+ state = GF_CS_ERROR;
+ goto out;
+ }
+ }
-static void
-posix_fsyncer_syncfs (xlator_t *this, struct list_head *head)
-{
- call_stub_t *stub = NULL;
- struct posix_fd *pfd = NULL;
- int ret = -1;
-
- stub = list_entry (head->prev, call_stub_t, list);
- ret = posix_fd_ctx_get (stub->args.fd, this, &pfd);
- if (ret)
- return;
-
-#ifdef GF_LINUX_HOST_OS
- /* syncfs() is not "declared" in RHEL's glibc even though
- the kernel has support.
- */
-#include <sys/syscall.h>
-#include <unistd.h>
-#ifdef SYS_syncfs
- syscall (SYS_syncfs, pfd->fd);
-#else
- sync();
-#endif
-#else
- sync();
-#endif
+ state = GF_CS_REMOTE;
+ goto out;
+ } else if (downloading) {
+ if (buf->ia_size) {
+ if (fd) {
+ ret = sys_fremovexattr(*fd, GF_CS_OBJECT_DOWNLOADING);
+ } else {
+ ret = sys_lremovexattr(realpath, GF_CS_OBJECT_DOWNLOADING);
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ "failed to remove xattr, repair failed");
+ state = GF_CS_ERROR;
+ goto out;
+ }
+
+ state = GF_CS_LOCAL;
+ goto out;
+ }
+ }
+
+ state = GF_CS_LOCAL;
+out:
+ gf_msg_debug(this->name, 0, "heal state returned %d", state);
+ return state;
}
+gf_cs_obj_state
+posix_cs_check_status(xlator_t *this, const char *realpath, int *fd,
+ struct iatt *buf)
+{
+ gf_boolean_t remote = _gf_false;
+ gf_boolean_t downloading = _gf_false;
+ int ret = 0;
+ gf_cs_obj_state state = GF_CS_LOCAL;
+ size_t xattrsize = 0;
+ int op_errno = 0;
+
+ if (fd) {
+ xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ remote = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "getxattr "
+ "failed err %d",
+ errno);
+ goto out;
+ } else {
+ remote = _gf_true;
+ }
-void *
-posix_fsyncer (void *d)
+ xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_DOWNLOADING, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ downloading = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "getxattr "
+ "failed err : %d",
+ errno);
+
+ goto out;
+ } else {
+ downloading = _gf_true;
+ }
+ }
+
+ if (realpath) {
+ xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_REMOTE, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ remote = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "getxattr "
+ "failed err : %d",
+ errno);
+ goto out;
+ } else {
+ remote = _gf_true;
+ }
+
+ xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_DOWNLOADING, NULL, 0);
+ if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) {
+ downloading = _gf_false;
+ } else if (xattrsize == -1) {
+ ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "getxattr "
+ "failed err : %d",
+ errno);
+ goto out;
+ } else {
+ downloading = _gf_true;
+ }
+ }
+
+out:
+ if (ret) {
+ gf_msg("POSIX", GF_LOG_ERROR, 0, op_errno,
+ "getxattr failed "
+ "with %d",
+ op_errno);
+ state = GF_CS_ERROR;
+ return state;
+ }
+
+ if ((remote && downloading) || (remote && buf && buf->ia_size)) {
+ state = GF_CS_REPAIR;
+ gf_msg_debug(this->name, 0, "status is REPAIR");
+ return state;
+ }
+
+ if (remote)
+ state = GF_CS_REMOTE;
+ else if (downloading)
+ state = GF_CS_DOWNLOADING;
+ else
+ state = GF_CS_LOCAL;
+
+ gf_msg_debug(this->name, 0, "state returned is %d", state);
+ return state;
+}
+
+int
+posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state,
+ char const *path, int *fd)
{
- xlator_t *this = d;
- struct posix_private *priv = NULL;
- call_stub_t *stub = NULL;
- call_stub_t *tmp = NULL;
- struct list_head list;
- int count = 0;
- gf_boolean_t do_fsync = _gf_true;
+ int ret = 0;
+ char *value = NULL;
+ size_t xattrsize = 0;
+
+ if (!rsp) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!(*rsp)) {
+ *rsp = dict_new();
+ if (!(*rsp)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "failed to"
+ " create dict");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = dict_set_uint64(*rsp, GF_CS_OBJECT_STATUS, state);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set "
+ "dict");
+ ret = -1;
+ goto out;
+ }
+
+ if (fd) {
+ xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, NULL, 0);
+ if (xattrsize != -1) {
+ value = GF_CALLOC(1, xattrsize + 1, gf_posix_mt_char);
+ if (!value) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "no memory for value");
+ ret = -1;
+ goto out;
+ }
+ /* TODO: Add check for ENODATA */
+ xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, value,
+ xattrsize + 1);
+ if (xattrsize == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ " getxattr failed for key %s", GF_CS_OBJECT_REMOTE);
+ goto out;
+ } else {
+ value[xattrsize] = '\0';
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ " getxattr failed for key %s", GF_CS_OBJECT_REMOTE);
+ goto out;
+ }
+ } else {
+ xattrsize = sys_lgetxattr(path, GF_CS_OBJECT_REMOTE, NULL, 0);
+ if (xattrsize != -1) {
+ value = GF_CALLOC(1, xattrsize + 1, gf_posix_mt_char);
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
- priv = this->private;
+ xattrsize = sys_lgetxattr(path, GF_CS_OBJECT_REMOTE, value,
+ xattrsize + 1);
+ if (xattrsize == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ " getxattr failed for key %s", GF_CS_OBJECT_REMOTE);
+ goto out;
+ } else {
+ value[xattrsize] = '\0';
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, errno,
+ " getxattr failed for key %s", GF_CS_OBJECT_REMOTE);
+ goto out;
+ }
+ }
- for (;;) {
- INIT_LIST_HEAD (&list);
+ if (ret == 0) {
+ ret = dict_set_str(*rsp, GF_CS_OBJECT_REMOTE, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to set"
+ "value");
+ }
+ }
- count = posix_fsyncer_pick (this, &list);
+out:
+ return ret;
+}
+
+/* This function checks the status of the file and updates the xattr response.
+ * Also it repairs the state of the file which could have been resulted from a
+ * crash or transient failures.
+ */
+int
+posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
+ struct iatt *buf, const char *realpath, dict_t *xattr_req,
+ dict_t **xattr_rsp, gf_boolean_t ignore_failure)
+{
+ gf_cs_obj_state state = GF_CS_ERROR;
+ int ret = 0;
+ gf_boolean_t is_cs_obj_status = _gf_false;
+ gf_boolean_t is_cs_obj_repair = _gf_false;
- usleep (priv->batch_fsync_delay_usec);
+ if (dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS))
+ is_cs_obj_status = _gf_true;
+ if (dict_get_sizen(xattr_req, GF_CS_OBJECT_REPAIR))
+ is_cs_obj_repair = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG,
- "picked %d fsyncs", count);
+ if (!(is_cs_obj_status || is_cs_obj_repair))
+ return 0;
- switch (priv->batch_fsync_mode) {
- case BATCH_NONE:
- case BATCH_REVERSE_FSYNC:
- break;
- case BATCH_SYNCFS:
- case BATCH_SYNCFS_SINGLE_FSYNC:
- case BATCH_SYNCFS_REVERSE_FSYNC:
- posix_fsyncer_syncfs (this, &list);
- break;
+ if (fd) {
+ LOCK(&fd->inode->lock);
+ if (is_cs_obj_status) {
+ state = posix_cs_check_status(this, NULL, pfd, buf);
+ gf_msg_debug(this->name, 0, "state : %d", state);
+ ret = posix_cs_set_state(this, xattr_rsp, state, NULL, pfd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "posix_cs_set_state failed");
+ }
+
+ if (ignore_failure) {
+ ret = 0;
+ goto unlock;
+ } else {
+ if (state != GF_CS_LOCAL || ret != 0) {
+ ret = -1;
+ goto unlock;
}
+ }
+ }
+
+ if (is_cs_obj_repair) {
+ state = posix_cs_check_status(this, NULL, pfd, buf);
+ gf_msg_debug(this->name, 0, "state : %d", state);
+
+ if (state == GF_CS_REPAIR) {
+ state = posix_cs_heal_state(this, NULL, pfd, buf);
- if (priv->batch_fsync_mode == BATCH_SYNCFS)
- do_fsync = _gf_false;
+ if (state == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "repair check failed");
+ }
+ }
+
+ ret = posix_cs_set_state(this, xattr_rsp, state, NULL, pfd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "posix_cs_set_state failed");
+ if (ignore_failure)
+ ret = 0;
else
- do_fsync = _gf_true;
+ ret = -1;
+ goto unlock;
+ }
+ }
+ } else {
+ if (!loc->inode) {
+ ret = 0;
+ goto out;
+ }
+
+ LOCK(&loc->inode->lock);
+ if (is_cs_obj_status) {
+ state = posix_cs_check_status(this, realpath, NULL, buf);
+ gf_msg_debug(this->name, 0, "state : %d", state);
+ ret = posix_cs_set_state(this, xattr_rsp, state, realpath, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "posix_cs_set_state failed");
+ }
+
+ if (ignore_failure) {
+ ret = 0;
+ goto unlock;
+ } else {
+ if (state != GF_CS_LOCAL || ret != 0) {
+ ret = -1;
+ goto unlock;
+ }
+ }
+ }
- list_for_each_entry_safe_reverse (stub, tmp, &list, list) {
- list_del_init (&stub->list);
+ if (is_cs_obj_repair) {
+ state = posix_cs_check_status(this, realpath, NULL, buf);
+ gf_msg_debug(this->name, 0, "state : %d", state);
- posix_fsyncer_process (this, stub, do_fsync);
+ if (state == GF_CS_REPAIR) {
+ state = posix_cs_heal_state(this, realpath, NULL, buf);
- if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC)
- do_fsync = _gf_false;
+ if (state == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "repair check failed");
}
+ }
+
+ ret = posix_cs_set_state(this, xattr_rsp, state, realpath, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "posix_cs_set_state failed");
+ if (ignore_failure)
+ ret = 0;
+ else
+ ret = -1;
+ goto unlock;
+ }
+ }
+ }
+
+unlock:
+ if (fd)
+ UNLOCK(&fd->inode->lock);
+ else
+ UNLOCK(&loc->inode->lock);
+out:
+ return ret;
+}
+
+/*
+ * Reject @fop on block and character device inodes: sets *op_errno to
+ * EINVAL, logs the refusal and returns -1. Returns 0 for every other
+ * inode type, leaving *op_errno untouched.
+ */
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno)
+{
+    const char *kind = NULL;
+
+    switch (inode->ia_type) {
+        case IA_IFBLK:
+            kind = "block";
+            break;
+        case IA_IFCHR:
+            kind = "char";
+            break;
+        default:
+            return 0;
+    }
+
+    *op_errno = EINVAL;
+    gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT,
+           "%s received on %s file (%s)", fop, kind, uuid_utoa(inode->gfid));
+
+    return -1;
+}
+
+/*
+ * Read the xattr `key` from the open descriptor `fd` (when fd != -1) or from
+ * the path `loc` otherwise, and parse it as a decimal number into *out.
+ *
+ * getxattr does not NUL-terminate what it returns, so one byte of the buffer
+ * is held back and the terminator is written explicitly before parsing.
+ *
+ * Returns 0 when a non-empty value was read, -1 otherwise (*out untouched).
+ */
+static int
+posix_cs_read_numeric_xattr(int fd, const char *loc, const char *key,
+                            long long *out)
+{
+    char val[4096] = {
+        0,
+    };
+    ssize_t len = 0;
+
+    if (fd != -1)
+        len = sys_fgetxattr(fd, key, val, sizeof(val) - 1);
+    else
+        len = sys_lgetxattr(loc, key, val, sizeof(val) - 1);
+
+    if (len <= 0)
+        return -1;
+
+    val[len] = '\0';
+    *out = atoll(val);
+
+    return 0;
+}
+
+/*
+ * Override the size fields of `buf` with the values stored in the
+ * GF_CS_OBJECT_SIZE / GF_CS_BLOCK_SIZE / GF_CS_NUM_BLOCKS xattrs.
+ *
+ * Nothing happens unless xattr_req is non-NULL and carries
+ * GF_CS_OBJECT_STATUS. The xattrs are read from `fd` when fd != -1, otherwise
+ * from the path `loc`. If the object-size xattr is missing the other two are
+ * assumed to be missing as well and `buf` is left untouched.
+ */
+void
+posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req)
+{
+    long long num = 0;
+
+    if (!xattr_req)
+        return;
+
+    if (!dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS))
+        return;
+
+    /* Safe to assume that the other 2 xattrs are also not set */
+    if (posix_cs_read_numeric_xattr(fd, loc, GF_CS_OBJECT_SIZE, &num) != 0)
+        return;
+    buf->ia_size = num;
+
+    if (posix_cs_read_numeric_xattr(fd, loc, GF_CS_BLOCK_SIZE, &num) == 0)
+        buf->ia_blksize = num;
+
+    if (posix_cs_read_numeric_xattr(fd, loc, GF_CS_NUM_BLOCKS, &num) == 0)
+        buf->ia_blocks = num;
+}
+
+/*
+ * Pre-operation check of a parent directory xattr.
+ *
+ * xdata may carry GF_PREOP_PARENT_KEY whose string value names an xattr, plus
+ * an entry under that name holding the value the caller expects. The on-disk
+ * value of that xattr on par_path is read and compared with the expectation.
+ *
+ * Returns _gf_true when the values differ, when the first getxattr fails, or
+ * when memory for an oversized value cannot be obtained; _gf_false otherwise
+ * (including when xdata carries no pre-op request). Both keys are removed
+ * from xdata once the request has been recognised.
+ */
+gf_boolean_t
+posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
+{
+    int op_ret = 0;
+    ssize_t size = 0;
+    char value_buf[4096] = {
+        0,
+    };
+    char *disk_val = value_buf; /* buffer the on-disk value is read into */
+    char *big_buf = NULL;       /* heap buffer for values > value_buf */
+    gf_boolean_t have_val = _gf_false;
+    data_t *arg_data = NULL;
+    char *xattr_name = NULL;
+    size_t xattr_len = 0;
+    gf_boolean_t is_stale = _gf_false;
+
+    op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+    if (xattr_name == NULL) {
+        return is_stale;
+    }
+
+    xattr_len = strlen(xattr_name);
+    arg_data = dict_getn(xdata, xattr_name, xattr_len);
+    if (!arg_data) {
+        dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+        return is_stale;
+    }
+
+    size = sys_lgetxattr(par_path, xattr_name, value_buf,
+                         sizeof(value_buf) - 1);
+
+    if (size >= 0) {
+        have_val = _gf_true;
+    } else {
+        if (errno == ERANGE) {
+            gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED,
+                   "getxattr on key (%s) path (%s) failed due to"
+                   " buffer overflow",
+                   xattr_name, par_path);
+            /* probe for the real length of the value */
+            size = sys_lgetxattr(par_path, xattr_name, NULL, 0);
+        }
+        if (size < 0) {
+            op_ret = -1;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
+                   "getxattr on key (%s) failed, path : %s", xattr_name,
+                   par_path);
+            goto out;
+        }
+    }
+
+    if (!have_val) {
+        /* The value did not fit in value_buf on the first attempt. Reading
+         * `size` bytes straight into the stack array would overrun it, so
+         * use a heap buffer whenever the probed length exceeds the array. */
+        if ((size_t)size > sizeof(value_buf)) {
+            big_buf = GF_CALLOC(1, size, gf_posix_mt_char);
+            if (!big_buf) {
+                /* the check could not be performed: fail closed */
+                op_ret = -1;
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+                       P_MSG_PREOP_CHECK_FAILED,
+                       "out of memory reading key (%s), path : %s",
+                       xattr_name, par_path);
+                goto out;
+            }
+            disk_val = big_buf;
+        }
+
+        size = sys_lgetxattr(par_path, xattr_name, disk_val, size);
+        if (size < 0) {
+            /* NOTE(review): unlike the first getxattr failure above, this
+             * path does not mark the layout stale -- kept as found. */
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
+                   "getxattr on key (%s) failed (%s)", xattr_name,
+                   strerror(errno));
+            goto out;
+        }
+    }
+
+    if ((arg_data->len != size) || (memcmp(arg_data->data, disk_val, size))) {
+        gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED,
+               "failing preop as on-disk xattr value differs from argument "
+               "value for key %s",
+               xattr_name);
+        op_ret = -1;
+    }
+
+out:
+    if (big_buf)
+        GF_FREE(big_buf);
+
+    dict_deln(xdata, xattr_name, xattr_len);
+    dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+
+    if (op_ret == -1) {
+        is_stale = _gf_true;
+    }
+
+    return is_stale;
+}
+
+/*
+ * Delete the xattr named `k` from the file whose path is passed in `data`,
+ * then drop the same key from `dict`.
+ *
+ * The signature matches a dict iteration callback (presumably it is used with
+ * one -- the caller is not visible here); `v` is unused.
+ *
+ * Returns the result of sys_lremovexattr(). The key is removed from `dict`
+ * even when the on-disk removal fails.
+ */
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
+{
+    int ret;
+    char *real_path = data;
+
+    ret = sys_lremovexattr(real_path, k);
+    if (ret) {
+        /* gf_msg takes the errno first and the message id second */
+        gf_msg("posix-helpers", GF_LOG_ERROR, errno, P_MSG_XATTR_NOT_REMOVED,
+               "removexattr failed. key %s path %s", k, real_path);
+    }
+
+    dict_del(dict, k);
+
+    return ret;
 }
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
new file mode 100644
index 00000000000..6d54d37e5aa
--- /dev/null
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -0,0 +1,6004 @@
+/*
+ Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#define __XOPEN_SOURCE 500
+
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <openssl/md5.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <regex.h>
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#ifdef HAVE_LINKAT
+#include <fcntl.h>
+#endif /* HAVE_LINKAT */
+
+#include <glusterfs/checksum.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include "posix-handle.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
+#include "glusterfs3-xdr.h"
+#include <glusterfs/glusterfs-acl.h>
+#include "posix-messages.h"
+#include "posix-metadata.h"
+#include <glusterfs/events.h>
+#include "posix-gfid-path.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/common-utils.h>
+/* Declared here only; the array itself is defined in another file. */
+extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096 /* byte alignment used by _page_aligned_alloc() */
+/*
+ * Helpers for switching the filesystem uid/gid around a fop.
+ *
+ * HAVE_SET_FSID is unconditionally undefined on the next line, so the #ifdef
+ * below always selects the #else branch: DECLARE_OLD_FS_ID_VAR, SET_FS_ID()
+ * and SET_TO_OLD_FS_ID() all expand to nothing in this file. The setfsuid /
+ * setfsgid variants are kept but are never compiled.
+ */
+#undef HAVE_SET_FSID
+#ifdef HAVE_SET_FSID
+
+#define DECLARE_OLD_FS_ID_VAR \
+ uid_t old_fsuid; \
+ gid_t old_fsgid;
+
+#define SET_FS_ID(uid, gid) \
+ do { \
+ old_fsuid = setfsuid(uid); \
+ old_fsgid = setfsgid(gid); \
+ } while (0)
+
+#define SET_TO_OLD_FS_ID() \
+ do { \
+ setfsuid(old_fsuid); \
+ setfsgid(old_fsgid); \
+ } while (0)
+
+#else
+
+#define DECLARE_OLD_FS_ID_VAR
+#define SET_FS_ID(uid, gid)
+#define SET_TO_OLD_FS_ID()
+
+#endif
+
+/* Store the sub-second part of a timestamp in `tv`, which is either a
+   struct timespec
+   when utimensat is available (preferred: nanosecond resolution), or a
+   struct timeval
+   otherwise, in which case the nanosecond value is scaled to microseconds.
+   The test uses defined() so that it always agrees with the declaration of
+   `tv` in posix_do_utimes(), and both arguments are parenthesized so that
+   arbitrary expressions can be passed. */
+#if defined(HAVE_UTIMENSAT)
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+    ((tv).tv_nsec = (nanosecs))
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+    ((tv).tv_usec = (nanosecs) / 1000)
+#endif
+/* NULL-terminated list of xattr keys. Going by the name, these are the keys
+ * a removexattr request must not touch -- the code that consults the list is
+ * outside this view, TODO confirm. */
+static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY,
+ NULL};
+
+/*
+ * Copy the GF_CS_XATTR_ARCHIVE_UUID xattr of a file into the reply dict.
+ *
+ * Does nothing unless `req` carries GF_CS_OBJECT_STATUS. *rsp is created on
+ * demand. The uuid is only fetched when `req` also asks for
+ * GF_CS_XATTR_ARCHIVE_UUID; it is read from `fd` when fd != -1 and from the
+ * path `loc` otherwise. Failures are logged and otherwise ignored.
+ */
+void
+posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd,
+                         char *loc)
+{
+    uuid_t archive_uuid;
+    int status = 0;
+    gf_boolean_t by_fd = (fd != -1);
+
+    if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS))
+        return;
+
+    if (*rsp == NULL) {
+        *rsp = dict_new();
+        if (*rsp == NULL)
+            return;
+    }
+
+    if (!dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID))
+        return;
+
+    if (by_fd)
+        status = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, archive_uuid, 16);
+    else
+        status = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, archive_uuid,
+                               16);
+
+    if (status <= 0) {
+        /* xattr absent or unreadable: debug trace only */
+        if (by_fd)
+            gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d",
+                         GF_CS_XATTR_ARCHIVE_UUID, fd);
+        else
+            gf_msg_debug(this->name, 0, "getxattr failed on %s for %s",
+                         GF_CS_XATTR_ARCHIVE_UUID, loc);
+        return;
+    }
+
+    status = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, archive_uuid,
+                             true);
+    if (status == 0)
+        return;
+
+    if (by_fd)
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+               "%s: Failed to set dictionary value for %s for fd %d",
+               uuid_utoa(archive_uuid), GF_CS_XATTR_ARCHIVE_UUID, fd);
+    else
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+               "%s: Failed to set dictionary value for %s for loc %s",
+               uuid_utoa(archive_uuid), GF_CS_XATTR_ARCHIVE_UUID, loc);
+}
+/*
+ * posix_stat: handler for the stat fop on a path-based location.
+ *
+ * MAKE_INODE_HANDLE is given real_path, loc and &buf. Its body is not visible
+ * here; op_ret (initialised to -1) is tested immediately afterwards, so the
+ * macro presumably stores the lstat result in it -- TODO confirm.
+ * When xdata is supplied, the reply dictionary is assembled by
+ * posix_xattr_fill(), posix_cs_maintenance() and posix_cs_build_xattr_rsp().
+ * The fop is always answered through STACK_UNWIND_STRICT, after which the
+ * reply dictionary is unreffed; the function itself always returns 0.
+ */
+int32_t
+posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct iatt buf = {
+ 0,
+ };
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct posix_private *priv = NULL;
+ char *real_path = NULL;
+ dict_t *xattr_rsp = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, &buf);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ if (op_errno == ENOENT) { /* a missing file is only traced at debug level */
+ gf_msg_debug(this->name, 0,
+ "lstat on gfid-handle %s (path: %s)"
+ "failed: %s", /* NOTE(review): literals join without a space */
+ real_path ? real_path : "<null>", loc->path,
+ strerror(op_errno));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED,
+ "lstat on gfid-handle %s (path: %s) failed",
+ real_path ? real_path : "<null>", loc->path);
+ }
+ goto out;
+ }
+ if (xdata) {
+ xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ &buf);
+
+ posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata,
+ &xattr_rsp, _gf_true); /* return value deliberately not checked here */
+
+ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path);
+ }
+
+ posix_update_iatt_buf(&buf, -1, real_path, xdata); /* fd == -1: read the xattrs by path */
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+}
+
+/*
+ * Apply the permission bits described by `stbuf` to `path`.
+ *
+ * The requested mode is first combined with the translator's masks:
+ * create_directory_mask / force_directory_mode for directories, create_mask /
+ * force_create_mode for everything else. lchmod() is tried first; if it is
+ * not implemented (ENOSYS) a symlink is left alone and reported as success,
+ * any other file is changed with chmod().
+ *
+ * Returns 0 on success, -1 on failure with errno set by the failing call.
+ */
+static int
+posix_do_chmod(xlator_t *this, const char *path, struct iatt *stbuf)
+{
+    int32_t ret = -1;
+    mode_t mode = 0;
+    mode_t mode_bit = 0;
+    struct posix_private *priv = NULL;
+    struct stat stat;
+    int is_symlink = 0;
+
+    priv = this->private;
+    VALIDATE_OR_GOTO(priv, out);
+    ret = sys_lstat(path, &stat);
+    if (ret != 0) {
+        /* report why lstat failed, as posix_do_utimes() does */
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+               "lstat failed: %s", path);
+        goto out;
+    }
+
+    if (S_ISLNK(stat.st_mode))
+        is_symlink = 1;
+
+    mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
+    if (S_ISDIR(stat.st_mode)) {
+        mode_bit = (mode & priv->create_directory_mask) |
+                   priv->force_directory_mode;
+    } else {
+        mode_bit = (mode & priv->create_mask) | priv->force_create_mode;
+    }
+    mode = posix_override_umask(mode, mode_bit);
+
+    ret = lchmod(path, mode);
+    if ((ret == -1) && (errno == ENOSYS)) {
+        /* in Linux symlinks are always in mode 0777 and no
+           such call as lchmod exists.
+        */
+        gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno));
+        if (is_symlink) {
+            ret = 0;
+            goto out;
+        }
+
+        ret = sys_chmod(path, mode);
+    }
+out:
+    return ret;
+}
+
+/*
+ * Change owner and/or group of `path` without following a symlink.
+ *
+ * Only the ids selected by GF_SET_ATTR_UID / GF_SET_ATTR_GID in `valid` are
+ * taken from `stbuf`; an id that is not selected is passed to lchown as -1.
+ * Returns whatever sys_lchown() returns.
+ */
+static int
+posix_do_chown(xlator_t *this, const char *path, struct iatt *stbuf,
+               int32_t valid)
+{
+    uid_t new_owner = (valid & GF_SET_ATTR_UID) ? stbuf->ia_uid : (uid_t)-1;
+    gid_t new_group = (valid & GF_SET_ATTR_GID) ? stbuf->ia_gid : (gid_t)-1;
+
+    return sys_lchown(path, new_owner, new_group);
+}
+/*
+ * posix_do_utimes: set access and/or modification time of `path`.
+ *
+ * `tv` is an array of struct timespec when HAVE_UTIMENSAT is defined and of
+ * struct timeval otherwise; tv[0] carries atime, tv[1] carries mtime. A
+ * timestamp whose GF_SET_ATTR_* bit is set in `valid` comes from `stbuf`, the
+ * other one keeps the value just read with lstat. The update goes through
+ * PATH_SET_TIMESPEC_OR_TIMEVAL (defined outside this view).
+ *
+ * Returns the lstat or update result; an ENOSYS failure on a symlink is
+ * turned into 0.
+ */
+static int
+posix_do_utimes(xlator_t *this, const char *path, struct iatt *stbuf, int valid)
+{
+ int32_t ret = -1;
+#if defined(HAVE_UTIMENSAT)
+ struct timespec tv[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#else
+ struct timeval tv[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#endif
+ struct stat stat;
+ int is_symlink = 0;
+
+ ret = sys_lstat(path, &stat);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%s",
+ path);
+ goto out;
+ }
+
+ if (S_ISLNK(stat.st_mode))
+ is_symlink = 1;
+
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+ tv[0].tv_sec = stbuf->ia_atime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec);
+ } else {
+ /* atime is not given, use current values */
+ tv[0].tv_sec = ST_ATIM_SEC(&stat);
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC(&stat));
+ }
+
+ if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+ tv[1].tv_sec = stbuf->ia_mtime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec);
+ } else {
+ /* mtime is not given, use current values */
+ tv[1].tv_sec = ST_MTIM_SEC(&stat);
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC(&stat));
+ }
+
+ ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv);
+ if ((ret == -1) && (errno == ENOSYS)) {
+ gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno));
+ if (is_symlink) { /* an unsupported timestamp change on a symlink is not an error */
+ ret = 0;
+ goto out;
+ }
+
+ /* NOTE(review): this retries the very same call with the same arguments
+ * that just failed with ENOSYS; posix_do_chmod falls back to a different
+ * call at this point. Check what the macro expands to. */
+ ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv);
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * posix_setattr: handler for the setattr fop on a path-based location.
+ *
+ * Depending on the GF_SET_ATTR_* bits in `valid` the owner/group, the mode
+ * and the access/modification times of the file behind `loc` are changed to
+ * the values in `stbuf`. When `valid` is 0 an lchown(-1, -1) is issued
+ * instead. The attributes before and after the change are returned to the
+ * caller through STACK_UNWIND_STRICT together with the optional xattr reply
+ * built from `xdata`.
+ *
+ * MAKE_INODE_HANDLE (defined elsewhere) fills real_path and statpre; op_ret
+ * is tested right after it, so the macro presumably stores its result there.
+ *
+ * Always returns 0; the outcome is delivered via the unwind (op_ret 0 on
+ * success, -1 with op_errno on failure).
+ */
+int
+posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+              struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    char *real_path = 0;
+    struct iatt statpre = {
+        0,
+    };
+    struct iatt statpost = {
+        0,
+    };
+    dict_t *xattr_rsp = NULL;
+    struct posix_private *priv = NULL;
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(loc, out);
+
+    /* only touch this->private once `this` is known to be valid */
+    priv = this->private;
+    VALIDATE_OR_GOTO(priv, out);
+
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+    MAKE_INODE_HANDLE(real_path, this, loc, &statpre);
+
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "setattr (lstat) on gfid-handle %s (path: %s) failed",
+               real_path ? real_path : "<null>", loc->path);
+        goto out;
+    }
+
+    if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+        op_ret = posix_do_chown(this, real_path, stbuf, valid);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
+                   "setattr (chown) on %s "
+                   "failed",
+                   real_path);
+            goto out;
+        }
+    }
+
+    if (valid & GF_SET_ATTR_MODE) {
+        op_ret = posix_do_chmod(this, real_path, stbuf);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED,
+                   "setattr (chmod) on gfid-handle %s (path: %s) "
+                   "failed",
+                   real_path, loc->path);
+            goto out;
+        }
+    }
+
+    if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+        op_ret = posix_do_utimes(this, real_path, stbuf, valid);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
+                   "setattr (utimes) on gfid-handle %s (path: %s) "
+                   "failed",
+                   real_path, loc->path);
+            goto out;
+        }
+        posix_update_utime_in_mdata(this, real_path, -1, loc->inode,
+                                    &frame->root->ctime, stbuf, valid);
+    }
+
+    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
+        posix_update_ctime_in_mdata(this, real_path, -1, loc->inode,
+                                    &frame->root->ctime, stbuf, valid);
+    }
+
+    if (!valid) {
+        /* nothing selected: lchown with both ids set to -1 */
+        op_ret = sys_lchown(real_path, -1, -1);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
+                   "lchown (gfid-handle: %s, path: %s, -1, -1) "
+                   "failed",
+                   real_path, loc->path);
+
+            goto out;
+        }
+    }
+
+    op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &statpost,
+                         _gf_false);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path,
+               loc->path);
+        goto out;
+    }
+
+    posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);
+
+    if (xdata)
+        xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+                                     &statpost);
+    posix_update_iatt_buf(&statpre, -1, real_path, xdata);
+    posix_update_iatt_buf(&statpost, -1, real_path, xdata);
+    op_ret = 0;
+
+out:
+    SET_TO_OLD_FS_ID();
+
+    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, &statpre, &statpost,
+                        xattr_rsp);
+    if (xattr_rsp)
+        dict_unref(xattr_rsp);
+
+    return 0;
+}
+
+/*
+ * Descriptor-based twin of posix_do_chown(): change owner and/or group of
+ * the file open on `fd`. An id whose GF_SET_ATTR_UID / GF_SET_ATTR_GID bit is
+ * absent from `valid` is passed to fchown as -1.
+ * Returns whatever sys_fchown() returns.
+ */
+int32_t
+posix_do_fchown(xlator_t *this, int fd, struct iatt *stbuf, int32_t valid)
+{
+    uid_t new_owner = (valid & GF_SET_ATTR_UID) ? stbuf->ia_uid : (uid_t)-1;
+    gid_t new_group = (valid & GF_SET_ATTR_GID) ? stbuf->ia_gid : (gid_t)-1;
+
+    return sys_fchown(fd, new_owner, new_group);
+}
+
+/*
+ * Apply the permission bits described by `stbuf` to the file open on `fd`.
+ * The requested mode is combined with create_mask / force_create_mode from
+ * the translator's private data before being handed to fchmod.
+ * Returns the sys_fchmod() result, or -1 when the private data is missing.
+ */
+int32_t
+posix_do_fchmod(xlator_t *this, int fd, struct iatt *stbuf)
+{
+    struct posix_private *priv = this->private;
+    int32_t ret = -1;
+    mode_t requested = 0;
+    mode_t masked = 0;
+
+    VALIDATE_OR_GOTO(priv, out);
+
+    requested = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
+    masked = (requested & priv->create_mask) | priv->force_create_mode;
+    ret = sys_fchmod(fd, posix_override_umask(requested, masked));
+
+out:
+    return ret;
+}
+
+/*
+ * Set access and/or modification time of the file open on `fd`.
+ *
+ * A timestamp whose GF_SET_ATTR_ATIME / GF_SET_ATTR_MTIME bit is set in
+ * `valid` is taken from `stbuf`; the other one keeps its current value, which
+ * is obtained with a single fstat issued only when at least one of the two
+ * is not supplied. Nanoseconds are reduced to microseconds for futimes.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing call.
+ */
+static int
+posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid)
+{
+    int32_t ret = -1;
+    struct timeval times[2] = {{
+                                   0,
+                               },
+                               {
+                                   0,
+                               }};
+    struct stat current = {
+        0,
+    };
+    gf_boolean_t atime_given = ((valid & GF_SET_ATTR_ATIME) ==
+                                GF_SET_ATTR_ATIME);
+    gf_boolean_t mtime_given = ((valid & GF_SET_ATTR_MTIME) ==
+                                GF_SET_ATTR_MTIME);
+
+    if (!atime_given || !mtime_given) {
+        /* at least one timestamp has to be carried over unchanged */
+        ret = sys_fstat(fd, &current);
+        if (ret != 0) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED,
+                   "%d", fd);
+            return ret;
+        }
+    }
+
+    if (atime_given) {
+        times[0].tv_sec = stbuf->ia_atime;
+        times[0].tv_usec = stbuf->ia_atime_nsec / 1000;
+    } else {
+        times[0].tv_sec = ST_ATIM_SEC(&current);
+        times[0].tv_usec = ST_ATIM_NSEC(&current) / 1000;
+    }
+
+    if (mtime_given) {
+        times[1].tv_sec = stbuf->ia_mtime;
+        times[1].tv_usec = stbuf->ia_mtime_nsec / 1000;
+    } else {
+        times[1].tv_sec = ST_MTIM_SEC(&current);
+        times[1].tv_usec = ST_MTIM_NSEC(&current) / 1000;
+    }
+
+    ret = sys_futimes(fd, times);
+    if (ret == -1)
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "%d", fd);
+
+    return ret;
+}
+/*
+ * posix_fsetattr: handler for the fsetattr fop, the descriptor-based
+ * counterpart of posix_setattr.
+ *
+ * The OS descriptor is taken from the posix_fd context of `fd`. Depending on
+ * the GF_SET_ATTR_* bits in `valid`, owner/group, mode and atime/mtime are
+ * changed to the values in `stbuf`; when `valid` is 0 an fchown(-1, -1) is
+ * issued instead. Attributes before and after the change, plus the optional
+ * xattr reply built from `xdata`, are returned through STACK_UNWIND_STRICT.
+ * The function itself always returns 0.
+ */
+int
+posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct iatt statpre = {
+ 0,
+ };
+ struct iatt statpost = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd = NULL;
+ dict_t *xattr_rsp = NULL;
+ int32_t ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ /* NOTE(review): frame appears in the macro arguments before it is validated
+ * below; harmless only while SET_FS_ID expands to nothing in this file. */
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) { /* op_ret is still -1; op_errno was handed to posix_fd_ctx_get */
+ gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpre);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fsetattr (fstat) failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ op_ret = posix_do_fchown(this, pfd->fd, stbuf, valid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED,
+ "fsetattr (fchown) failed"
+ " on fd=%p",
+ fd);
+ goto out;
+ }
+ }
+
+ if (valid & GF_SET_ATTR_MODE) {
+ op_ret = posix_do_fchmod(this, pfd->fd, stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHMOD_FAILED,
+ "fsetattr (fchmod) failed"
+ " on fd=%p",
+ fd);
+ goto out;
+ }
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ op_ret = posix_do_futimes(this, pfd->fd, stbuf, valid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED,
+ "fsetattr (futimes) on "
+ "failed fd=%p",
+ fd);
+ goto out;
+ }
+ posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
+ posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+ if (!valid) { /* no attribute selected: fchown with both ids set to -1 */
+ op_ret = sys_fchown(pfd->fd, -1, -1);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED,
+ "fchown (%d, -1, -1) failed", pfd->fd);
+
+ goto out;
+ }
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpost);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fsetattr (fstat) failed on fd=%p", fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &statpost);
+
+ if (xdata)
+ xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata,
+ &statpost);
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, &statpre, &statpost,
+ xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+}
+
+/*
+ * Common worker behind the fallocate and discard fops.
+ *
+ * Runs sys_fallocate(flags, offset, len) on the OS descriptor behind `fd`,
+ * filling *statpre / *statpost with the attributes before and after. When
+ * xdata carries GLUSTERFS_WRITE_UPDATE_ATOMIC the inode's write_atomic_lock
+ * is held around the operation. posix_cs_maintenance() may populate
+ * *rsp_xdata.
+ *
+ * Disk-full handling: DISK_SPACE_CHECK_AND_GOTO is handed `ret` for both of
+ * its result slots and the final fix-up turns a positive ENOSPC into
+ * -ENOSPC, so the retry decision below has to look at `ret` (op_errno is
+ * never assigned by that macro here). A request is still allowed on a full
+ * disk when it keeps the file size (FALLOC_FL_KEEP_SIZE, which covers
+ * discard) or when it lies entirely inside the current file size. `ret` is
+ * deliberately left untouched while those conditions are evaluated so that a
+ * refused request still ends up as -ENOSPC.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int32_t
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+                   off_t offset, size_t len, struct iatt *statpre,
+                   struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata)
+{
+    int32_t ret = -1;
+    int32_t op_errno = 0;
+    struct posix_fd *pfd = NULL;
+    gf_boolean_t locked = _gf_false;
+    posix_inode_ctx_t *ctx = NULL;
+    struct posix_private *priv = NULL;
+    gf_boolean_t check_space_error = _gf_false;
+    struct stat statbuf = {
+        0,
+    };
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    priv = this->private;
+
+    /* fallocate case is special so call posix_disk_space_check separately
+       for every fallocate fop instead of relying on the periodic check,
+       so that the storage.reserve option behaves correctly
+    */
+    if (priv->disk_reserve)
+        posix_disk_space_check(this);
+
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock);
+
+overwrite:
+    check_space_error = _gf_true;
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+        goto out;
+    }
+
+    ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
+    if (ret < 0) {
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+        locked = _gf_true;
+        pthread_mutex_lock(&ctx->write_atomic_lock);
+    }
+
+    ret = posix_fdstat(this, fd->inode, pfd->fd, statpre);
+    if (ret == -1) {
+        ret = -errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "fallocate (fstat) failed on fd=%p", fd);
+        goto unlock;
+    }
+
+    if (xdata) {
+        ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL,
+                                   xdata, rsp_xdata, _gf_false);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd);
+            ret = -EIO;
+            goto unlock;
+        }
+    }
+
+    ret = sys_fallocate(pfd->fd, flags, offset, len);
+    if (ret == -1) {
+        ret = -errno;
+        gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED,
+               "fallocate failed on %s offset: %jd, "
+               "len:%zu, flags: %d",
+               uuid_utoa(fd->inode->gfid), offset, len, flags);
+        goto unlock;
+    }
+
+    ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
+    if (ret == -1) {
+        ret = -errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "fallocate (fstat) failed on fd=%p", fd);
+        goto unlock;
+    }
+
+    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);
+
+unlock:
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    /* Only reachable with ret == ENOSPC straight from the disk-space check;
+     * every later path has check_space_error set. */
+    if (ret == ENOSPC && priv->disk_space_full && !check_space_error) {
+#ifdef FALLOC_FL_KEEP_SIZE
+        if (flags & FALLOC_FL_KEEP_SIZE) {
+            goto overwrite;
+        }
+#endif
+        /* ret must stay ENOSPC on every refusal below */
+        if (posix_fd_ctx_get(fd, this, &pfd, &op_errno) < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+                   "pfd is NULL from fd=%p", fd);
+            goto out;
+        }
+
+        if (sys_fstat(pfd->fd, &statbuf) < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED,
+                   "%d", pfd->fd);
+            goto out;
+        }
+
+        if (offset + len <= statbuf.st_size) {
+            gf_msg_debug(this->name, 0,
+                         "io vector size will not"
+                         " change disk size so allow overwrite for"
+                         " fd %d",
+                         pfd->fd);
+            goto overwrite;
+        }
+    }
+
+out:
+    SET_TO_OLD_FS_ID();
+    if (ret == ENOSPC)
+        ret = -ENOSPC;
+
+    return ret;
+}
+
+/*
+ * Allocate a zeroed buffer of `size` + ALIGN_SIZE bytes and hand back, via
+ * *aligned_buf, the ALIGN_SIZE-aligned address inside it.
+ *
+ * Returns the raw allocation, which is the pointer the caller must pass to
+ * GF_FREE, or NULL when the allocation fails (*aligned_buf is then left
+ * untouched).
+ */
+char *
+_page_aligned_alloc(size_t size, char **aligned_buf)
+{
+    char *raw = GF_CALLOC(1, (size + ALIGN_SIZE), gf_posix_mt_char);
+
+    if (raw != NULL) {
+        /* page aligned buffer */
+        *aligned_buf = GF_ALIGN_BUF(raw, ALIGN_SIZE);
+    }
+
+    return raw;
+}
+
+/*
+ * Issue one writev of `count` vectors and insist on a complete write.
+ * Returns the byte count reported by sys_writev on success. A failed writev
+ * returns its negative result; a short write returns -1 with errno = ENOSPC.
+ */
+static int32_t
+posix_zerofill_writev(int fd, struct iovec *iov, off_t count, off_t expected)
+{
+    int32_t written = sys_writev(fd, iov, count);
+
+    if (written < 0)
+        return written;
+
+    if (written != expected) {
+        errno = ENOSPC;
+        return -1;
+    }
+
+    return written;
+}
+
+/*
+ * Write `len` zero bytes to `fd` starting at `offset`.
+ *
+ * One zeroed chunk of at most VECTOR_SIZE bytes is shared by every entry of
+ * an iovec array of at most MAX_NO_VECT entries. The range is covered by
+ * whole batches of that array, then by the vectors left over, then by one
+ * final partial chunk. With `o_direct` set the chunk comes from
+ * _page_aligned_alloc().
+ *
+ * Returns 0 when len is 0, -1 on allocation/seek/write failure, otherwise
+ * the result of the last writev.
+ */
+static int32_t
+_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct)
+{
+    off_t vec_count = 0;     /* vectors per full batch */
+    off_t batches = 1;       /* number of full batches */
+    off_t leftover_vecs = 0; /* whole vectors beyond the full batches */
+    off_t tail_bytes = 0;    /* bytes beyond the last whole vector */
+    off_t i = 0;
+    int32_t op_ret = -1;
+    int32_t chunk = VECTOR_SIZE;
+    struct iovec *iov = NULL;
+    char *zeros = NULL;   /* address used as iov_base */
+    char *to_free = NULL; /* address handed to GF_FREE */
+
+    if (len == 0)
+        return 0;
+    if (len < VECTOR_SIZE)
+        chunk = len;
+
+    vec_count = len / (chunk);
+    tail_bytes = len % chunk;
+    if (vec_count > MAX_NO_VECT) {
+        leftover_vecs = vec_count % MAX_NO_VECT;
+        batches = vec_count / MAX_NO_VECT;
+        vec_count = MAX_NO_VECT;
+    }
+
+    iov = GF_CALLOC(vec_count, sizeof(struct iovec), gf_common_mt_iovec);
+    if (!iov)
+        return -1;
+
+    if (o_direct) {
+        to_free = _page_aligned_alloc(chunk, &zeros);
+    } else {
+        zeros = GF_CALLOC(chunk, sizeof(char), gf_common_mt_char);
+        to_free = zeros;
+    }
+    if (!to_free) {
+        GF_FREE(iov);
+        return -1;
+    }
+
+    for (i = 0; i < vec_count; i++) {
+        iov[i].iov_base = zeros;
+        iov[i].iov_len = chunk;
+    }
+
+    if (sys_lseek(fd, offset, SEEK_SET) < 0) {
+        op_ret = -1;
+        goto cleanup;
+    }
+
+    for (i = 0; i < batches; i++) {
+        op_ret = posix_zerofill_writev(fd, iov, vec_count, chunk * vec_count);
+        if (op_ret < 0)
+            goto cleanup;
+    }
+
+    if (leftover_vecs) {
+        op_ret = posix_zerofill_writev(fd, iov, leftover_vecs,
+                                       chunk * leftover_vecs);
+        if (op_ret < 0)
+            goto cleanup;
+    }
+
+    if (tail_bytes) {
+        iov[0].iov_len = tail_bytes;
+        op_ret = posix_zerofill_writev(fd, iov, 1, tail_bytes);
+    }
+
+cleanup:
+    GF_FREE(to_free);
+    GF_FREE(iov);
+    return op_ret;
+}
+
+/*
+ * Worker behind the zerofill fop: zero `len` bytes of the file behind `fd`
+ * starting at `offset`.
+ *
+ * FALLOC_FL_ZERO_RANGE is tried first; when the call is not supported
+ * (ENOSYS / EOPNOTSUPP) the range is written with _posix_do_zerofill().
+ * Descriptors opened with O_SYNC or O_DSYNC are fsync'ed afterwards.
+ * *statpre / *statpost receive the attributes before and after. When xdata
+ * carries GLUSTERFS_WRITE_UPDATE_ATOMIC the inode's write_atomic_lock is held
+ * for the duration. posix_cs_maintenance() may populate *rsp_xdata.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int32_t
+posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                  off_t len, struct iatt *statpre, struct iatt *statpost,
+                  dict_t *xdata, dict_t **rsp_xdata)
+{
+    int32_t ret = -1;
+    int32_t op_errno = 0;
+    int32_t flags = 0;
+    struct posix_fd *pfd = NULL;
+    gf_boolean_t locked = _gf_false;
+    posix_inode_ctx_t *ctx = NULL;
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+        goto out;
+    }
+
+    ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
+    if (ret < 0) {
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    /* xdata is optional, exactly as in posix_do_fallocate() */
+    if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+        locked = _gf_true;
+        pthread_mutex_lock(&ctx->write_atomic_lock);
+    }
+
+    ret = posix_fdstat(this, fd->inode, pfd->fd, statpre);
+    if (ret == -1) {
+        ret = -errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "pre-operation fstat failed on fd = %p", fd);
+        goto out;
+    }
+
+    if (xdata) {
+        ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL,
+                                   xdata, rsp_xdata, _gf_false);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state "
+                   "check failed, fd %p",
+                   fd);
+            ret = -EIO;
+            goto out;
+        }
+    }
+
+    posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata);
+    /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill.
+     * If it fails, fall back to _posix_do_zerofill() and an optional fsync.
+     */
+    flags = FALLOC_FL_ZERO_RANGE;
+    ret = sys_fallocate(pfd->fd, flags, offset, len);
+    if (ret == 0) {
+        goto fsync;
+    } else {
+        ret = -errno;
+        if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) {
+            goto out;
+        }
+    }
+
+    ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
+    if (ret < 0) {
+        ret = -errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED,
+               "zerofill failed on fd %d length %" PRId64, pfd->fd, len);
+        goto out;
+    }
+
+fsync:
+    if (pfd->flags & (O_SYNC | O_DSYNC)) {
+        ret = sys_fsync(pfd->fd);
+        if (ret) {
+            /* capture errno before logging so that neither the log entry
+             * nor the return value can lose it */
+            ret = -errno;
+            gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_WRITEV_FAILED,
+                   "fsync() in zerofill on fd %d failed", pfd->fd);
+            goto out;
+        }
+    }
+
+    ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
+    if (ret == -1) {
+        ret = -errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post operation fstat failed on fd=%p", fd);
+        goto out;
+    }
+
+    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);
+
+out:
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+    SET_TO_OLD_FS_ID();
+
+    return ret;
+}
+
+/*
+ * Handler for the fallocate fop.
+ *
+ * A non-zero `keep_size` is mapped to FALLOC_FL_KEEP_SIZE where the platform
+ * defines it. The work is done by posix_do_fallocate(), whose negative errno
+ * is negated into op_errno for the unwind. The reply dictionary it may have
+ * produced is released after the unwind, in the same way posix_stat(),
+ * posix_setattr() and posix_fsetattr() release theirs.
+ *
+ * Always returns 0.
+ */
+int32_t
+posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+    int32_t ret;
+    int32_t flags = 0;
+    struct iatt statpre = {
+        0,
+    };
+    struct iatt statpost = {
+        0,
+    };
+    dict_t *rsp_xdata = NULL;
+
+#ifdef FALLOC_FL_KEEP_SIZE
+    if (keep_size)
+        flags = FALLOC_FL_KEEP_SIZE;
+#endif /* FALLOC_FL_KEEP_SIZE */
+
+    ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
+                             &statpost, xdata, &rsp_xdata);
+    if (ret < 0) {
+        STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata);
+    } else {
+        STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost,
+                            rsp_xdata);
+    }
+
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+
+    return 0;
+}
+
+/*
+ * Handler for the discard fop: punch a hole of `len` bytes at `offset`
+ * without changing the file size (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)
+ * through posix_do_fallocate().
+ *
+ * On platforms without FALLOC_FL_KEEP_SIZE the fop is answered with
+ * EOPNOTSUPP. `ret` always holds a negative errno on the error path because
+ * the unwind negates it to obtain op_errno. The reply dictionary is released
+ * after the unwind.
+ *
+ * Always returns 0.
+ */
+int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              size_t len, dict_t *xdata)
+{
+    int32_t ret;
+    dict_t *rsp_xdata = NULL;
+#ifndef FALLOC_FL_KEEP_SIZE
+    /* negative, like the values posix_do_fallocate() returns */
+    ret = -EOPNOTSUPP;
+
+#else  /* FALLOC_FL_KEEP_SIZE */
+    int32_t flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
+    struct iatt statpre = {
+        0,
+    };
+    struct iatt statpost = {
+        0,
+    };
+
+    ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
+                             &statpost, xdata, &rsp_xdata);
+    if (ret < 0)
+        goto err;
+
+    STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+
+err:
+#endif /* FALLOC_FL_KEEP_SIZE */
+    STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, rsp_xdata);
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+}
+
+/*
+ * Handler for the zerofill fop.
+ *
+ * DISK_SPACE_CHECK_AND_GOTO may divert to `out` with op_errno == ENOSPC when
+ * the brick is full. In that case the request is still served if the range
+ * [offset, offset + len) lies inside the current file size, since zeroing it
+ * does not grow the file. If that cannot be established the fop fails with
+ * the error already recorded; the retry block is entered at most once and
+ * leaves through `unwind`, never back through `out`.
+ *
+ * The actual work is done by posix_do_zerofill(). The reply dictionary it may
+ * have produced is released after the unwind. Always returns 0.
+ */
+int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               off_t len, dict_t *xdata)
+{
+    int32_t ret = 0;
+    struct iatt statpre = {
+        0,
+    };
+    struct iatt statpost = {
+        0,
+    };
+    struct posix_private *priv = NULL;
+    int op_ret = -1;
+    int op_errno = EINVAL;
+    dict_t *rsp_xdata = NULL;
+    gf_boolean_t check_space_error = _gf_false;
+    struct posix_fd *pfd = NULL;
+    struct stat statbuf = {
+        0,
+    };
+
+    VALIDATE_OR_GOTO(frame, unwind);
+    VALIDATE_OR_GOTO(this, unwind);
+
+    priv = this->private;
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+overwrite:
+    check_space_error = _gf_true;
+    ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost,
+                            xdata, &rsp_xdata);
+    if (ret < 0) {
+        op_ret = -1;
+        op_errno = -ret;
+        goto unwind;
+    }
+
+    STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+
+out:
+    if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
+        ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+                   "pfd is NULL from fd=%p", fd);
+            /* jumping back to `out` could re-enter this block forever */
+            goto unwind;
+        }
+
+        if (sys_fstat(pfd->fd, &statbuf) < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED,
+                   "%d", pfd->fd);
+            goto unwind;
+        }
+
+        if (offset + len <= statbuf.st_size) {
+            gf_msg_debug(this->name, 0,
+                         "io vector size will not"
+                         " change disk size so allow overwrite for"
+                         " fd %d",
+                         pfd->fd);
+            goto overwrite;
+        }
+    }
+
+unwind:
+    STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL,
+                        rsp_xdata);
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+}
+/*
+ * posix_ipc: handler for the ipc fop. No operation code is handled here:
+ * the request is logged as an error and unwound with -1 / EOPNOTSUPP.
+ * `xdata` is not used. Always returns 0.
+ */
+int32_t
+posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ /*
+ * IPC is for inter-translator communication. If one gets here, it
+ * means somebody sent one that nobody else recognized, which is an
+ * error much like an uncaught exception.
+ */
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE,
+ "GF_LOG_IPC(%d) not handled", op);
+ STACK_UNWIND_STRICT(ipc, frame, -1, EOPNOTSUPP, NULL);
+ return 0;
+}
+
+/*
+ * posix_seek - seek fop: locate the next data region or hole in @fd.
+ *
+ * @what selects SEEK_DATA or SEEK_HOLE; any other value fails with ENOTSUP.
+ * When @xdata is supplied, a pre-operation fstat and posix_cs_maintenance()
+ * are run before the lseek.
+ *
+ * The frame is unwound with (0, err, offset) on success and with
+ * (-1, err, -1) on failure. Without HAVE_SEEK_HOLE the fop always fails
+ * with EINVAL. Always returns 0.
+ *
+ * Every failure path leaves ret == -1 and the error code in err, because
+ * the unwind below only recognizes ret == -1 as a failure.
+ */
+int32_t
+posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+           gf_seek_what_t what, dict_t *xdata)
+{
+#ifdef HAVE_SEEK_HOLE
+    struct posix_fd *pfd = NULL;
+    off_t ret = -1;
+    int err = 0;
+    int whence = 0;
+    struct iatt preop = {
+        0,
+    };
+    dict_t *rsp_xdata = NULL;
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    /*
+     * NOTE(review): frame is dereferenced here before it is validated
+     * below. The order is kept so that SET_TO_OLD_FS_ID() at "out" always
+     * follows a SET_FS_ID(); confirm against the macro definitions before
+     * reordering.
+     */
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    switch (what) {
+        case GF_SEEK_DATA:
+            whence = SEEK_DATA;
+            break;
+        case GF_SEEK_HOLE:
+            whence = SEEK_HOLE;
+            break;
+        default:
+            err = ENOTSUP;
+            gf_msg(this->name, GF_LOG_ERROR, ENOTSUP, P_MSG_SEEK_UNKOWN,
+                   "don't know what to seek");
+            goto out;
+    }
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &err);
+    if (ret < 0) {
+        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+        /* Normalize so that the unwind reports a failure. */
+        ret = -1;
+        goto out;
+    }
+
+    if (xdata) {
+        ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
+        if (ret == -1) {
+            /*
+             * Keep ret at -1 and hand the error code over in err; storing
+             * -errno in ret made the unwind report success.
+             */
+            err = errno;
+            gf_msg(this->name, GF_LOG_ERROR, err, P_MSG_FSTAT_FAILED,
+                   "pre-operation fstat failed on fd=%p", fd);
+            goto out;
+        }
+
+        ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL,
+                                   xdata, &rsp_xdata, _gf_false);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd);
+            err = EIO;
+            ret = -1;
+            goto out;
+        }
+    }
+
+    ret = sys_lseek(pfd->fd, offset, whence);
+    if (ret == -1) {
+        err = errno;
+        gf_msg(this->name, fop_log_level(GF_FOP_SEEK, err), err,
+               P_MSG_SEEK_FAILED, "seek failed on fd %d length %" PRId64,
+               pfd->fd, offset);
+        goto out;
+    }
+
+out:
+    SET_TO_OLD_FS_ID();
+
+    STACK_UNWIND_STRICT(seek, frame, (ret == -1 ? -1 : 0), err,
+                        (ret == -1 ? -1 : ret), rsp_xdata);
+#else
+    STACK_UNWIND_STRICT(seek, frame, -1, EINVAL, 0, NULL);
+#endif
+    return 0;
+}
+
+/*
+ * posix_opendir - opendir fop.
+ *
+ * Resolves @loc to its on-disk handle path, opens the directory, and stores
+ * a freshly allocated struct posix_fd (DIR stream, its descriptor and
+ * dir_eof = -1) in the fd context of @fd.
+ *
+ * The frame is unwound with (0, ...) on success and (-1, op_errno) on
+ * failure. On failure the DIR stream is closed and the posix_fd is freed.
+ * Always returns 0.
+ */
+int32_t
+posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+              dict_t *xdata)
+{
+    char *real_path = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = EINVAL;
+    int dir_fd = -1;
+    DIR *dir = NULL;
+    struct posix_fd *pfd = NULL;
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(loc, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+    if (!real_path) {
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    op_ret = -1;
+    dir = sys_opendir(real_path);
+
+    if (dir == NULL) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED,
+               "opendir failed on gfid-handle: %s (path: %s)", real_path,
+               loc->path);
+        goto out;
+    }
+
+    /*
+     * The descriptor gets its own variable. Keeping it in op_ret left
+     * op_ret non-negative when the allocation below failed, which skipped
+     * the cleanup at "out" and unwound the fop as a success.
+     */
+    dir_fd = dirfd(dir);
+    if (dir_fd < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED,
+               "dirfd() failed (path: %s, gfid-handle: %s", loc->path,
+               real_path);
+        goto out;
+    }
+
+    pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
+    if (!pfd) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    pfd->dir = dir;
+    pfd->dir_eof = -1;
+    pfd->fd = dir_fd;
+
+    /* A failure to store the context is only logged, not treated as fatal. */
+    op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+    if (op_ret)
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+               "failed to set the fd"
+               "context path=%s "
+               "gfid-handle= %s,fd=%p",
+               loc->path, real_path, fd);
+
+    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL);
+
+    op_ret = 0;
+
+out:
+    if (op_ret == -1) {
+        if (dir) {
+            (void)sys_closedir(dir);
+            dir = NULL;
+        }
+        if (pfd) {
+            GF_FREE(pfd);
+            pfd = NULL;
+        }
+    }
+
+    SET_TO_OLD_FS_ID();
+    STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL);
+    return 0;
+}
+
+/*
+ * posix_add_fd_to_cleanup - queue @pfd on the context's janitor list.
+ *
+ * Records @this as the owner of @pfd, then, under ctx->fd_lock, appends
+ * @pfd to ctx->janitor_fds, increments priv->rel_fdcount and signals
+ * ctx->fd_cond.
+ */
+static void
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
+{
+    struct posix_private *private_data = this->private;
+    glusterfs_ctx_t *gctx = this->ctx;
+
+    pfd->xl = this;
+
+    pthread_mutex_lock(&gctx->fd_lock);
+    list_add_tail(&pfd->list, &gctx->janitor_fds);
+    private_data->rel_fdcount++;
+    pthread_cond_signal(&gctx->fd_cond);
+    pthread_mutex_unlock(&gctx->fd_lock);
+}
+
+/*
+ * posix_releasedir - releasedir callback.
+ *
+ * Detaches the posix_fd stored in the fd context of @fd and hands it to
+ * posix_add_fd_to_cleanup(). A posix_fd without a DIR stream is only
+ * logged. Always returns 0.
+ */
+int32_t
+posix_releasedir(xlator_t *this, fd_t *fd)
+{
+    uint64_t ctx_value = 0;
+    struct posix_fd *pfd = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    if (fd_ctx_del(fd, this, &ctx_value) < 0) {
+        gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd);
+        goto out;
+    }
+
+    pfd = (struct posix_fd *)(long)ctx_value;
+    if (pfd->dir) {
+        posix_add_fd_to_cleanup(this, pfd);
+    } else {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+               "pfd->dir is NULL for fd=%p", fd);
+    }
+
+out:
+    return 0;
+}
+
+/*
+ * posix_readlink - readlink fop.
+ *
+ * Reads at most @size bytes of the symlink target of @loc into a
+ * stack-allocated buffer and NUL-terminates it. The frame is unwound with
+ * the number of bytes read, the buffer and the iatt filled in by
+ * MAKE_INODE_HANDLE, or with (-1, op_errno) on failure.
+ * Always returns 0.
+ */
+int32_t
+posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ char *dest = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ char *real_path = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ /* One extra byte for the terminating NUL written after sys_readlink(). */
+ /* NOTE(review): size is caller-supplied and is not bounded before alloca. */
+ dest = alloca(size + 1);
+
+ /*
+ * op_ret is tested right after the macro although no visible statement
+ * assigns it; presumably MAKE_INODE_HANDLE stores its lstat result in
+ * op_ret - confirm against the macro definition.
+ */
+ MAKE_INODE_HANDLE(real_path, this, loc, &stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", loc->path ? loc->path : "<null>");
+ goto out;
+ }
+
+ op_ret = sys_readlink(real_path, dest, size);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED,
+ "readlink on gfid-handle: %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ /* op_ret is at most size, so this stays inside the size + 1 buffer. */
+ dest[op_ret] = 0;
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, dest, &stbuf, NULL);
+
+ return 0;
+}
+
+/*
+ * posix_truncate - truncate fop (path based).
+ *
+ * Resolves @loc to its handle path, optionally runs posix_cs_maintenance()
+ * when @xdata is given, truncates the file to @offset and stats it again.
+ * The frame is unwound with (0, ..., prebuf, postbuf) on success or
+ * (-1, op_errno, ...) on failure. Always returns 0.
+ */
+int32_t
+posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = 0;
+ struct posix_private *priv = NULL;
+ struct iatt prebuf = {
+ 0,
+ };
+ struct iatt postbuf = {
+ 0,
+ };
+ dict_t *rsp_xdata = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ /*
+ * op_ret is checked directly after the macro; presumably
+ * MAKE_INODE_HANDLE assigns it from the pre-operation lstat - confirm
+ * against the macro definition.
+ */
+ MAKE_INODE_HANDLE(real_path, this, loc, &prebuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on (path: %s gfid-handle: %s) "
+ "failed",
+ loc->path, real_path ? real_path : "<null>");
+ goto out;
+ }
+
+ if (xdata) {
+ op_ret = posix_cs_maintenance(this, NULL, loc, NULL, &prebuf, real_path,
+ xdata, &rsp_xdata, _gf_false);
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, path %s", loc->path);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ posix_update_iatt_buf(&prebuf, -1, real_path, xdata);
+ op_ret = sys_truncate(real_path, offset);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+ "truncate on gfid-handle: %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ /* Post-operation stat, returned to the caller as postbuf. */
+ op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postbuf,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on gfid-handle %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &postbuf);
+
+ op_ret = 0;
+out:
+ SET_TO_OLD_FS_ID();
+
+ /*
+ * NOTE(review): rsp_xdata is handed to posix_cs_maintenance() above but
+ * is neither passed to the unwind (NULL is) nor unref'd in this function;
+ * if posix_cs_maintenance() can allocate it, that is a leak - confirm.
+ */
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, &prebuf, &postbuf,
+ NULL);
+
+ return 0;
+}
+
+/*
+ * posix_open - open fop.
+ *
+ * Rejects block/char device inodes (EINVAL) and symlinks (ELOOP), opens the
+ * handle path of @loc with @flags (plus O_DIRECT when priv->o_direct is
+ * set) and stores a new struct posix_fd in the fd context of @fd. With
+ * O_CREAT the disk-space check is run first. When @xdata is given, a
+ * pre-operation fstat and posix_cs_maintenance() are performed on the new
+ * descriptor.
+ *
+ * The frame is unwound with (0, ...) on success or (-1, op_errno) on
+ * failure; on failure the descriptor is closed again. Always returns 0.
+ */
+int32_t
+posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+           fd_t *fd, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    char *real_path = NULL;
+    int32_t _fd = -1;
+    struct posix_fd *pfd = NULL;
+    struct posix_private *priv = NULL;
+    struct iatt preop = {
+        0,
+    };
+    dict_t *rsp_xdata = NULL;
+    struct iatt stbuf = {
+        0,
+    };
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(this->private, out);
+    VALIDATE_OR_GOTO(loc, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    priv = this->private;
+    VALIDATE_OR_GOTO(priv, out);
+
+    if (loc->inode && ((loc->inode->ia_type == IA_IFBLK) ||
+                       (loc->inode->ia_type == IA_IFCHR))) {
+        gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+               "open received on a block/char file (%s)",
+               uuid_utoa(loc->inode->gfid));
+        op_errno = EINVAL;
+        goto out;
+    }
+
+    if (flags & O_CREAT)
+        DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+    MAKE_INODE_HANDLE(real_path, this, loc, &stbuf);
+    if (!real_path) {
+        op_ret = -1;
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    if (IA_ISLNK(stbuf.ia_type)) {
+        op_ret = -1;
+        op_errno = ELOOP;
+        goto out;
+    }
+
+    /* Reset: every failure below relies on op_ret being -1 at "out". */
+    op_ret = -1;
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+    if (priv->o_direct)
+        flags |= O_DIRECT;
+
+    _fd = sys_open(real_path, flags, priv->force_create_mode);
+    if (_fd == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED,
+               "open on gfid-handle %s (path: %s), flags: %d", real_path,
+               loc->path, flags);
+        goto out;
+    }
+
+    posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);
+
+    pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
+    if (!pfd) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    pfd->flags = flags;
+    pfd->fd = _fd;
+
+    if (xdata) {
+        op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
+        if (op_ret == -1) {
+            /*
+             * Record the error before anything else can clobber errno;
+             * without this the fop was unwound as (-1, 0).
+             */
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+                   "pre-operation fstat failed on fd=%p", fd);
+            GF_FREE(pfd);
+            pfd = NULL;
+            goto out;
+        }
+
+        posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata,
+                             &rsp_xdata, _gf_true);
+    }
+
+    /* A failure to store the context is only logged, not treated as fatal. */
+    op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+    if (op_ret)
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+               "failed to set the fd context gfid-handle=%s path=%s fd=%p",
+               real_path, loc->path, fd);
+
+    op_ret = 0;
+
+out:
+    if (op_ret == -1) {
+        if (_fd != -1) {
+            sys_close(_fd);
+        }
+    }
+
+    SET_TO_OLD_FS_ID();
+
+    STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata);
+
+    return 0;
+}
+
+/*
+ * posix_readv - readv fop.
+ *
+ * Reads up to @size bytes at @offset from the descriptor behind @fd into a
+ * page-aligned iobuf and unwinds the frame with a single-element iovec, the
+ * post-read iatt and an iobref holding the buffer. Block/char device inodes
+ * and a zero @size are rejected with EINVAL. When @xdata is given, a
+ * pre-operation fstat and posix_cs_maintenance() run before the read.
+ *
+ * On success op_ret is the number of bytes read; op_errno is set to ENOENT
+ * when the read reached or passed the end of the file, as an EOF hint for
+ * higher layers. On failure the frame is unwound with (-1, op_errno).
+ * Always returns 0.
+ */
+int
+posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+            off_t offset, uint32_t flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int _fd = -1;
+    struct posix_private *priv = NULL;
+    struct iobuf *iobuf = NULL;
+    struct iobref *iobref = NULL;
+    struct iovec vec = {
+        0,
+    };
+    struct posix_fd *pfd = NULL;
+    struct iatt stbuf = {
+        0,
+    };
+    struct iatt preop = {
+        0,
+    };
+    int ret = -1;
+    dict_t *rsp_xdata = NULL;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+    VALIDATE_OR_GOTO(fd->inode, out);
+    VALIDATE_OR_GOTO(this->private, out);
+
+    priv = this->private;
+    VALIDATE_OR_GOTO(priv, out);
+
+    if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
+        gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+               "readv received on a block/char file (%s)",
+               uuid_utoa(fd->inode->gfid));
+        op_errno = EINVAL;
+        goto out;
+    }
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd);
+        goto out;
+    }
+
+    if (!size) {
+        op_errno = EINVAL;
+        gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_INVALID_ARGUMENT,
+               "size=%" GF_PRI_SIZET, size);
+        goto out;
+    }
+
+    iobuf = iobuf_get_page_aligned(this->ctx->iobuf_pool, size, ALIGN_SIZE);
+    if (!iobuf) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    _fd = pfd->fd;
+
+    if (xdata) {
+        op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                   "pre-operation fstat failed on fd=%p", fd);
+            goto out;
+        }
+        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+                                      &rsp_xdata, _gf_false);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd);
+            op_errno = EIO;
+            goto out;
+        }
+    }
+
+    posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+    op_ret = sys_pread(_fd, iobuf->ptr, size, offset);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READ_FAILED,
+               "read failed on gfid=%s, "
+               "fd=%p, offset=%" PRIu64 " size=%" GF_PRI_SIZET
+               ", "
+               "buf=%p",
+               uuid_utoa(fd->inode->gfid), fd, offset, size, iobuf->ptr);
+        goto out;
+    }
+
+    GF_ATOMIC_ADD(priv->read_value, op_ret);
+
+    vec.iov_base = iobuf->ptr;
+    vec.iov_len = op_ret;
+
+    iobref = iobref_new();
+    if (!iobref) {
+        /*
+         * op_ret still holds the byte count of the read; turn it into a
+         * failure instead of handing a NULL iobref to iobref_add().
+         */
+        op_ret = -1;
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    iobref_add(iobref, iobuf);
+
+    /*
+     * readv successful, and we need to get the stat of the file
+     * we read from
+     */
+
+    op_ret = posix_fdstat(this, fd->inode, _fd, &stbuf);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "fstat failed on fd=%p", fd);
+        goto out;
+    }
+
+    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &stbuf);
+
+    /* Hack to notify higher layers of EOF. */
+    if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
+        op_errno = ENOENT;
+
+    op_ret = vec.iov_len;
+
+out:
+
+    STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref,
+                        rsp_xdata);
+
+    /* Drop the local references only after the unwind has used them. */
+    if (iobref)
+        iobref_unref(iobref);
+    if (iobuf)
+        iobuf_unref(iobuf);
+
+    return 0;
+}
+
+/*
+ * __posix_pwritev - write @count iovecs to @fd, back to back, from @offset.
+ *
+ * Each segment is written with sys_pwrite(). A short write is followed up
+ * by writing the rest of the same segment, so that the next segment never
+ * lands on top of data that has not been written yet.
+ *
+ * Returns the total number of bytes written, -EFAULT when @vector is NULL,
+ * or -errno of the failing sys_pwrite(). If sys_pwrite() makes no progress
+ * (returns 0) the bytes written so far are returned.
+ */
+int32_t
+__posix_pwritev(int fd, struct iovec *vector, int count, off_t offset)
+{
+    int32_t op_ret = 0;
+    int idx = 0;
+    ssize_t retval = 0;
+    off_t internal_off = 0;
+    const char *base = NULL;
+    size_t remaining = 0;
+
+    if (!vector)
+        return -EFAULT;
+
+    internal_off = offset;
+    for (idx = 0; idx < count; idx++) {
+        base = vector[idx].iov_base;
+        remaining = vector[idx].iov_len;
+
+        while (remaining > 0) {
+            retval = sys_pwrite(fd, base, remaining, internal_off);
+            if (retval == -1) {
+                op_ret = -errno;
+                goto err;
+            }
+            if (retval == 0) {
+                /* No progress: stop instead of spinning. */
+                goto err;
+            }
+
+            op_ret += retval;
+            internal_off += retval;
+            base += retval;
+            remaining -= (size_t)retval;
+        }
+    }
+
+err:
+    return op_ret;
+}
+
+/*
+ * __posix_writev - write @count iovecs to @fd starting at @startoff.
+ *
+ * Without @odirect this simply forwards to __posix_pwritev(). With
+ * @odirect each segment is first copied into a page-aligned bounce buffer
+ * (sized for the largest segment) and written from there with sys_pwrite().
+ *
+ * Returns the total number of bytes written, -EFAULT when @vector is NULL,
+ * or a negative errno on allocation or write failure. On the @odirect path
+ * a short write ends the operation and the bytes written so far are
+ * returned, so later segments are never placed at a wrong offset.
+ */
+int32_t
+__posix_writev(int fd, struct iovec *vector, int count, off_t startoff,
+               int odirect)
+{
+    int32_t op_ret = 0;
+    int idx = 0;
+    size_t max_buf_size = 0;
+    ssize_t retval = 0;
+    char *buf = NULL;
+    char *alloc_buf = NULL;
+    off_t internal_off = 0;
+
+    /* Check for the O_DIRECT flag during open() */
+    if (!odirect)
+        return __posix_pwritev(fd, vector, count, startoff);
+
+    /* Same contract as the non-O_DIRECT path. */
+    if (!vector)
+        return -EFAULT;
+
+    /* size_t, so that large iov_len values are not truncated. */
+    for (idx = 0; idx < count; idx++) {
+        if (max_buf_size < vector[idx].iov_len)
+            max_buf_size = vector[idx].iov_len;
+    }
+
+    alloc_buf = _page_aligned_alloc(max_buf_size, &buf);
+    if (!alloc_buf) {
+        op_ret = -errno;
+        goto err;
+    }
+
+    internal_off = startoff;
+    for (idx = 0; idx < count; idx++) {
+        memcpy(buf, vector[idx].iov_base, vector[idx].iov_len);
+
+        /* not sure whether writev works on O_DIRECT'd fd */
+        retval = sys_pwrite(fd, buf, vector[idx].iov_len, internal_off);
+        if (retval == -1) {
+            op_ret = -errno;
+            goto err;
+        }
+
+        op_ret += retval;
+        internal_off += retval;
+
+        if ((size_t)retval < vector[idx].iov_len) {
+            /* Short write: report what was written and stop here. */
+            break;
+        }
+    }
+
+err:
+    GF_FREE(alloc_buf);
+
+    return op_ret;
+}
+
+/*
+ * _fill_writev_xdata - build the response dict for a write-like fop.
+ *
+ * For each of GLUSTERFS_OPEN_FD_COUNT, GLUSTERFS_ACTIVE_FD_COUNT and
+ * GLUSTERFS_WRITE_IS_APPEND that is present in @xdata, the matching value
+ * (inode fd_count, inode active_fd_count, @is_append) is stored in a new
+ * dict. A failure to set a key is logged and otherwise ignored.
+ *
+ * Returns the new dict, or NULL when @fd has no inode or a null gfid, when
+ * @xdata is NULL, or when the dict cannot be allocated.
+ */
+dict_t *
+_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
+{
+    dict_t *reply = NULL;
+    inode_t *inode = fd ? fd->inode : NULL;
+
+    if (!inode || gf_uuid_is_null(inode->gfid)) {
+        gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED,
+                         "fd: %p inode: %p"
+                         "gfid:%s",
+                         fd, inode, inode ? uuid_utoa(inode->gfid) : "N/A");
+        return NULL;
+    }
+
+    if (!xdata)
+        return NULL;
+
+    reply = dict_new();
+    if (!reply)
+        return NULL;
+
+    if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT) &&
+        dict_set_uint32(reply, GLUSTERFS_OPEN_FD_COUNT, inode->fd_count) < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+               "%s: Failed to set "
+               "dictionary value for %s",
+               uuid_utoa(inode->gfid), GLUSTERFS_OPEN_FD_COUNT);
+    }
+
+    if (dict_get(xdata, GLUSTERFS_ACTIVE_FD_COUNT) &&
+        dict_set_uint32(reply, GLUSTERFS_ACTIVE_FD_COUNT,
+                        inode->active_fd_count) < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+               "%s: Failed to set "
+               "dictionary value for %s",
+               uuid_utoa(inode->gfid), GLUSTERFS_ACTIVE_FD_COUNT);
+    }
+
+    if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND) &&
+        dict_set_uint32(reply, GLUSTERFS_WRITE_IS_APPEND, is_append) < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+               "%s: Failed to set "
+               "dictionary value for %s",
+               uuid_utoa(inode->gfid), GLUSTERFS_WRITE_IS_APPEND);
+    }
+
+    return reply;
+}
+
+/*
+ * posix_writev - writev fop.
+ *
+ * Writes @count iovecs at @offset to the descriptor behind @fd and unwinds
+ * the frame with the number of bytes written plus the pre- and
+ * post-operation iatts, or with (-1, op_errno) on failure.
+ *
+ * xdata keys looked at here:
+ *   GLUSTERFS_WRITE_IS_APPEND     - pre-stat and write happen under
+ *                                   ctx->write_atomic_lock and the reply
+ *                                   records whether the write was an append.
+ *   GLUSTERFS_WRITE_UPDATE_ATOMIC - pre-stat, write and post-stat all happen
+ *                                   under ctx->write_atomic_lock.
+ *
+ * When the disk-space check jumps to "out" with ENOSPC while
+ * priv->disk_space_full is set, the write is still let through if it lies
+ * completely inside the current file size and the file has no unallocated
+ * blocks according to fstat.
+ *
+ * Always returns 0.
+ */
+int32_t
+posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+             struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+             struct iobref *iobref, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int _fd = -1;
+    struct posix_private *priv = NULL;
+    struct posix_fd *pfd = NULL;
+    struct iatt preop = {
+        0,
+    };
+    struct iatt postop = {
+        0,
+    };
+    int ret = -1;
+    dict_t *rsp_xdata = NULL;
+    int is_append = 0;
+    gf_boolean_t locked = _gf_false;
+    gf_boolean_t write_append = _gf_false;
+    gf_boolean_t update_atomic = _gf_false;
+    posix_inode_ctx_t *ctx = NULL;
+    gf_boolean_t check_space_error = _gf_false;
+    struct stat statbuf = {
+        0,
+    };
+    int totlen = 0;
+    int idx = 0;
+
+    VALIDATE_OR_GOTO(frame, unwind);
+    VALIDATE_OR_GOTO(this, unwind);
+    VALIDATE_OR_GOTO(fd, unwind);
+    VALIDATE_OR_GOTO(fd->inode, unwind);
+    VALIDATE_OR_GOTO(vector, unwind);
+    VALIDATE_OR_GOTO(this->private, unwind);
+
+    priv = this->private;
+
+    VALIDATE_OR_GOTO(priv, unwind);
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+overwrite:
+
+    /* From here on the ENOSPC special-casing at "out" must not run again. */
+    check_space_error = _gf_true;
+    if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
+        gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+               "writev received on a block/char file (%s)",
+               uuid_utoa(fd->inode->gfid));
+        op_errno = EINVAL;
+        goto out;
+    }
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd);
+        goto out;
+    }
+
+    _fd = pfd->fd;
+
+    ret = posix_check_internal_writes(this, fd, _fd, xdata);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "possible overwrite from internal client, fd=%p", fd);
+        op_ret = -1;
+        op_errno = EBUSY;
+        goto out;
+    }
+
+    if (xdata) {
+        if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND))
+            write_append = _gf_true;
+        if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+            update_atomic = _gf_true;
+    }
+
+    /* The write_is_append check and write must happen
+       atomically. Else another write can overtake this
+       write after the check and get written earlier.
+
+       So lock before preop-stat and unlock after write.
+    */
+
+    /*
+     * The update_atomic option is to instruct posix to do prestat,
+     * write and poststat atomically. This is to prevent any modification to
+     * ia_size and ia_blocks until poststat and the diff in their values
+     * between pre and poststat could be of use for some translators (shard
+     * as of today).
+     */
+
+    op_ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
+    if (op_ret < 0) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    if (write_append || update_atomic) {
+        locked = _gf_true;
+        pthread_mutex_lock(&ctx->write_atomic_lock);
+    }
+
+    op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "pre-operation fstat failed on fd=%p", fd);
+        goto out;
+    }
+
+    if (xdata) {
+        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+                                      &rsp_xdata, _gf_false);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd);
+            op_errno = EIO;
+            goto out;
+        }
+    }
+
+    posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+    if (locked && write_append) {
+        if (preop.ia_size == offset || (fd->flags & O_APPEND))
+            is_append = 1;
+    }
+
+    op_ret = __posix_writev(_fd, vector, count, offset,
+                            (pfd->flags & O_DIRECT));
+
+    /* For a plain append check the lock only has to cover stat + write. */
+    if (locked && (!update_atomic)) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    if (op_ret < 0) {
+        /* __posix_writev() reports failure as a negative errno. */
+        op_errno = -op_ret;
+        op_ret = -1;
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED,
+               "write failed: offset %" PRIu64 ",", offset);
+        goto out;
+    }
+
+    rsp_xdata = _fill_writev_xdata(fd, xdata, this, is_append);
+    /* writev successful, we also need to get the stat of
+     * the file we wrote to
+     */
+
+    ret = posix_fdstat(this, fd->inode, _fd, &postop);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd);
+        goto out;
+    }
+
+    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    if (flags & (O_SYNC | O_DSYNC)) {
+        ret = sys_fsync(_fd);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITEV_FAILED,
+                   "fsync() in writev on fd %d failed", _fd);
+            op_ret = -1;
+            op_errno = errno;
+            goto out;
+        }
+    }
+
+    GF_ATOMIC_ADD(priv->write_value, op_ret);
+
+out:
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
+        ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+                   "pfd is NULL from fd=%p", fd);
+            goto unwind;
+        }
+
+        if (sys_fstat(pfd->fd, &statbuf) < 0) {
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
+                   "%d", pfd->fd);
+            goto unwind;
+        }
+
+        /*
+         * Sum up the lengths of all segments. Assigning instead of adding
+         * only looked at the last segment and could let a write that grows
+         * the file slip through as an "overwrite".
+         */
+        totlen = 0;
+        for (idx = 0; idx < count; idx++) {
+            totlen += vector[idx].iov_len;
+        }
+
+        if ((offset + totlen <= statbuf.st_size) &&
+            !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) {
+            gf_msg_debug(this->name, 0,
+                         "io vector size will not"
+                         " change disk size so allow overwrite for"
+                         " fd %d",
+                         pfd->fd);
+            goto overwrite;
+        }
+    }
+
+unwind:
+    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop,
+                        rsp_xdata);
+
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+}
+
+/*
+ * posix_copy_file_range - copy_file_range fop.
+ *
+ * Copies up to @len bytes from @fd_in at @off_in to @fd_out at @off_out
+ * with a single sys_copy_file_range() call. The frame is unwound with the
+ * number of bytes copied, the iatt of the source and the pre- and
+ * post-operation iatts of the destination, or with (-1, op_errno) on
+ * failure.
+ *
+ * When GLUSTERFS_WRITE_UPDATE_ATOMIC is present in @xdata, the pre-stat,
+ * the copy and the post-stats run under the destination inode's
+ * write_atomic_lock.
+ *
+ * Always returns 0.
+ */
+int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                      off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+                      uint32_t flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int _fd_in = -1;
+    int _fd_out = -1;
+    struct posix_private *priv = NULL;
+    struct posix_fd *pfd_in = NULL;
+    struct posix_fd *pfd_out = NULL;
+    struct iatt preop_dst = {
+        0,
+    };
+    struct iatt postop_dst = {
+        0,
+    };
+    struct iatt stbuf = {
+        0,
+    };
+    int ret = -1;
+    dict_t *rsp_xdata = NULL;
+    int is_append = 0;
+    gf_boolean_t locked = _gf_false;
+    gf_boolean_t update_atomic = _gf_false;
+    posix_inode_ctx_t *ctx = NULL;
+    char in_uuid_str[64] = {0}, out_uuid_str[64] = {0};
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd_in, out);
+    VALIDATE_OR_GOTO(fd_in->inode, out);
+    VALIDATE_OR_GOTO(fd_out, out);
+    VALIDATE_OR_GOTO(fd_out->inode, out);
+    VALIDATE_OR_GOTO(this->private, out);
+
+    priv = this->private;
+
+    VALIDATE_OR_GOTO(priv, out);
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+    if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno))
+        goto out;
+
+    if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno))
+        goto out;
+
+    ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd_in);
+        goto out;
+    }
+
+    _fd_in = pfd_in->fd;
+
+    ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd_out);
+        goto out;
+    }
+
+    _fd_out = pfd_out->fd;
+
+    /*
+     * Currently, the internal write is checked via xdata which
+     * is set by some xlator above. It could be due to several
+     * reasons such as healing or a snapshot operation happening
+     * using copy_file_range. As of now (i.e. writing the patch with
+     * this change) none of the xlators above posix are using the
+     * internal write with copy_file_range. In future it might
+     * change. At least as of now the hope is that, when that happens,
+     * this function or fop does not require additional changes for
+     * handling internal writes.
+     */
+    ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "possible overwrite from internal client, fd=%p", fd_out);
+        op_ret = -1;
+        op_errno = EBUSY;
+        goto out;
+    }
+
+    if (xdata) {
+        if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+            update_atomic = _gf_true;
+    }
+
+    /*
+     * The update_atomic option is to instruct posix to do prestat,
+     * write and poststat atomically. This is to prevent any modification to
+     * ia_size and ia_blocks until poststat and the diff in their values
+     * between pre and poststat could be of use for some translators.
+     * This is similar to the atomic write operation. Atomic write
+     * (i.e. prestat + write + poststat) is used by shard as of now. In case
+     * some xlator needs copy_file_range to be atomic from the prestat and
+     * poststat perspective (i.e. prestat + copy_file_range + poststat) then
+     * it has to send the "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata.
+     */
+
+    op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx);
+    if (op_ret < 0) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    if (update_atomic) {
+        ret = pthread_mutex_lock(&ctx->write_atomic_lock);
+        if (!ret)
+            locked = _gf_true;
+        else {
+            /*
+             * pthread_mutex_lock() returns the error number instead of
+             * setting errno. op_ret is 0 at this point, so it has to be
+             * turned into a failure explicitly; otherwise the fop would be
+             * unwound as a success without copying anything.
+             */
+            op_ret = -1;
+            op_errno = ret;
+            gf_msg(this->name, GF_LOG_ERROR, ret, P_MSG_MUTEX_FAILED,
+                   "failed to hold write atomic lock on %s",
+                   uuid_utoa(fd_out->inode->gfid));
+            goto out;
+        }
+    }
+
+    op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "pre-operation fstat failed on fd=%p", fd_out);
+        goto out;
+    }
+
+    /*
+     * Since only the destination file (fd_out) is undergoing
+     * modification, the write related tests are done on that,
+     * i.e. this is treated as if the destination file were
+     * undergoing a write fop from the maintenance perspective.
+     */
+    if (xdata) {
+        op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst,
+                                      NULL, xdata, &rsp_xdata, _gf_false);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd_out);
+            op_errno = EIO;
+            goto out;
+        }
+    }
+
+    /*
+     * NOTE: This is just doing a single execution of the copy_file_range
+     * system call. If the returned value of this system call is less
+     * than len, then should we keep doing it in a for loop until the
+     * copy_file_range of all the len bytes is done?
+     * Check the example program provided in the man page of
+     * copy_file_range.
+     * If so, then separate variables for both off_in and off_out
+     * should be used which are initialized to the off_in and off_out
+     * that this function call receives, but then advanced by the
+     * value returned by sys_copy_file_range and then used as
+     * off_in and off_out for the next copy_file_range execution.
+     */
+    op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len,
+                                 flags);
+
+    if (op_ret < 0) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
+               "copy_file_range failed: fd_in: %p (gfid: %s) ,"
+               " fd_out %p (gfid:%s)",
+               fd_in, uuid_utoa_r(fd_in->inode->gfid, in_uuid_str), fd_out,
+               uuid_utoa_r(fd_out->inode->gfid, out_uuid_str));
+        goto out;
+    }
+
+    /*
+     * Let this be as it is for now. This function collects
+     * information such as open fd count etc. So, even though
+     * is_append does not apply to copy_file_range, for now,
+     * allowing it to be recorded in the dict as _gf_false.
+     */
+    rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append);
+
+    /* copy_file_range successful, we also need to get the stat of
+     * the file we wrote to (i.e. destination file or fd_out).
+     */
+    ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd_out);
+        goto out;
+    }
+
+    /*
+     * Also perform the stat on the source fd (i.e. fd_in). For now,
+     * allowing it to be done within the locked region if the request
+     * is for atomic operation (and update) of copy_file_range.
+     */
+    ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd_in);
+        goto out;
+    }
+
+    /*
+     * The core logic of what time attributes are to be updated
+     * on a fop is decided at client side xlator utime.
+     * All the remaining fops call posix_set_ctime function
+     * to update the {a,m,c}time. But, for all the other fops,
+     * the operation is happening on only one file (or inode).
+     * But here, there are 2 fds (source and destination). Hence
+     * the new function below to update the appropriate times for
+     * both the source and the destination file.
+     * For the source file, if at all anything has to be updated,
+     * it would be atime (as that file is only read, not updated).
+     * For the destination file, the attributes that require the
+     * modification would be mtime and ctime.
+     * What times have to be changed is actually determined by
+     * utime xlator. But, all of them would be in frame->root->flags.
+     * So, currently posix assumes that, the atime flag is for
+     * the source file and the other 2 flags are for the destination
+     * file. Since, the assumption is rigid (i.e. atime for source
+     * and {m,c}time for destination), the below function is called
+     * posix_set_ctime_cfr (cfr standing for copy_file_range).
+     * FUTURE TODO:
+     * In future, some other functionality or fop might operate
+     * simultaneously on 2 files. Then, depending upon what that new
+     * fop does or what are its requirements, the below function might
+     * require changes to become generic for consumption in case of
+     * simultaneous operations on 2 files.
+     */
+    posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf,
+                        NULL, pfd_out->fd, fd_out->inode, &postop_dst);
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    /*
+     * Record copy_file_range in priv->write_value for now.
+     * If not needed, remove below section of code along with
+     * this comment (or add comment to explain why it is not
+     * needed).
+     */
+    GF_ATOMIC_ADD(priv->write_value, op_ret);
+
+out:
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf,
+                        &preop_dst, &postop_dst, rsp_xdata);
+
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+}
+
+/*
+ * posix_statfs - statfs fop.
+ *
+ * Runs statvfs on the handle path of @loc and adjusts the result before
+ * unwinding:
+ *   - the configured reserve (a percentage of f_blocks when
+ *     priv->disk_unit is 'p', otherwise priv->disk_reserve rounded up to
+ *     whole f_bsize blocks) is taken off f_bfree, and f_bavail is capped
+ *     at the new f_bfree;
+ *   - all block and inode counters are divided by
+ *     priv->shared_brick_count when that is greater than 1;
+ *   - all counters are zeroed when priv->export_statfs is off.
+ *
+ * The frame is unwound with (0, ..., buf) on success or (-1, op_errno) on
+ * failure. Always returns 0.
+ */
+int32_t
+posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    char *real_path = NULL;
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    struct statvfs buf = {
+        0,
+    };
+    struct posix_private *priv = NULL;
+    int sharers = 1;
+    double pct = 0;
+    uint64_t reserved_blocks = 0;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(loc, out);
+    VALIDATE_OR_GOTO(this->private, out);
+
+    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+    if (!real_path) {
+        op_ret = -1;
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    priv = this->private;
+
+    op_ret = sys_statvfs(real_path, &buf);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+               "statvfs failed on gfid-handle %s (path: %s)", real_path,
+               loc->path);
+        goto out;
+    }
+
+    /* Work out how many blocks are held back as reserve. */
+    if (priv->disk_unit == 'p') {
+        pct = priv->disk_reserve;
+        reserved_blocks = (((buf.f_blocks * pct) / 100) + 0.5);
+    } else if (buf.f_bsize) {
+        reserved_blocks = (priv->disk_reserve + buf.f_bsize - 1) / buf.f_bsize;
+    }
+
+    /* Hide the reserve from the free/available counters. */
+    if (buf.f_bfree <= reserved_blocks) {
+        buf.f_bfree = 0;
+        buf.f_bavail = 0;
+    } else {
+        buf.f_bfree -= reserved_blocks;
+        if (buf.f_bavail > buf.f_bfree)
+            buf.f_bavail = buf.f_bfree;
+    }
+
+    sharers = priv->shared_brick_count;
+    if (sharers > 1) {
+        buf.f_blocks /= sharers;
+        buf.f_bfree /= sharers;
+        buf.f_bavail /= sharers;
+        buf.f_files /= sharers;
+        buf.f_ffree /= sharers;
+        buf.f_favail /= sharers;
+    }
+
+    if (!priv->export_statfs) {
+        buf.f_blocks = 0;
+        buf.f_bfree = 0;
+        buf.f_bavail = 0;
+        buf.f_files = 0;
+        buf.f_ffree = 0;
+        buf.f_favail = 0;
+    }
+
+    op_ret = 0;
+
+out:
+    STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, NULL);
+    return 0;
+}
+
+/*
+ * flush fop: nothing is written here; the call only succeeds when a posix
+ * fd context can be looked up for @fd. Always unwinds and returns 0.
+ */
+int32_t
+posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+    struct posix_fd *pfd = NULL;
+    int32_t op_errno = 0;
+    int32_t op_ret = -1;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    if (posix_fd_ctx_get(fd, this, &pfd, &op_errno) < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+               "pfd is NULL on fd=%p", fd);
+        goto out;
+    }
+
+    op_ret = 0;
+
+out:
+    STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL);
+
+    return 0;
+}
+
+/*
+ * release callback for a file fd: detach the posix fd context stored on @fd
+ * and hand it to posix_add_fd_to_cleanup(). A context that still carries a
+ * directory handle is only logged. Always returns 0.
+ */
+int32_t
+posix_release(xlator_t *this, fd_t *fd)
+{
+    uint64_t ctx_val = 0;
+    struct posix_fd *pfd = NULL;
+
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    if (fd_ctx_del(fd, this, &ctx_val) < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd);
+        goto out;
+    }
+
+    /* The context value is the posix_fd pointer stored as an integer. */
+    pfd = (struct posix_fd *)(long)ctx_val;
+    if (pfd->dir) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL,
+               "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
+    }
+
+    posix_add_fd_to_cleanup(this, pfd);
+
+out:
+    return 0;
+}
+
+/*
+ * Queue an fsync request instead of executing it inline: the call is wrapped
+ * in a stub, appended to priv->fsyncs under fsync_mutex, and fsync_cond is
+ * signalled. If the stub cannot be created the frame is unwound with ENOMEM.
+ * Always returns 0.
+ */
+int
+posix_batch_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+                  dict_t *xdata)
+{
+    struct posix_private *priv = this->private;
+    call_stub_t *stub = fop_fsync_stub(frame, default_fsync, fd, datasync,
+                                       xdata);
+
+    if (stub == NULL) {
+        STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
+        return 0;
+    }
+
+    pthread_mutex_lock(&priv->fsync_mutex);
+    {
+        list_add_tail(&stub->list, &priv->fsyncs);
+        priv->fsync_queue_count++;
+        pthread_cond_signal(&priv->fsync_cond);
+    }
+    pthread_mutex_unlock(&priv->fsync_mutex);
+
+    return 0;
+}
+
+/*
+ * fsync fop.
+ *
+ * When batch mode is enabled and the request carries the "batch-fsync" key
+ * the request is queued through posix_batch_fsync() and this function returns
+ * without unwinding. Otherwise the backend fd is stat'ed, synced with
+ * fdatasync (@datasync non-zero) or fsync, stat'ed again, and the frame is
+ * unwound with the pre/post attributes.
+ *
+ * Always returns 0; the outcome is carried by op_ret/op_errno.
+ */
+int32_t
+posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+            dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int _fd = -1;
+    struct posix_fd *pfd = NULL;
+    int ret = -1;
+    struct iatt preop = {
+        0,
+    };
+    struct iatt postop = {
+        0,
+    };
+    struct posix_private *priv = NULL;
+
+    DECLARE_OLD_FS_ID_VAR;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+#ifdef GF_DARWIN_HOST_OS
+    /* Always return success in case of fsync in MAC OS X */
+    op_ret = 0;
+    goto out;
+#endif
+
+    priv = this->private;
+
+    if (priv->batch_fsync_mode && xdata && dict_get(xdata, "batch-fsync")) {
+        /*
+         * This early return bypasses the SET_TO_OLD_FS_ID() at 'out', so
+         * undo SET_FS_ID here before handing the frame over.
+         */
+        SET_TO_OLD_FS_ID();
+        posix_batch_fsync(frame, this, fd, datasync, xdata);
+        return 0;
+    }
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+               "pfd not found in fd's ctx");
+        goto out;
+    }
+
+    _fd = pfd->fd;
+
+    op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
+               "pre-operation fstat failed on fd=%p", fd);
+        goto out;
+    }
+
+    if (datasync) {
+        op_ret = sys_fdatasync(_fd);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED,
+                   "fdatasync on fd=%p failed", fd);
+            goto out;
+        }
+    } else {
+        op_ret = sys_fsync(_fd);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED,
+                   "fsync on fd=%p failed", fd);
+            goto out;
+        }
+    }
+
+    op_ret = posix_fdstat(this, fd->inode, _fd, &postop);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd);
+        goto out;
+    }
+
+    op_ret = 0;
+
+out:
+    SET_TO_OLD_FS_ID();
+
+    STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, &preop, &postop, NULL);
+
+    return 0;
+}
+
+static int gf_posix_xattr_enotsup_log;
+
+/*
+ * Per-pair callback used with dict_foreach() by posix_setxattr(): unpacks the
+ * posix_xattr_filler_t passed through @tmp and forwards the key/value pair to
+ * posix_handle_pair(), returning whatever that call returns.
+ */
+static int
+_handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+{
+    posix_xattr_filler_t *ctx = tmp;
+
+    return posix_handle_pair(ctx->this, ctx->loc, ctx->real_path, k, v,
+                             ctx->flags, ctx->stbuf);
+}
+
+#ifdef GF_DARWIN_HOST_OS
+/*
+ * Translate GF_XATTR_* flag bits into the XATTR_* values defined on Darwin.
+ * GF_XATTR_CREATE, GF_XATTR_REPLACE and XATTR_REPLACE are cleared from the
+ * input first; every other bit is passed through untouched.
+ */
+static int
+map_xattr_flags(int flags)
+{
+    int mapped = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE);
+
+    if (flags & GF_XATTR_CREATE) {
+        mapped |= XATTR_CREATE;
+    }
+    if (flags & GF_XATTR_REPLACE) {
+        mapped |= XATTR_REPLACE;
+    }
+
+    return mapped;
+}
+#endif
+
+/*
+ * Read the xattr @acl_key back from @real_path and store it in @xattr under
+ * the same key, so it can travel back in the setxattr reply.
+ *
+ * Returns 0 when the value was stored (the dict then owns the buffer) and -1
+ * otherwise; the buffer is released on every failure path.
+ */
+static int
+posix_setxattr_acl_to_xdata(xlator_t *this, const char *real_path,
+                            dict_t *xattr, char *acl_key)
+{
+    char *acl_xattr = NULL;
+    char *shrunk = NULL;
+    ssize_t acl_size = 0;
+
+    /*
+     * The size of the value is only known after calling sys_lgetxattr, so
+     * start with a large buffer and shrink it to the required size afterwards.
+     */
+    acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char);
+    if (!acl_xattr)
+        return -1;
+
+    acl_size = sys_lgetxattr(real_path, acl_key, acl_xattr, ACL_BUFFER_MAX);
+    if (acl_size < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+               "Posix acl is not set "
+               "properly at the backend");
+        goto err;
+    }
+
+    /* If acl_size is more than max buffer size, just ignore it */
+    if (acl_size >= ACL_BUFFER_MAX) {
+        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW,
+               "size of acl is more "
+               "than the buffer");
+        goto err;
+    }
+
+    /*
+     * Shrink through a temporary so the original buffer is still reachable
+     * (and freed below) if the reallocation fails. A zero-length value is
+     * left in the original buffer: a realloc to size 0 may free it.
+     */
+    if (acl_size > 0) {
+        shrunk = GF_REALLOC(acl_xattr, acl_size);
+        if (!shrunk)
+            goto err;
+        acl_xattr = shrunk;
+    }
+
+    if (dict_set_bin(xattr, acl_key, acl_xattr, acl_size)) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+               "failed to set "
+               "xdata for acl");
+        goto err;
+    }
+
+    return 0;
+
+err:
+    GF_FREE(acl_xattr);
+    return -1;
+}
+
+/*
+ * setxattr fop.
+ *
+ * Three mutually exclusive paths, tried in order:
+ *  1. @dict carries CTIME_MDATA_XDATA_KEY: the mdata xattr is written through
+ *     posix_set_mdata_xattr_legacy_files() and nothing else is done.
+ *  2. @dict carries GF_CS_OBJECT_UPLOAD_COMPLETE: under the inode lock the
+ *     size/blocks/blocksize/remote-path xattrs are written, the file is
+ *     truncated to 0 and its state is set to GF_CS_REMOTE.
+ *  3. Otherwise every pair of @dict is applied through posix_handle_pair().
+ *     When @xdata carries a non-zero "sync_backend_xattrs", the user.* and
+ *     xattrs_to_heal xattrs already on the file are removed first and the
+ *     pairs of @dict are then written with sys_lsetxattr().
+ *
+ * On path 3 the reply dict receives the pre/post iatt and, when an ACL key
+ * was part of the request, the ACL value read back from the backend.
+ *
+ * Always unwinds the frame and returns 0.
+ */
+int32_t
+posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+               int flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    char *real_path = NULL;
+    struct iatt preop = {0};
+    struct iatt postop = {0};
+    int32_t ret = 0;
+    dict_t *xattr = NULL;
+    dict_t *subvol_xattrs = NULL;
+    posix_xattr_filler_t filler = {
+        0,
+    };
+    struct posix_private *priv = NULL;
+    struct iatt tmp_stbuf = {
+        0,
+    };
+    data_t *tdata = NULL;
+    char *cs_var = NULL;
+    gf_cs_obj_state state = -1;
+    int i = 0;
+    int len;
+    struct mdata_iatt mdata_iatt = {
+        0,
+    };
+    int8_t sync_backend_xattrs = _gf_false;
+    data_pair_t *custom_xattrs;
+    data_t *keyval = NULL;
+    char **xattrs_to_heal = get_xattrs_to_heal();
+
+    DECLARE_OLD_FS_ID_VAR;
+    SET_FS_ID(frame->root->uid, frame->root->gid);
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(this->private, out);
+    VALIDATE_OR_GOTO(loc, out);
+    VALIDATE_OR_GOTO(dict, out);
+
+    priv = this->private;
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+    if (!real_path) {
+        op_ret = -1;
+        op_errno = ESTALE;
+        goto out;
+    }
+
+    ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
+    if (ret == 0) {
+        /* This is initiated by lookup when ctime feature is enabled to create
+         * "trusted.glusterfs.mdata" xattr if not present. These are the files
+         * which were created when ctime feature is disabled.
+         */
+        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
+                                                 &mdata_iatt, &op_errno);
+        if (ret != 0) {
+            op_ret = -1;
+        }
+        goto out;
+    }
+
+    posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);
+
+    op_ret = -1;
+
+    dict_del(dict, GFID_XATTR_KEY);
+    dict_del(dict, GF_XATTR_VOL_ID_KEY);
+    /* the io-stats-dump key should not reach disk */
+    dict_del(dict, GF_XATTR_IOSTATS_DUMP_KEY);
+
+    tdata = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
+    if (tdata) {
+        /*TODO: move the following to a different function */
+        LOCK(&loc->inode->lock);
+        {
+            state = posix_cs_check_status(this, real_path, NULL, &preop);
+            if (state != GF_CS_LOCAL) {
+                op_errno = EINVAL;
+                ret = posix_cs_set_state(this, &xattr, state, real_path, NULL);
+                if (ret) {
+                    gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed");
+                }
+                goto unlock;
+            }
+
+            ret = posix_pstat(this, loc->inode, loc->gfid, real_path,
+                              &tmp_stbuf, _gf_true);
+            if (ret) {
+                op_errno = EINVAL;
+                goto unlock;
+            }
+
+            cs_var = alloca(4096);
+            sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime);
+
+            /*TODO: may be should consider nano-second also */
+            if (strncmp(cs_var, tdata->data, tdata->len) > 0) {
+                gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                       "mtime "
+                       "passed is different from seen by file now."
+                       " Will skip truncating the file");
+                ret = -1;
+                op_errno = EINVAL;
+                goto unlock;
+            }
+
+            len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size);
+
+            ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len,
+                                flags);
+            if (ret) {
+                op_errno = errno;
+                gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                       "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE,
+                       ret);
+                goto unlock;
+            }
+
+            len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks);
+
+            ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len,
+                                flags);
+            if (ret) {
+                op_errno = errno;
+                gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                       "setxattr failed. key %s err %d", GF_CS_NUM_BLOCKS, ret);
+                goto unlock;
+            }
+
+            len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize);
+
+            ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len,
+                                flags);
+            if (ret) {
+                op_errno = errno;
+                gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                       "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret);
+                goto unlock;
+            }
+
+            /* The remote path is loc->path without its leading '/'. */
+            memset(cs_var, 0, 4096);
+            if (loc->path[0] == '/') {
+                snprintf(cs_var, 4096, "%s", loc->path + 1);
+                gf_msg_debug(this->name, 0, "remotepath %s", cs_var);
+            }
+
+            ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var,
+                                strlen(cs_var), flags);
+            if (ret) {
+                op_errno = errno;
+                gf_log("POSIX", GF_LOG_ERROR,
+                       "setxattr failed - %s"
+                       " %d",
+                       GF_CS_OBJECT_REMOTE, ret);
+                goto unlock;
+            }
+
+            ret = sys_truncate(real_path, 0);
+            if (ret) {
+                op_errno = errno;
+                gf_log("POSIX", GF_LOG_ERROR,
+                       "truncate failed - %s"
+                       " %d",
+                       real_path, ret);
+                ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE);
+                if (ret) {
+                    op_errno = errno;
+                    gf_log("POSIX", GF_LOG_ERROR,
+                           "removexattr "
+                           "failed post processing- %s"
+                           " %d",
+                           GF_CS_OBJECT_REMOTE, ret);
+                }
+                /*
+                 * The truncate failed: a successful rollback above must not
+                 * turn the overall result into a success.
+                 */
+                ret = -1;
+                goto unlock;
+            } else {
+                state = GF_CS_REMOTE;
+                ret = posix_cs_set_state(this, &xattr, state, real_path, NULL);
+                if (ret) {
+                    gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed");
+                }
+            }
+        }
+    unlock:
+        UNLOCK(&loc->inode->lock);
+        op_ret = ret;
+        goto out;
+    }
+
+    filler.real_path = real_path;
+    filler.this = this;
+    filler.stbuf = &preop;
+    filler.loc = loc;
+
+#ifdef GF_DARWIN_HOST_OS
+    filler.flags = map_xattr_flags(flags);
+#else
+    filler.flags = flags;
+#endif
+    op_ret = dict_foreach(dict, _handle_setxattr_keyvalue_pair, &filler);
+    if (op_ret < 0) {
+        op_errno = -op_ret;
+        op_ret = -1;
+        goto out;
+    }
+
+    ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs);
+    if (ret) {
+        gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs");
+    }
+
+    if (sync_backend_xattrs) {
+        ret = dict_set_int32_sizen(xdata, "list-xattr", 1);
+        if (ret) {
+            op_ret = -1;
+            op_errno = ENOMEM;
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_DICT_SET_FAILED,
+                   "Unable to set list-xattr in dict ");
+            goto out;
+        }
+
+        /*
+         * List all custom xattrs. posix_xattr_fill() hands back the dict, so
+         * none is allocated here beforehand.
+         */
+        subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+                                         NULL);
+
+        /* Remove all user xattrs from the file */
+        dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr,
+                             real_path);
+
+        /* Remove all custom xattrs from the file */
+        for (i = 1; xattrs_to_heal[i]; i++) {
+            keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]);
+            if (keyval) {
+                ret = sys_lremovexattr(real_path, xattrs_to_heal[i]);
+                if (ret) {
+                    op_ret = -1;
+                    op_errno = errno;
+                    gf_msg(this->name, GF_LOG_ERROR, op_errno,
+                           P_MSG_XATTR_NOT_REMOVED,
+                           "removexattr failed. key %s path %s",
+                           xattrs_to_heal[i], loc->path);
+                    goto out;
+                }
+
+                dict_del(subvol_xattrs, xattrs_to_heal[i]);
+                keyval = NULL;
+            }
+        }
+
+        /* Set custom xattrs based on info provided by DHT */
+        custom_xattrs = dict->members_list;
+
+        while (custom_xattrs != NULL) {
+            ret = sys_lsetxattr(real_path, custom_xattrs->key,
+                                custom_xattrs->value->data,
+                                custom_xattrs->value->len, flags);
+            if (ret) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d",
+                       custom_xattrs->key, ret);
+                goto out;
+            }
+
+            custom_xattrs = custom_xattrs->next;
+        }
+    }
+
+    xattr = dict_new();
+    if (!xattr)
+        goto out;
+
+    /*
+     * FIXFIX: Send the stbuf info in the xdata for now
+     * This is used by DHT to redirect FOPs if the file is being migrated
+     * Ignore errors for now
+     */
+    ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postop,
+                      _gf_false);
+    if (ret)
+        goto out;
+
+    ret = posix_set_iatt_in_dict(xattr, &preop, &postop);
+
+    /*
+     * ACLs can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs, which
+     * the access-control xlator is not aware of. To let it update its context
+     * correctly, the POSIX_ACL_*_XATTR values are stored in the xdata that is
+     * sent back in the callback path.
+     */
+    if (dict_get(dict, GF_POSIX_ACL_ACCESS)) {
+        if (posix_setxattr_acl_to_xdata(this, real_path, xattr,
+                                        POSIX_ACL_ACCESS_XATTR) < 0)
+            goto out;
+    }
+
+    if (dict_get(dict, GF_POSIX_ACL_DEFAULT)) {
+        if (posix_setxattr_acl_to_xdata(this, real_path, xattr,
+                                        POSIX_ACL_DEFAULT_XATTR) < 0)
+            goto out;
+    }
+
+out:
+    SET_TO_OLD_FS_ID();
+
+    STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xattr);
+
+    if (xattr)
+        dict_unref(xattr);
+
+    if (subvol_xattrs)
+        dict_unref(subvol_xattrs);
+
+    return 0;
+}
+
+/*
+ * Look through the directory behind @loc for an entry whose name matches,
+ * ignoring case, the part of @key that follows
+ * GF_XATTR_GET_REAL_FILENAME_KEY, and store the on-disk spelling in @dict
+ * under @key.
+ *
+ * Returns strlen(name) + 1 on success or a negative errno value:
+ * -ESTALE when no handle path could be built, -ENOATTR when nothing matched,
+ * -ENOMEM on allocation or dict failure, -errno for the remaining failures.
+ */
+int
+posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                              const char *key, dict_t *dict, dict_t *xdata)
+{
+    /* Result used when the scan ends without a usable match. */
+    int ret = -ENOATTR;
+    /* NOTE(review): presumably written by MAKE_INODE_HANDLE - confirm. */
+    int op_ret = -1;
+    const char *fname = NULL;
+    char *real_path = NULL;
+    char *match = NULL;
+    DIR *dirp = NULL;
+    struct dirent *entry = NULL;
+    struct dirent scratch[2] = {
+        {
+            0,
+        },
+    };
+
+    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+    if (real_path == NULL) {
+        return -ESTALE;
+    }
+    if (op_ret == -1) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+               "posix_xattr_get_real_filename (lstat) on "
+               "gfid-handle %s (path: %s) failed",
+               real_path, loc->path);
+        return -errno;
+    }
+
+    dirp = sys_opendir(real_path);
+    if (dirp == NULL)
+        return -errno;
+
+    fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY);
+
+    /* errno is cleared before every readdir so a failure can be told apart
+     * from the end of the directory. */
+    errno = 0;
+    while ((entry = sys_readdir(dirp, scratch)) != NULL && errno == 0) {
+        if (strcasecmp(entry->d_name, fname) != 0) {
+            errno = 0;
+            continue;
+        }
+
+        match = gf_strdup(entry->d_name);
+        if (match == NULL)
+            ret = -ENOMEM;
+        break;
+    }
+
+    (void)sys_closedir(dirp);
+
+    if (match == NULL)
+        return ret;
+
+    if (dict_set_dynstr(dict, (char *)key, match)) {
+        GF_FREE(match);
+        return -ENOMEM;
+    }
+
+    return strlen(match) + 1;
+}
+
+/*
+ * Build the ancestry of the directory @leaf_inode through
+ * posix_make_ancestryfromgfid(). When @type has POSIX_ANCESTRY_PATH set and
+ * @path is non-NULL, *path receives a newly allocated copy of the resulting
+ * path with its last character dropped (except for "/").
+ *
+ * Returns the value of posix_make_ancestryfromgfid(), or -1 with *op_errno
+ * set to ENOMEM when the path could not be duplicated.
+ */
+int
+posix_get_ancestry_directory(xlator_t *this, inode_t *leaf_inode,
+                             gf_dirent_t *head, char **path, int type,
+                             int32_t *op_errno, dict_t *xdata)
+{
+    ssize_t handle_size = 0;
+    struct posix_private *priv = NULL;
+    inode_t *inode = NULL;
+    int ret = -1;
+    size_t dirlen = 0;
+    char dirpath[PATH_MAX] = {
+        0,
+    };
+
+    priv = this->private;
+
+    handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+    /* Pass the real capacity of dirpath, not one byte more. */
+    ret = posix_make_ancestryfromgfid(
+        this, dirpath, sizeof(dirpath), head, type | POSIX_ANCESTRY_PATH,
+        leaf_inode->gfid, handle_size, priv->base_path, leaf_inode->table,
+        &inode, xdata, op_errno);
+    if (ret < 0)
+        goto out;
+
+    /* there is already a reference in loc->inode */
+    inode_unref(inode);
+
+    if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
+        dirlen = strlen(dirpath);
+        /* An empty result has no last character to drop. */
+        if (dirlen > 0 && strcmp(dirpath, "/"))
+            dirpath[dirlen - 1] = '\0';
+
+        *path = gf_strdup(dirpath);
+        if (!*path) {
+            ret = -1;
+            if (op_errno)
+                *op_errno = ENOMEM;
+        }
+    }
+
+out:
+    return ret;
+}
+
+/*
+ * Scan @dirpath for up to @count entries whose inode number equals
+ * stbuf->st_ino, i.e. the hard links of @leaf_inode that live in this
+ * directory.
+ *
+ * For every match:
+ *  - with POSIX_ANCESTRY_DENTRY in @type, a gf_dirent_t carrying the inode,
+ *    its xattr dict and the stat data is appended to @head;
+ *  - with POSIX_ANCESTRY_PATH in @type, the link's path (relative to the
+ *    brick base path) is appended to *path, entries separated by ':'.
+ *
+ * Returns 0 on success and -1 on failure with *op_errno set. A failure
+ * detected during the scan is preserved even when closing the directory
+ * succeeds.
+ */
+int32_t
+posix_links_in_same_directory(char *dirpath, int count, inode_t *leaf_inode,
+                              inode_t *parent, struct stat *stbuf,
+                              gf_dirent_t *head, char **path, int type,
+                              dict_t *xdata, int32_t *op_errno)
+{
+    int op_ret = -1;
+    gf_dirent_t *gf_entry = NULL;
+    xlator_t *this = NULL;
+    struct posix_private *priv = NULL;
+    DIR *dirp = NULL;
+    struct dirent *entry = NULL;
+    struct dirent scratch[2] = {
+        {
+            0,
+        },
+    };
+    char temppath[PATH_MAX] = {
+        0,
+    };
+    char scr[PATH_MAX * 4] = {
+        0,
+    };
+
+    this = THIS;
+
+    priv = this->private;
+
+    dirp = sys_opendir(dirpath);
+    if (!dirp) {
+        *op_errno = errno;
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED,
+               "could not opendir %s", dirpath);
+        goto out;
+    }
+
+    while (count > 0) {
+        errno = 0;
+        entry = sys_readdir(dirp, scratch);
+        if (!entry || errno != 0)
+            break;
+
+        if (entry->d_ino != stbuf->st_ino)
+            continue;
+
+        /* Linking an inode here, can cause a race in posix_acl.
+           Parent inode gets linked here, but before
+           it reaches posix_acl_readdirp_cbk, create/lookup can
+           come on a leaf-inode, as parent-inode-ctx not yet updated
+           in posix_acl_readdirp_cbk, create and lookup can fail
+           with EACCESS. So do the inode linking in the quota xlator
+
+        linked_inode = inode_link (leaf_inode, parent,
+                                   entry->d_name, NULL);
+
+        GF_ASSERT (linked_inode == leaf_inode);
+        inode_unref (linked_inode);*/
+
+        if (type & POSIX_ANCESTRY_DENTRY) {
+            loc_t loc = {
+                0,
+            };
+
+            loc.inode = inode_ref(leaf_inode);
+            gf_uuid_copy(loc.gfid, leaf_inode->gfid);
+
+            (void)snprintf(temppath, sizeof(temppath), "%s/%s", dirpath,
+                           entry->d_name);
+
+            gf_entry = gf_dirent_for_name(entry->d_name);
+            if (!gf_entry) {
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL");
+                op_ret = -1;
+                *op_errno = ENOMEM;
+                inode_unref(loc.inode);
+                goto out;
+            }
+            gf_entry->inode = inode_ref(leaf_inode);
+            gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1,
+                                              xdata, NULL);
+            iatt_from_stat(&(gf_entry->d_stat), stbuf);
+
+            list_add_tail(&gf_entry->list, &head->list);
+            loc_wipe(&loc);
+        }
+
+        if (type & POSIX_ANCESTRY_PATH) {
+            (void)snprintf(temppath, sizeof(temppath), "%s/%s",
+                           &dirpath[priv->base_path_length], entry->d_name);
+            if (!*path) {
+                *path = gf_strdup(temppath);
+            } else {
+                /* creating a colon separated */
+                /* list of hard links */
+                (void)snprintf(scr, sizeof(scr), "%s:%s", *path, temppath);
+
+                GF_FREE(*path);
+                *path = gf_strdup(scr);
+            }
+            if (!*path) {
+                op_ret = -1;
+                *op_errno = ENOMEM;
+                goto out;
+            }
+        }
+
+        count--;
+    }
+
+    op_ret = 0;
+out:
+    if (dirp) {
+        /*
+         * Only a failing closedir may change the result; a successful one
+         * must not hide an error recorded above.
+         */
+        if (sys_closedir(dirp) == -1) {
+            op_ret = -1;
+            *op_errno = errno;
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_CLOSE_FAILED,
+                   "closedir failed");
+        }
+    }
+
+    return op_ret;
+}
+
+/*
+ * Build the ancestry of a non-directory @leaf_inode.
+ *
+ * The xattr names of the leaf's handle are listed; each one starting with
+ * PGFID_XATTR_KEY_PREFIX names a parent gfid and its value holds the number
+ * of links the leaf has inside that parent. For every such parent the
+ * parent's ancestry is built with posix_make_ancestryfromgfid() and the
+ * matching directory entries are collected with
+ * posix_links_in_same_directory().
+ *
+ * A parent whose ancestry cannot be built is skipped. Returns 0 on success
+ * (including "no xattrs at all") and -1 with *op_errno set on failure.
+ */
+int
+posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode,
+                                 gf_dirent_t *head, char **path, int type,
+                                 int32_t *op_errno, dict_t *xdata)
+{
+    size_t remaining_size = 0;
+    int op_ret = -1, pathlen = -1;
+    ssize_t handle_size = 0;
+    uuid_t pgfid = {
+        0,
+    };
+    int nlink_samepgfid = 0;
+    struct stat stbuf = {
+        0,
+    };
+    char *list = NULL;
+    int32_t list_offset = 0;
+    struct posix_private *priv = NULL;
+    ssize_t size = 0;
+    inode_t *parent = NULL;
+    loc_t *loc = NULL;
+    char *leaf_path = NULL;
+    char key[4096] = {
+        0,
+    };
+    char dirpath[PATH_MAX] = {
+        0,
+    };
+    char pgfidstr[UUID_CANONICAL_FORM_LEN + 1] = {
+        0,
+    };
+    int len;
+
+    priv = this->private;
+
+    loc = GF_CALLOC(1, sizeof(*loc), gf_posix_mt_char);
+    if (loc == NULL) {
+        op_ret = -1;
+        *op_errno = ENOMEM;
+        goto out;
+    }
+
+    gf_uuid_copy(loc->gfid, leaf_inode->gfid);
+
+    MAKE_INODE_HANDLE(leaf_path, this, loc, NULL);
+    GF_FREE(loc);
+    loc = NULL;
+    if (!leaf_path) {
+        op_ret = -1;
+        *op_errno = ESTALE;
+        goto out;
+    }
+
+    size = sys_llistxattr(leaf_path, NULL, 0);
+    if (size == -1) {
+        /* Make the failure explicit instead of relying on a stale op_ret. */
+        op_ret = -1;
+        *op_errno = errno;
+        if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+            GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+                                GF_LOG_WARNING,
+                                "Extended attributes not "
+                                "supported (try remounting brick"
+                                " with 'user_xattr' flag)");
+
+        } else {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+                   "listxattr failed on "
+                   "%s",
+                   leaf_path);
+        }
+
+        goto out;
+    }
+
+    if (size == 0) {
+        op_ret = 0;
+        goto out;
+    }
+
+    list = alloca(size);
+
+    size = sys_llistxattr(leaf_path, list, size);
+    if (size < 0) {
+        op_ret = -1;
+        *op_errno = errno;
+        goto out;
+    }
+    remaining_size = size;
+    list_offset = 0;
+
+    op_ret = sys_lstat(leaf_path, &stbuf);
+    if (op_ret == -1) {
+        *op_errno = errno;
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+               "lstat failed on %s", leaf_path);
+        goto out;
+    }
+
+    while (remaining_size > 0) {
+        len = snprintf(key, sizeof(key), "%s", list + list_offset);
+        if (strncmp(key, PGFID_XATTR_KEY_PREFIX,
+                    SLEN(PGFID_XATTR_KEY_PREFIX)) != 0)
+            goto next;
+
+        op_ret = sys_lgetxattr(leaf_path, key, &nlink_samepgfid,
+                               sizeof(nlink_samepgfid));
+        if (op_ret == -1) {
+            *op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                   "getxattr failed on "
+                   "%s: key = %s ",
+                   leaf_path, key);
+            goto out;
+        }
+
+        nlink_samepgfid = ntoh32(nlink_samepgfid);
+
+        snprintf(pgfidstr, sizeof(pgfidstr), "%s",
+                 key + SLEN(PGFID_XATTR_KEY_PREFIX));
+        gf_uuid_parse(pgfidstr, pgfid);
+
+        handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+        /* constructing the absolute real path of parent dir */
+        snprintf(dirpath, sizeof(dirpath), "%s", priv->base_path);
+        /* space left in dirpath after the base path, not a byte more */
+        pathlen = sizeof(dirpath) - priv->base_path_length;
+
+        op_ret = posix_make_ancestryfromgfid(
+            this, dirpath + priv->base_path_length, pathlen, head,
+            type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path,
+            leaf_inode->table, &parent, xdata, op_errno);
+        if (op_ret < 0) {
+            goto next;
+        }
+
+        dirpath[strlen(dirpath) - 1] = '\0';
+
+        posix_links_in_same_directory(dirpath, nlink_samepgfid, leaf_inode,
+                                      parent, &stbuf, head, path, type, xdata,
+                                      op_errno);
+
+        if (parent != NULL) {
+            inode_unref(parent);
+            parent = NULL;
+        }
+
+    next:
+        /*
+         * Stop once this name consumes the rest of the list; this also keeps
+         * the unsigned counter from wrapping and ends the loop if snprintf
+         * reported an error.
+         */
+        if (len < 0 || (size_t)len + 1 >= remaining_size)
+            break;
+        remaining_size -= (len + 1);
+        list_offset += (len + 1);
+    } /* while (remaining_size > 0) */
+
+    op_ret = 0;
+
+out:
+    return op_ret;
+}
+
+/*
+ * Dispatch ancestry building on the inode type: directories go to
+ * posix_get_ancestry_directory(); everything else goes to
+ * posix_get_ancestry_non_directory(), but only when update_pgfid_nlinks is
+ * enabled (otherwise -1 is returned).
+ *
+ * On a non-zero result any path already stored in *path is freed and *path
+ * is reset to NULL.
+ */
+int
+posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head,
+                   char **path, int type, int32_t *op_errno, dict_t *xdata)
+{
+    struct posix_private *priv = this->private;
+    int ret = -1;
+
+    if (IA_ISDIR(leaf_inode->ia_type)) {
+        ret = posix_get_ancestry_directory(this, leaf_inode, head, path, type,
+                                           op_errno, xdata);
+    } else if (priv->update_pgfid_nlinks) {
+        ret = posix_get_ancestry_non_directory(this, leaf_inode, head, path,
+                                               type, op_errno, xdata);
+    }
+
+    if (ret && path && *path) {
+        GF_FREE(*path);
+        *path = NULL;
+    }
+
+    return ret;
+}
+
+/**
+ * posix_getxattr - this function returns a dictionary with all the
+ * key:value pair present as xattr. used for
+ * both 'listxattr' and 'getxattr'.
+ */
+int32_t
+posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ struct posix_private *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *value = NULL;
+ char *real_path = NULL;
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *path = NULL;
+ char *rpath = NULL;
+ ssize_t size = 0;
+ char *list = NULL;
+ int32_t list_offset = 0;
+ size_t remaining_size = 0;
+ char *host_buf = NULL;
+ char *keybuffer = NULL;
+ int keybuff_len;
+ char *value_buf = NULL;
+ gf_boolean_t have_val = _gf_false;
+ struct iatt buf = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(this->private, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+
+ op_ret = -1;
+ priv = this->private;
+
+ ret = posix_handle_georep_xattrs(frame, name, &op_errno, _gf_true);
+ if (ret == -1) {
+ op_ret = -1;
+ /* errno should be set from the above function*/
+ goto out;
+ }
+
+ ret = posix_handle_mdata_xattr(frame, name, &op_errno);
+ if (ret == -1) {
+ op_ret = -1;
+ /* errno should be set from the above function*/
+ goto out;
+ }
+
+ if (name && posix_is_gfid2path_xattr(name)) {
+ op_ret = -1;
+ op_errno = ENOATTR;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) {
+ ret = posix_pacl_get(real_path, -1, name, &value);
+ if (ret || !value) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED,
+ "could not get acl (%s) for"
+ "gfid-handle %s (path: %s)",
+ name, real_path, loc->path);
+ op_ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(dict, (char *)name, value);
+ if (ret < 0) {
+ GF_FREE(value);
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED,
+ "could not set acl (%s) for %s "
+ "(gfid-handle: %s) in dictionary",
+ name, loc->path, real_path);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strncmp(name, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
+ ret = posix_xattr_get_real_filename(frame, this, loc, name, dict,
+ xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ if (op_errno == ENOATTR) {
+ gf_msg_debug(this->name, 0,
+ "Failed to get "
+ "real filename (%s, %s)",
+ loc->path, name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_GETTING_FILENAME_FAILED,
+ "Failed to get real filename (%s, %s):", loc->path,
+ name);
+ }
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
+ if (loc->inode && name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) {
+ if (!fd_list_empty(loc->inode)) {
+ ret = dict_set_uint32(dict, (char *)name, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value for %s",
+ name);
+ op_errno = ENOMEM;
+ goto out;
+ }
+ } else {
+ ret = dict_set_uint32(dict, (char *)name, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value for %s",
+ name);
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ goto done;
+ }
+ if (loc->inode && name && (XATTR_IS_PATHINFO(name))) {
+ VALIDATE_OR_GOTO(this->private, out);
+ if (LOC_HAS_ABSPATH(loc)) {
+ MAKE_REAL_PATH(rpath, this, loc->path);
+ } else {
+ rpath = real_path;
+ }
+ size = gf_asprintf(
+ &host_buf, "<POSIX(%s):%s:%s>", priv->base_path,
+ ((priv->node_uuid_pathinfo && !gf_uuid_is_null(priv->glusterd_uuid))
+ ? uuid_utoa(priv->glusterd_uuid)
+ : priv->hostname),
+ rpath);
+ if (size < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr(dict, (char *)name, host_buf);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "could not set value"
+ " (%s) in dictionary",
+ host_buf);
+ GF_FREE(host_buf);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GF_XATTR_NODE_UUID_KEY) == 0) &&
+ !gf_uuid_is_null(priv->glusterd_uuid)) {
+ size = gf_asprintf(&host_buf, "%s", uuid_utoa(priv->glusterd_uuid));
+ if (size == -1) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr(dict, GF_XATTR_NODE_UUID_KEY, host_buf);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+ "could not set value"
+ "(%s) in dictionary",
+ host_buf);
+ GF_FREE(host_buf);
+ op_errno = -ret;
+ goto out;
+ }
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GFID_TO_PATH_KEY) == 0)) {
+ ret = inode_path(loc->inode, NULL, &path);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_INODE_PATH_GET_FAILED,
+ "%s: could not get "
+ "inode path",
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ size = ret;
+ ret = dict_set_dynstr(dict, GFID_TO_PATH_KEY, path);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ GF_FREE(path);
+ goto out;
+ }
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GFID2PATH_VIRT_XATTR_KEY) == 0)) {
+ if (!priv->gfid2path) {
+ op_errno = ENOATTR;
+ op_ret = -1;
+ goto out;
+ }
+ ret = posix_get_gfid2path(this, loc->inode, real_path, &op_errno, dict);
+ if (ret < 0) {
+ op_ret = -1;
+ goto out;
+ }
+ size = ret;
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GET_ANCESTRY_PATH_KEY) == 0)) {
+ int type = POSIX_ANCESTRY_PATH;
+
+ op_ret = posix_get_ancestry(this, loc->inode, NULL, &path, type,
+ &op_errno, xdata);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto out;
+ }
+ size = op_ret;
+ op_ret = dict_set_dynstr(dict, GET_ANCESTRY_PATH_KEY, path);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -op_ret,
+ P_MSG_GET_KEY_VALUE_FAILED,
+ "could not get "
+ "value for key (%s)",
+ GET_ANCESTRY_PATH_KEY);
+ GF_FREE(path);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) {
+ op_ret = posix_get_objectsignature(real_path, dict);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ /* here allocate value_buf of 8192 bytes to avoid one extra getxattr
+ call,If buffer size is small to hold the xattr result then it will
+ allocate a new buffer value of required size and call getxattr again
+ */
+
+ value_buf = alloca(XATTR_VAL_BUF_SIZE);
+ if (name) {
+ char *key = (char *)name;
+
+ keybuffer = key;
+#if defined(GF_DARWIN_HOST_OS_DISABLED)
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ if (strncmp(key, "user.", 5) == 0) {
+ key += 5;
+ gf_msg_debug(this->name, 0,
+ "getxattr for file %s (gfid-handle: %s)"
+ " stripping user key: %s -> %s",
+ loc->path, real_path, keybuffer, key);
+ }
+ }
+#endif
+ size = sys_lgetxattr(real_path, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed due to overflow of buffer"
+ " on gfid-handle %s (path: %s) : %s ",
+ real_path, loc->path, key);
+ size = sys_lgetxattr(real_path, key, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ }
+ if ((op_errno == ENOATTR) || (op_errno == ENODATA)) {
+ gf_msg_debug(this->name, 0,
+ "No such attribute:%s for file %s (path: %s)",
+ key, real_path, loc->path);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_XATTR_FAILED,
+ "getxattr failed on "
+ "%s (path: %s): %s ",
+ real_path, loc->path, key);
+ }
+ goto out;
+ }
+ }
+ value = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(value, value_buf, size);
+ } else {
+ bzero(value, size + 1);
+ size = sys_lgetxattr(real_path, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on %s (path: %s): key = %s", real_path,
+ loc->path, key);
+ GF_FREE(value);
+ goto out;
+ }
+ }
+ value[size] = '\0';
+ op_ret = dict_set_dynptr(dict, key, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "on %s (gfid-handle: %s) for the key %s failed.",
+ loc->path, real_path, key);
+ GF_FREE(value);
+ goto out;
+ }
+
+ goto done;
+ }
+
+ have_val = _gf_false;
+ size = sys_llistxattr(real_path, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size > 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed due to overflow of buffer"
+ " on %s (path: %s) ",
+ real_path, loc->path);
+ size = sys_llistxattr(real_path, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed on %s (path: %s)", real_path,
+ loc->path);
+ }
+ goto out;
+ }
+ if (size == 0)
+ goto done;
+ }
+ list = alloca(size);
+ if (!list) {
+ op_errno = errno;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(list, value_buf, size);
+ } else {
+ size = sys_llistxattr(real_path, list, size);
+ if (size < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ }
+ remaining_size = size;
+ list_offset = 0;
+ keybuffer = alloca(XATTR_KEY_BUF_SIZE);
+ while (remaining_size > 0) {
+ keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s",
+ list + list_offset);
+
+ ret = posix_handle_georep_xattrs(frame, keybuffer, NULL, _gf_false);
+ if (ret == -1)
+ goto ignore;
+
+ ret = posix_handle_mdata_xattr(frame, keybuffer, &op_errno);
+ if (ret == -1) {
+ goto ignore;
+ }
+
+ if (posix_is_gfid2path_xattr(keybuffer)) {
+ goto ignore;
+ }
+
+ have_val = _gf_false;
+ size = sys_lgetxattr(real_path, keybuffer, value_buf,
+ XATTR_VAL_BUF_SIZE - 1);
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED,
+ "getxattr failed due to overflow of"
+ " buffer on %s (path: %s): %s ",
+ real_path, loc->path, keybuffer);
+ size = sys_lgetxattr(real_path, keybuffer, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on"
+ " %s (path: %s): key = %s ",
+ real_path, loc->path, keybuffer);
+ goto out;
+ }
+ }
+ value = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!value) {
+ op_errno = errno;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(value, value_buf, size);
+ } else {
+ bzero(value, size + 1);
+ size = sys_lgetxattr(real_path, keybuffer, value, size);
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on"
+ " %s (path: %s): key = %s ",
+ real_path, loc->path, keybuffer);
+ GF_FREE(value);
+ goto out;
+ }
+ }
+ value[size] = '\0';
+#ifdef GF_DARWIN_HOST_OS
+ /* The protocol expect namespace for now */
+ char *newkey = NULL;
+ gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey);
+ keybuff_len = snprintf(keybuffer, sizeof(keybuffer), "%s", newkey);
+ GF_FREE(newkey);
+#endif
+ op_ret = dict_set_dynptr(dict, keybuffer, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "on %s (gfid-handle: %s) for the key %s failed.",
+ loc->path, real_path, keybuffer);
+ GF_FREE(value);
+ goto out;
+ }
+
+ ignore:
+ remaining_size -= keybuff_len + 1;
+ list_offset += keybuff_len + 1;
+
+ } /* while (remaining_size > 0) */
+
+done:
+ op_ret = size;
+
+ if (xdata && (op_ret >= 0)) {
+ xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ &buf);
+ }
+
+ if (dict) {
+ dict_del(dict, GFID_XATTR_KEY);
+ dict_del(dict, GF_XATTR_VOL_ID_KEY);
+ }
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ return 0;
+}
+
+ /**
+  * posix_fgetxattr - read extended attributes through an open fd.
+  *
+  * @frame: call frame; the fop is always answered via STACK_UNWIND_STRICT.
+  * @this:  posix xlator instance.
+  * @fd:    open fd; its backing descriptor is obtained via posix_fd_ctx_get().
+  * @name:  key to fetch, or NULL to list all xattrs and fetch each value.
+  * @xdata: optional request dict; when set and the fop succeeded it is passed
+  *         to posix_xattr_fill() to build the response xdata.
+  *
+  * The reply dict maps key -> value.  GFID_XATTR_KEY and GF_XATTR_VOL_ID_KEY
+  * are deleted from it before unwinding.  On the "done" path op_ret is the
+  * last value stored in `size`; on failure op_ret is -1 and op_errno is set.
+  *
+  * Always returns 0.
+  */
+ int32_t
+ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+                 dict_t *xdata)
+ {
+     int32_t op_ret = -1;
+     int32_t op_errno = EINVAL;
+     struct posix_fd *pfd = NULL;
+     int _fd = -1;
+     int32_t list_offset = 0;
+     ssize_t size = 0;
+     size_t remaining_size = 0;
+     char *value = NULL;
+     char *list = NULL;
+     dict_t *dict = NULL;
+     int ret = -1;
+     char key[4096] = {
+         0,
+     };
+     int key_len;
+     char *value_buf = NULL;
+     gf_boolean_t have_val = _gf_false;
+     struct iatt buf = {
+         0,
+     };
+     dict_t *xattr_rsp = NULL;
+
+     DECLARE_OLD_FS_ID_VAR;
+
+     VALIDATE_OR_GOTO(frame, out);
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
+
+     SET_FS_ID(frame->root->uid, frame->root->gid);
+
+     ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+     if (ret < 0) {
+         op_ret = -1;
+         gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+                "pfd is NULL from fd=%p", fd);
+         goto out;
+     }
+
+     _fd = pfd->fd;
+
+     /* Get the total size */
+     dict = dict_new();
+     if (!dict) {
+         op_ret = -1;
+         op_errno = ENOMEM;
+         goto out;
+     }
+
+     /* Virtual key: answered with a constant, no syscall involved. */
+     if (name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) {
+         ret = dict_set_uint32(dict, (char *)name, 1);
+         if (ret < 0) {
+             op_ret = -1;
+             size = -1;
+             op_errno = ENOMEM;
+             gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+                    "Failed to set "
+                    "dictionary value for %s",
+                    name);
+             goto out;
+         }
+         goto done;
+     }
+
+     /* Virtual key: object signature is assembled by a helper. */
+     if (name && strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+                         SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) {
+         op_ret = posix_fdget_objectsignature(_fd, dict);
+         if (op_ret < 0) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                    "posix_fdget_objectsignature failed");
+             op_errno = -op_ret;
+             op_ret = -1;
+             size = -1;
+             goto out;
+         }
+
+         goto done;
+     }
+
+     /* Allocate a fixed-size value_buf up front to avoid one extra getxattr
+      * call.  If it is too small for the result, a buffer of the required
+      * size is allocated and getxattr is called again.
+      */
+     value_buf = alloca(XATTR_VAL_BUF_SIZE);
+
+     if (name) {
+         /* ---- single named key ---- */
+         key_len = snprintf(key, sizeof(key), "%s", name);
+ #ifdef GF_DARWIN_HOST_OS
+         struct posix_private *priv = NULL;
+         priv = this->private;
+         if (priv->xattr_user_namespace == XATTR_STRIP) {
+             char *newkey = NULL;
+             gf_add_prefix(XATTR_USER_PREFIX, key, &newkey);
+             key_len = snprintf(key, sizeof(key), "%s", newkey);
+             GF_FREE(newkey);
+         }
+ #endif
+         size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
+         if (size >= 0) {
+             have_val = _gf_true;
+         } else {
+             if (errno == ERANGE) {
+                 gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+                        "fgetxattr failed due to overflow of"
+                        "buffer on %s ",
+                        key);
+                 /* Probe for the real size. */
+                 size = sys_fgetxattr(_fd, key, NULL, 0);
+             }
+             if (size == -1) {
+                 op_errno = errno;
+                 if (errno == ENODATA || errno == ENOATTR) {
+                     gf_msg_debug(this->name, 0,
+                                  "fgetxattr"
+                                  " failed on key %s (%s)",
+                                  key, strerror(op_errno));
+                 } else {
+                     gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                            "fgetxattr"
+                            " failed on key %s",
+                            key);
+                 }
+                 /* size is -1 here, so "done" yields op_ret == -1. */
+                 goto done;
+             }
+         }
+         value = GF_MALLOC(size + 1, gf_posix_mt_char);
+         if (!value) {
+             op_ret = -1;
+             op_errno = ENOMEM;
+             goto out;
+         }
+         if (have_val) {
+             memcpy(value, value_buf, size);
+         } else {
+             bzero(value, size + 1);
+             size = sys_fgetxattr(_fd, key, value, size);
+             if (size == -1) {
+                 op_ret = -1;
+                 op_errno = errno;
+                 gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                        "fgetxattr"
+                        " failed on fd %p for the key %s ",
+                        fd, key);
+                 GF_FREE(value);
+                 goto out;
+             }
+         }
+
+         value[size] = '\0';
+         op_ret = dict_set_dynptr(dict, key, value, size);
+         if (op_ret < 0) {
+             op_errno = -op_ret;
+             op_ret = -1;
+             gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED,
+                    "dict set operation "
+                    "on key %s failed",
+                    key);
+             GF_FREE(value);
+             goto out;
+         }
+
+         goto done;
+     }
+
+     /* ---- no key given: list every xattr, then fetch each value ---- */
+     size = sys_flistxattr(_fd, value_buf, XATTR_VAL_BUF_SIZE - 1);
+     if (size > 0) {
+         have_val = _gf_true;
+     } else {
+         if (errno == ERANGE) {
+             gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+                    "listxattr failed due to overflow of buffer"
+                    " on %p ",
+                    fd);
+             size = sys_flistxattr(_fd, NULL, 0);
+         }
+         if (size == -1) {
+             op_ret = -1;
+             op_errno = errno;
+             if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+                 GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+                                     GF_LOG_WARNING,
+                                     "Extended attributes not "
+                                     "supported (try remounting "
+                                     "brick with 'user_xattr' flag)");
+             } else {
+                 gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                        "listxattr failed "
+                        "on %p:",
+                        fd);
+             }
+             goto out;
+         }
+         if (size == 0)
+             goto done;
+     }
+     list = alloca(size + 1);
+     if (!list) {
+         op_ret = -1;
+         op_errno = ENOMEM;
+         goto out;
+     }
+     if (have_val) {
+         memcpy(list, value_buf, size);
+     } else {
+         /* The right-sized retry can still fail (for instance if the set of
+          * xattrs changed in between).  A negative size must not reach the
+          * unsigned remaining_size below, or the loop would walk
+          * uninitialised memory. */
+         size = sys_flistxattr(_fd, list, size);
+         if (size < 0) {
+             op_ret = -1;
+             op_errno = errno;
+             goto out;
+         }
+     }
+
+     remaining_size = size;
+     list_offset = 0;
+     while (remaining_size > 0) {
+         if (*(list + list_offset) == '\0')
+             break;
+
+         key_len = snprintf(key, sizeof(key), "%s", list + list_offset);
+         have_val = _gf_false;
+         size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
+         if (size >= 0) {
+             have_val = _gf_true;
+         } else {
+             if (errno == ERANGE) {
+                 gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+                        "fgetxattr failed due to overflow of buffer"
+                        " on fd %p: for the key %s ",
+                        fd, key);
+                 size = sys_fgetxattr(_fd, key, NULL, 0);
+             }
+             if (size == -1) {
+                 op_errno = errno;
+                 gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                        "fgetxattr failed "
+                        "on fd %p for the key %s ",
+                        fd, key);
+                 /* size == -1 makes "done" report failure. */
+                 break;
+             }
+         }
+         value = GF_MALLOC(size + 1, gf_posix_mt_char);
+         if (!value) {
+             op_ret = -1;
+             op_errno = ENOMEM;
+             goto out;
+         }
+         if (have_val) {
+             memcpy(value, value_buf, size);
+         } else {
+             bzero(value, size + 1);
+             size = sys_fgetxattr(_fd, key, value, size);
+             if (size == -1) {
+                 op_errno = errno;
+                 gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                        "fgetxattr failed o"
+                        "n the fd %p for the key %s ",
+                        fd, key);
+                 GF_FREE(value);
+                 break;
+             }
+         }
+         value[size] = '\0';
+
+         op_ret = dict_set_dynptr(dict, key, value, size);
+         if (op_ret) {
+             op_errno = -op_ret;
+             op_ret = -1;
+             gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED,
+                    "dict set operation "
+                    "failed on key %s",
+                    key);
+             GF_FREE(value);
+             goto out;
+         }
+         /* snprintf() returned the untruncated key length, so this steps to
+          * the next NUL-separated name in the list. */
+         remaining_size -= key_len + 1;
+         list_offset += key_len + 1;
+
+     } /* while (remaining_size > 0) */
+
+ done:
+     op_ret = size;
+
+     if (xdata && (op_ret >= 0)) {
+         xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata,
+                                      &buf);
+     }
+
+     if (dict) {
+         dict_del(dict, GFID_XATTR_KEY);
+         dict_del(dict, GF_XATTR_VOL_ID_KEY);
+     }
+
+ out:
+     SET_TO_OLD_FS_ID();
+
+     STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp);
+
+     if (xattr_rsp)
+         dict_unref(xattr_rsp);
+
+     if (dict)
+         dict_unref(dict);
+
+     return 0;
+ }
+
+ /*
+  * dict_foreach() callback used by posix_fsetxattr(): unpack the filler passed
+  * as @tmp and forward one key/value pair to posix_fhandle_pair(), whose
+  * result is returned unchanged.  @d is unused.
+  */
+ static int
+ _handle_fsetxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+ {
+     posix_xattr_filler_t *args = tmp;
+
+     return posix_fhandle_pair(args->frame, args->this, args->fdnum, k, v,
+                               args->flags, args->stbuf, args->fd);
+ }
+
+ /**
+  * posix_fsetxattr - set every key/value pair of @dict as an xattr on @fd.
+  *
+  * @frame: call frame; the fop is always answered via STACK_UNWIND_STRICT.
+  * @this:  posix xlator instance.
+  * @fd:    open fd to operate on.
+  * @dict:  xattrs to set; GFID_XATTR_KEY and GF_XATTR_VOL_ID_KEY are removed
+  *         from it before any pair is applied.
+  * @flags: setxattr flags forwarded to posix_fhandle_pair().
+  * @xdata: optional request dict; if it contains GLUSTERFS_DURABLE_OP the fd
+  *         is fsync'ed after the xattrs were set.
+  *
+  * The response xdata carries the pre- and post-operation iatt, when both
+  * could be collected.  Always returns 0.
+  */
+ int32_t
+ posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+                 int flags, dict_t *xdata)
+ {
+     int32_t op_ret = -1;
+     int32_t op_errno = 0;
+     struct posix_fd *pfd = NULL;
+     int _fd = -1;
+     int ret = -1;
+     struct iatt preop = {
+         0,
+     };
+     struct iatt postop = {
+         0,
+     };
+     dict_t *xattr = NULL;
+     posix_xattr_filler_t filler = {
+         0,
+     };
+     struct posix_private *priv = NULL;
+
+     DECLARE_OLD_FS_ID_VAR;
+     SET_FS_ID(frame->root->uid, frame->root->gid);
+
+     VALIDATE_OR_GOTO(frame, out);
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
+     VALIDATE_OR_GOTO(dict, out);
+
+     priv = this->private;
+     DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+     ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+     if (ret < 0) {
+         gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+                "pfd is NULL from fd=%p", fd);
+         goto out;
+     }
+     _fd = pfd->fd;
+
+     ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
+     if (ret == -1) {
+         op_errno = errno;
+         gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+                "fsetxattr (fstat)"
+                "failed on fd=%p",
+                fd);
+         goto out;
+     }
+
+     /* Clients must never overwrite these two internal xattrs. */
+     dict_del(dict, GFID_XATTR_KEY);
+     dict_del(dict, GF_XATTR_VOL_ID_KEY);
+
+     filler.fdnum = _fd;
+     filler.this = this;
+     filler.frame = frame;
+     filler.stbuf = &preop;
+     filler.fd = fd;
+ #ifdef GF_DARWIN_HOST_OS
+     filler.flags = map_xattr_flags(flags);
+ #else
+     filler.flags = flags;
+ #endif
+     op_ret = dict_foreach(dict, _handle_fsetxattr_keyvalue_pair, &filler);
+     if (op_ret < 0) {
+         op_errno = -op_ret;
+         op_ret = -1;
+     }
+
+     /* Only honour the durability request when the xattrs were actually set.
+      * Gating on the earlier fstat result (always 0 at this point) would let a
+      * successful fsync overwrite a setxattr failure with success. */
+     if ((op_ret >= 0) && xdata && dict_get(xdata, GLUSTERFS_DURABLE_OP)) {
+         op_ret = sys_fsync(_fd);
+         if (op_ret < 0) {
+             op_ret = -1;
+             op_errno = errno;
+             gf_msg(this->name, GF_LOG_WARNING, errno,
+                    P_MSG_DURABILITY_REQ_NOT_SATISFIED,
+                    "could not satisfy durability request: "
+                    "reason ");
+         }
+     }
+
+     ret = posix_fdstat(this, fd->inode, pfd->fd, &postop);
+     if (ret == -1) {
+         op_errno = errno;
+         gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+                "fsetxattr (fstat)"
+                "failed on fd=%p",
+                fd);
+         goto out;
+     }
+     xattr = dict_new();
+     if (!xattr)
+         goto out;
+
+     /* Best effort: a failure to attach the iatts does not fail the fop. */
+     ret = posix_set_iatt_in_dict(xattr, &preop, &postop);
+
+ out:
+     SET_TO_OLD_FS_ID();
+
+     STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xattr);
+
+     if (xattr)
+         dict_unref(xattr);
+
+     return 0;
+ }
+
+ /*
+  * dict_foreach() callback for bulk removexattr: remove @key from the file
+  * identified by the filler in @data (by path when filler->real_path is set,
+  * otherwise by filler->fdnum).
+  *
+  * Returns 0 on success or when the xattr was already absent, -1 otherwise
+  * (with filler->op_errno set).  @dict and @value are unused.
+  */
+ int
+ _posix_remove_xattr(dict_t *dict, char *key, data_t *value, void *data)
+ {
+     posix_xattr_filler_t *filler = (posix_xattr_filler_t *)data;
+     xlator_t *this = filler->this;
+     int32_t op_ret = 0;
+ #ifdef GF_DARWIN_HOST_OS
+     struct posix_private *priv = NULL;
+     priv = (struct posix_private *)this->private;
+     char *newkey = NULL;
+     if (priv->xattr_user_namespace == XATTR_STRIP) {
+         gf_remove_prefix(XATTR_USER_PREFIX, key, &newkey);
+         gf_msg_debug("remove_xattr", 0, "key %s => %s", key, newkey);
+         key = newkey;
+     }
+ #endif
+     /* Bulk removexattr is an internal fop in gluster.  Some xattrs have
+      * special behaviour, e.g. removexattr("posix.system_acl_access") removes
+      * more than one xattr, and those may also be listed in the same bulk
+      * request.  Removing an xattr that is already gone fails with
+      * ENODATA/ENOATTR; since the caller only cares that the xattrs end up
+      * removed, that case is treated as success.
+      */
+     op_ret = filler->real_path ? sys_lremovexattr(filler->real_path, key)
+                                : sys_fremovexattr(filler->fdnum, key);
+
+     if (op_ret == -1) {
+         int saved_errno = errno;
+
+         if (saved_errno == ENODATA || saved_errno == ENOATTR) {
+             op_ret = 0;
+         } else {
+             filler->op_errno = saved_errno;
+             /* EPERM is reported to the caller but not logged. */
+             if (saved_errno != EPERM) {
+                 gf_msg(this->name, GF_LOG_ERROR, saved_errno,
+                        P_MSG_XATTR_FAILED,
+                        "removexattr failed on "
+                        "file/dir %s with gfid: %s (for %s)",
+                        filler->real_path ? filler->real_path : "",
+                        uuid_utoa(filler->inode->gfid), key);
+             }
+         }
+     }
+ #ifdef GF_DARWIN_HOST_OS
+     GF_FREE(newkey);
+ #endif
+     return op_ret;
+ }
+
+ /**
+  * posix_common_removexattr - shared body of removexattr and fremovexattr.
+  *
+  * @frame:     call frame (supplies the xlator and the caller's uid/gid).
+  * @loc:       target by path, or NULL to operate on @fd instead.
+  * @fd:        target by fd; only consulted when @loc is NULL.
+  * @name:      xattr to remove (or the bulk-removexattr marker, in which case
+  *             the keys to remove are taken from @xdata).
+  * @xdata:     request dict.
+  * @op_errno:  out: errno describing the failure.
+  * @xdata_rsp: out: on success, a new dict carrying the pre/post iatt; the
+  *             caller owns the reference.
+  *
+  * Returns 0 on success, a negative value otherwise.  This function does not
+  * unwind the frame; the callers do.
+  */
+ int
+ posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd,
+                          const char *name, dict_t *xdata, int *op_errno,
+                          dict_t **xdata_rsp)
+ {
+     gf_boolean_t bulk_removexattr = _gf_false;
+     gf_boolean_t disallow = _gf_false;
+     char *real_path = NULL;
+     struct posix_fd *pfd = NULL;
+     int op_ret = 0;
+     struct iatt preop = {
+         0,
+     };
+     struct iatt postop = {
+         0,
+     };
+     int ret = 0;
+     int _fd = -1;
+     xlator_t *this = frame->this;
+     inode_t *inode = NULL;
+     posix_xattr_filler_t filler = {0};
+
+     DECLARE_OLD_FS_ID_VAR;
+
+     SET_FS_ID(frame->root->uid, frame->root->gid);
+
+     /* Resolve the target: a backend path for loc, a descriptor for fd. */
+     if (loc) {
+         MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+         if (!real_path) {
+             op_ret = -1;
+             *op_errno = ESTALE;
+             goto out;
+         }
+         inode = loc->inode;
+     } else {
+         op_ret = posix_fd_ctx_get(fd, this, &pfd, op_errno);
+         if (op_ret < 0) {
+             gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_PFD_NULL,
+                    "pfd is NULL from fd=%p", fd);
+             goto out;
+         }
+         _fd = pfd->fd;
+         inode = fd->inode;
+     }
+
+     /* gfid2path xattrs are reported as absent to the caller. */
+     if (posix_is_gfid2path_xattr(name)) {
+         op_ret = -1;
+         *op_errno = ENOATTR;
+         goto out;
+     }
+
+     /* Pre-op stat is best effort: a failure is logged but not fatal. */
+     if (loc) {
+         ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED,
+                    "pstat operaton failed on %s", real_path);
+         }
+     } else {
+         ret = posix_fdstat(this, inode, _fd, &preop);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED,
+                    "fdstat operaton failed on %s", real_path ? real_path : "");
+         }
+     }
+
+     if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) {
+         gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+                "Remove xattr called on %s for file/dir %s with gfid: "
+                "%s",
+                name, real_path ? real_path : "", uuid_utoa(inode->gfid));
+         op_ret = -1;
+         *op_errno = EPERM;
+         goto out;
+     } else if (posix_is_bulk_removexattr((char *)name, xdata)) {
+         bulk_removexattr = _gf_true;
+         (void)dict_has_key_from_array(xdata, disallow_removexattrs, &disallow);
+         if (disallow) {
+             gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+                    "Bulk removexattr has keys that shouldn't be "
+                    "removed for file/dir %s with gfid: %s",
+                    real_path ? real_path : "", uuid_utoa(inode->gfid));
+             op_ret = -1;
+             *op_errno = EPERM;
+             goto out;
+         }
+     }
+
+     if (bulk_removexattr) {
+         filler.real_path = real_path;
+         filler.this = this;
+         filler.fdnum = _fd;
+         filler.inode = inode;
+         op_ret = dict_foreach(xdata, _posix_remove_xattr, &filler);
+         if (op_ret) {
+             *op_errno = filler.op_errno;
+             goto out;
+         }
+     } else {
+         if (loc)
+             op_ret = sys_lremovexattr(real_path, name);
+         else
+             op_ret = sys_fremovexattr(_fd, name);
+         if (op_ret == -1) {
+             *op_errno = errno;
+             if (*op_errno != ENOATTR && *op_errno != ENODATA &&
+                 *op_errno != EPERM) {
+                 /* real_path is NULL on the fd path; never hand NULL to %s. */
+                 gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED,
+                        "removexattr on %s with gfid %s "
+                        "(for %s)",
+                        real_path ? real_path : "", uuid_utoa(inode->gfid),
+                        name);
+             }
+             goto out;
+         }
+     }
+
+     if (loc) {
+         posix_set_ctime(frame, this, real_path, -1, inode, NULL);
+         ret = posix_pstat(this, inode, loc->gfid, real_path, &postop,
+                           _gf_false);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED,
+                    "pstat operaton failed on %s", real_path);
+         }
+     } else {
+         posix_set_ctime(frame, this, NULL, _fd, inode, NULL);
+         ret = posix_fdstat(this, inode, _fd, &postop);
+         if (ret) {
+             /* real_path is always NULL in this branch. */
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED,
+                    "fdstat operaton failed on %s", real_path ? real_path : "");
+         }
+     }
+     /* The xattr is already gone: without a post-op stat just skip the
+      * response dict; op_ret is still 0 at this point. */
+     if (ret)
+         goto out;
+     *xdata_rsp = dict_new();
+     if (!*xdata_rsp)
+         goto out;
+
+     ret = posix_set_iatt_in_dict(*xdata_rsp, &preop, &postop);
+
+     op_ret = 0;
+ out:
+     SET_TO_OLD_FS_ID();
+     return op_ret;
+ }
+
+ /*
+  * removexattr fop (path based): delegate to posix_common_removexattr() and
+  * unwind with its result.  Replies -1/EINVAL when @loc is NULL.
+  * Always returns 0.
+  */
+ int32_t
+ posix_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   const char *name, dict_t *xdata)
+ {
+     dict_t *rsp = NULL;
+     int err = EINVAL;
+     int rc = -1;
+
+     VALIDATE_OR_GOTO(loc, out);
+
+     rc = posix_common_removexattr(frame, loc, NULL, name, xdata, &err, &rsp);
+ out:
+     STACK_UNWIND_STRICT(removexattr, frame, rc, err, rsp);
+
+     if (rsp != NULL)
+         dict_unref(rsp);
+
+     return 0;
+ }
+
+ /*
+  * fremovexattr fop (fd based): delegate to posix_common_removexattr() and
+  * unwind with its result.  Replies -1/EINVAL when @fd is NULL.
+  * Always returns 0.
+  */
+ int32_t
+ posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    const char *name, dict_t *xdata)
+ {
+     dict_t *rsp = NULL;
+     int32_t err = EINVAL;
+     int32_t rc = -1;
+
+     VALIDATE_OR_GOTO(fd, out);
+
+     rc = posix_common_removexattr(frame, NULL, fd, name, xdata, &err, &rsp);
+ out:
+     STACK_UNWIND_STRICT(fremovexattr, frame, rc, err, rsp);
+
+     if (rsp != NULL)
+         dict_unref(rsp);
+
+     return 0;
+ }
+
+ /*
+  * fsyncdir fop: only verifies that @fd has a posix fd context and replies
+  * success; no sync syscall is issued here.  @datasync and @xdata are unused.
+  * Always returns 0.
+  */
+ int32_t
+ posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+                dict_t *xdata)
+ {
+     struct posix_fd *pfd = NULL;
+     int32_t op_errno = 0;
+     int32_t op_ret = -1;
+
+     VALIDATE_OR_GOTO(frame, out);
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
+
+     if (posix_fd_ctx_get(fd, this, &pfd, &op_errno) < 0) {
+         gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+                "pfd is NULL, fd=%p", fd);
+         goto out;
+     }
+
+     op_ret = 0;
+
+ out:
+     STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, NULL);
+
+     return 0;
+ }
+
+ /*
+  * Debug helper: log one dict entry, with the value rendered as a 32-bit
+  * integer.  @this and @data are unused.
+  */
+ void
+ posix_print_xattr(dict_t *this, char *key, data_t *value, void *data)
+ {
+     gf_msg_debug("posix", 0, "(key/val) = (%s/%d)",
+                  key,
+                  data_to_int32(value));
+ }
+
+/**
+ * add_array - add two arrays of 32-bit numbers (stored in network byte order)
+ * dest = dest + src
+ * @count: number of 32-bit numbers
+ * FIXME: handle overflow
+ */
+
+static void
+__add_array(int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ int32_t destval = 0;
+ for (i = 0; i < count; i++) {
+ destval = ntoh32(dest[i]);
+ dest[i] = hton32(destval + ntoh32(src[i]));
+ }
+}
+
+ /* 64-bit variant of __add_array: dest[i] += src[i] for @count numbers kept
+  * in network byte order. */
+ static void
+ __add_long_array(int64_t *dest, int64_t *src, int count)
+ {
+     int pos = 0;
+
+     while (pos < count) {
+         dest[pos] = hton64(ntoh64(dest[pos]) + ntoh64(src[pos]));
+         pos++;
+     }
+ }
+
+/* functions:
+ __add_array_with_default
+ __add_long_array_with_default
+
+ xattrop type:
+ GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT
+
+ These operations are similar to 'GF_XATTROP_ADD_ARRAY',
+ except that it adds a default value if xattr is missing
+ or its value is zero on disk.
+
+ One use-case of this operation is inode-quota.
+ When a new directory is created, its default dir_count
+ should be 1. So when an xattrop sets the inode-quota
+ xattrs and they are not yet present on disk, the initial
+ dir_count of 1 has to be accounted for.
+
+ Here is the usage of this operation
+
+ value required in xdata for each key
+ struct array {
+ int32_t newvalue_1;
+ int32_t newvalue_2;
+ ...
+ int32_t newvalue_n;
+ int32_t default_1;
+ int32_t default_2;
+ ...
+ int32_t default_n;
+ };
+
+ or
+
+ struct array {
+ int32_t value_1;
+ int32_t value_2;
+ ...
+ int32_t value_n;
+ } data[2];
+ fill data[0] with new value to add
+ fill data[1] with default value
+
+ xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
+ for i from 1 to n
+ {
+ if (xattr (dest_i) is zero or not set in the disk)
+ dest_i = newvalue_i + default_i
+ else
+ dest_i = dest_i + newvalue_i
+ }
+
+ value in xdata after xattrop is successful
+ struct array {
+ int32_t dest_1;
+ int32_t dest_2;
+ ...
+ int32_t dest_n;
+ };
+*/
+ /* 32-bit add-with-default: @src holds @count new values followed by @count
+  * defaults.  A zero on-disk element becomes new + default; any other element
+  * becomes old + new.  All numbers are in network byte order. */
+ static void
+ __add_array_with_default(int32_t *dest, int32_t *src, int count)
+ {
+     int idx;
+
+     for (idx = 0; idx < count; idx++) {
+         int32_t current = ntoh32(dest[idx]);
+
+         if (current != 0) {
+             dest[idx] = hton32(current + ntoh32(src[idx]));
+         } else {
+             dest[idx] = hton32(ntoh32(src[idx]) + ntoh32(src[count + idx]));
+         }
+     }
+ }
+
+ /* 64-bit add-with-default: @src holds @count new values followed by @count
+  * defaults.  A zero on-disk element becomes new + default; any other element
+  * becomes old + new.  All numbers are in network byte order. */
+ static void
+ __add_long_array_with_default(int64_t *dest, int64_t *src, int count)
+ {
+     int idx;
+
+     for (idx = 0; idx < count; idx++) {
+         int64_t current = ntoh64(dest[idx]);
+
+         if (current != 0) {
+             dest[idx] = hton64(current + ntoh64(src[idx]));
+         } else {
+             dest[idx] = hton64(ntoh64(src[idx]) + ntoh64(src[idx + count]));
+         }
+     }
+ }
+
+ /*
+  * dict_foreach() callback implementing one xattrop on one key.
+  *
+  * @d:   request dict being iterated (unused).
+  * @k:   xattr name.
+  * @v:   operand; for the *_WITH_DEFAULT ops it holds the new values followed
+  *       by the defaults, so only half of v->len is the on-disk size.
+  * @tmp: posix_xattr_filler_t describing the target (real_path or fdnum),
+  *       the op type (flags), the inode and the response dict (xattr).
+  *
+  * Under the inode's xattrop_lock the current value is read, combined with
+  * @v according to the op type and written back.  The buffer `array` is then
+  * stored under @k in filler->xattr; for GET_AND_SET on a missing xattr
+  * nothing is stored.
+  *
+  * Returns 0 on success, a negative value on failure with filler->op_errno
+  * set.
+  */
+ static int
+ _posix_handle_xattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+ {
+     int size = 0;
+     int count = 0;
+     int op_ret = 0;
+     int op_errno = 0;
+     gf_xattrop_flags_t optype = 0;
+     char *array = NULL;
+     char *dst_data = NULL;
+     inode_t *inode = NULL;
+     xlator_t *this = NULL;
+     posix_xattr_filler_t *filler = NULL;
+     posix_inode_ctx_t *ctx = NULL;
+
+     filler = tmp;
+
+     optype = (gf_xattrop_flags_t)(filler->flags);
+     this = filler->this;
+     inode = filler->inode;
+     count = v->len;
+     if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT ||
+         optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT)
+         count = count / 2;
+
+     /* Zero-filled so that a missing xattr behaves like an all-zero value. */
+     array = GF_CALLOC(count, sizeof(char), gf_posix_mt_char);
+     if (!array) {
+         /* Without this check the add-array helpers below would dereference
+          * NULL whenever the xattr does not exist yet. */
+         op_ret = -1;
+         op_errno = ENOMEM;
+         goto out;
+     }
+
+ #ifdef GF_DARWIN_HOST_OS
+     struct posix_private *priv = NULL;
+     priv = this->private;
+     if (priv->xattr_user_namespace == XATTR_STRIP) {
+         if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) {
+             k += XATTR_USER_PREFIX_LEN;
+         }
+     }
+ #endif
+     op_ret = posix_inode_ctx_get_all(inode, this, &ctx);
+     if (op_ret < 0) {
+         op_errno = ENOMEM;
+         goto out;
+     }
+
+     /* Read-modify-write must be atomic with respect to other xattrops on
+      * the same inode. */
+     pthread_mutex_lock(&ctx->xattrop_lock);
+     {
+         if (filler->real_path) {
+             size = sys_lgetxattr(filler->real_path, k, (char *)array, count);
+         } else {
+             size = sys_fgetxattr(filler->fdnum, k, (char *)array, count);
+         }
+
+         op_errno = errno;
+         /* A missing xattr (ENODATA/ENOATTR) is not an error here. */
+         if ((size == -1) && (op_errno != ENODATA) && (op_errno != ENOATTR)) {
+             if (op_errno == ENOTSUP) {
+                 GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+                                     GF_LOG_WARNING,
+                                     "Extended attributes not "
+                                     "supported by filesystem");
+             } else if (op_errno != ENOENT ||
+                        !posix_special_xattr(marker_xattrs, k)) {
+                 if (filler->real_path)
+                     gf_msg(this->name, fop_log_level(GF_FOP_XATTROP, op_errno),
+                            op_errno, P_MSG_XATTR_FAILED,
+                            "getxattr failed on %s while "
+                            "doing xattrop: Key:%s ",
+                            filler->real_path, k);
+                 else
+                     gf_msg(
+                         this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+                         "fgetxattr failed on gfid=%s "
+                         "while doing xattrop: "
+                         "Key:%s (%s)",
+                         uuid_utoa(filler->inode->gfid), k, strerror(op_errno));
+             }
+
+             op_ret = -1;
+             goto unlock;
+         }
+
+         /* GET_AND_SET on a missing xattr: there is no old value to return,
+          * so drop the buffer and nothing is added to the response. */
+         if (size == -1 && optype == GF_XATTROP_GET_AND_SET) {
+             GF_FREE(array);
+             array = NULL;
+         }
+
+         /* We only write back the xattr if it has been really modified
+          * (i.e. v->data is not all 0's). Otherwise we return its value
+          * but we don't update anything.
+          *
+          * If the xattr does not exist, a value of all 0's is returned
+          * without creating it. */
+         size = count;
+         if (optype != GF_XATTROP_GET_AND_SET &&
+             mem_0filled(v->data, v->len) == 0)
+             goto unlock;
+
+         dst_data = array;
+         switch (optype) {
+             case GF_XATTROP_ADD_ARRAY:
+                 __add_array((int32_t *)array, (int32_t *)v->data, count / 4);
+                 break;
+
+             case GF_XATTROP_ADD_ARRAY64:
+                 __add_long_array((int64_t *)array, (int64_t *)v->data,
+                                  count / 8);
+                 break;
+
+             case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT:
+                 __add_array_with_default((int32_t *)array, (int32_t *)v->data,
+                                          count / 4);
+                 break;
+
+             case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT:
+                 __add_long_array_with_default((int64_t *)array,
+                                               (int64_t *)v->data, count / 8);
+                 break;
+
+             case GF_XATTROP_GET_AND_SET:
+                 /* Store the caller's value; `array` keeps the old one. */
+                 dst_data = v->data;
+                 break;
+
+             default:
+                 /* real_path is NULL for fd based xattrops. */
+                 gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_UNKNOWN_OP,
+                        "Unknown xattrop type (%d)"
+                        " on %s. Please send a bug report to "
+                        "gluster-devel@gluster.org",
+                        optype, filler->real_path ? filler->real_path : "");
+                 op_ret = -1;
+                 op_errno = EINVAL;
+                 goto unlock;
+         }
+
+         if (filler->real_path) {
+             size = sys_lsetxattr(filler->real_path, k, dst_data, count, 0);
+         } else {
+             size = sys_fsetxattr(filler->fdnum, k, (char *)dst_data, count, 0);
+         }
+         op_errno = errno;
+     }
+ unlock:
+     pthread_mutex_unlock(&ctx->xattrop_lock);
+
+     if (op_ret == -1)
+         goto out;
+
+     if (size == -1) {
+         if (filler->real_path)
+             gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+                    "setxattr failed on %s "
+                    "while doing xattrop: key=%s",
+                    filler->real_path, k);
+         else
+             gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+                    "fsetxattr failed on gfid=%s while doing "
+                    "xattrop: key=%s (%s)",
+                    uuid_utoa(filler->inode->gfid), k, strerror(op_errno));
+         op_ret = -1;
+         goto out;
+     } else if (array) {
+         op_ret = dict_set_bin(filler->xattr, k, array, count);
+         if (op_ret) {
+             if (filler->real_path)
+                 gf_msg_debug(this->name, 0,
+                              "dict_set_bin failed (path=%s): "
+                              "key=%s (%s)",
+                              filler->real_path, k, strerror(-size));
+             else
+                 gf_msg_debug(this->name, 0,
+                              "dict_set_bin failed (gfid=%s): "
+                              "key=%s (%s)",
+                              uuid_utoa(filler->inode->gfid), k,
+                              strerror(-size));
+
+             op_ret = -1;
+             op_errno = EINVAL;
+             GF_FREE(array);
+             array = NULL;
+             goto out;
+         }
+         /* Cleared so the cleanup below does not free the buffer just handed
+          * to dict_set_bin(). */
+         array = NULL;
+     }
+
+ out:
+     if (op_ret < 0)
+         filler->op_errno = op_errno;
+
+     if (array)
+         GF_FREE(array);
+
+     return op_ret;
+ }
+
+/**
+ * xattrop - xattr operations - for internal use by GlusterFS
+ * @optype: ADD_ARRAY:
+ * dict should contain:
+ * "key" ==> array of 32-bit numbers
+ */
+
+int
+do_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ int op_ret = 0;
+ int op_errno = 0;
+ int _fd = -1;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+ dict_t *xdata_rsp = NULL;
+ struct iatt stbuf = {0};
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(xattr, out);
+ VALIDATE_OR_GOTO(this, out);
+
+ if (fd) {
+ op_ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING,
+ fop_log_level(GF_FOP_FXATTROP, op_errno),
+ P_MSG_PFD_GET_FAILED,
+ "failed to get pfd from"
+ " fd=%p",
+ fd);
+ goto out;
+ }
+ _fd = pfd->fd;
+ }
+
+ if (loc && !gf_uuid_is_null(loc->gfid)) {
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (real_path) {
+ inode = loc->inode;
+ } else if (fd) {
+ inode = fd->inode;
+ }
+
+ xattr_rsp = dict_new();
+ if (xattr_rsp == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ filler.this = this;
+ filler.fdnum = _fd;
+ filler.real_path = real_path;
+ filler.flags = (int)optype;
+ filler.inode = inode;
+ filler.xattr = xattr_rsp;
+
+ op_ret = dict_foreach(xattr, _posix_handle_xattr_keyvalue_pair, &filler);
+ op_errno = filler.op_errno;
+ if (op_ret < 0)
+ goto out;
+
+ if (!xdata)
+ goto out;
+
+ if (fd) {
+ op_ret = posix_fdstat(this, inode, _fd, &stbuf);
+ } else {
+ op_ret = posix_pstat(this, inode, inode->gfid, real_path, &stbuf,
+ _gf_false);
+ }
+ if (op_ret < 0) {
+ op_errno = errno;
+ goto out;
+ }
+ xdata_rsp = posix_xattr_fill(this, real_path, loc, fd, _fd, xdata, &stbuf);
+ if (!xdata_rsp) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ posix_set_mode_in_dict(xdata, xdata_rsp, &stbuf);
+out:
+
+ STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, xattr_rsp, xdata_rsp);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+ return 0;
+}
+
+ /* xattrop fop (path based): thin wrapper around do_xattrop() with no fd.
+  * The reply is sent by do_xattrop(); always returns 0. */
+ int
+ posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+               gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+ {
+     (void)do_xattrop(frame, this, loc, NULL, optype, xattr, xdata);
+
+     return 0;
+ }
+
+ /* fxattrop fop (fd based): thin wrapper around do_xattrop() with no loc.
+  * The reply is sent by do_xattrop(); always returns 0. */
+ int
+ posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+ {
+     (void)do_xattrop(frame, this, NULL, fd, optype, xattr, xdata);
+
+     return 0;
+ }
+
+ /*
+  * access fop: check @mask (only the low three permission bits are used)
+  * against the backend path of @loc, with the fs uid/gid switched to the
+  * caller's.  Replies 0 on success, -1 with op_errno otherwise.
+  * @xdata is unused.  Always returns 0.
+  */
+ int
+ posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+              dict_t *xdata)
+ {
+     char *real_path = NULL;
+     int32_t op_errno = 0;
+     int32_t op_ret = -1;
+
+     DECLARE_OLD_FS_ID_VAR;
+     SET_FS_ID(frame->root->uid, frame->root->gid);
+
+     VALIDATE_OR_GOTO(frame, out);
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(loc, out);
+
+     MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+     if (real_path == NULL) {
+         op_ret = -1;
+         op_errno = errno;
+         goto out;
+     }
+
+     /* op_ret is still -1 here, so a failed access() needs no assignment. */
+     if (sys_access(real_path, mask & 07) == -1) {
+         op_errno = errno;
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED,
+                "access failed on %s", real_path);
+         goto out;
+     }
+
+     op_ret = 0;
+
+ out:
+     SET_TO_OLD_FS_ID();
+
+     STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL);
+     return 0;
+ }
+
+ /**
+  * posix_ftruncate - truncate the file behind @fd to @offset bytes.
+  *
+  * @frame:  call frame; always answered via STACK_UNWIND_STRICT.
+  * @this:   posix xlator instance.
+  * @fd:     open fd to truncate.
+  * @offset: new file length.
+  * @xdata:  optional request dict; when present, posix_cs_maintenance() is
+  *          run before the truncate and a failure there aborts with EIO.
+  *
+  * The reply carries the pre- and post-operation iatt.  Always returns 0.
+  */
+ int32_t
+ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                 dict_t *xdata)
+ {
+     int32_t op_ret = -1;
+     int32_t op_errno = 0;
+     int _fd = -1;
+     struct iatt preop = {
+         0,
+     };
+     struct iatt postop = {
+         0,
+     };
+     struct posix_fd *pfd = NULL;
+     int ret = -1;
+     struct posix_private *priv = NULL;
+     dict_t *rsp_xdata = NULL;
+
+     DECLARE_OLD_FS_ID_VAR;
+     SET_FS_ID(frame->root->uid, frame->root->gid);
+
+     VALIDATE_OR_GOTO(frame, out);
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
+
+     priv = this->private;
+     VALIDATE_OR_GOTO(priv, out);
+
+     ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+     if (ret < 0) {
+         gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+                "pfd is NULL, fd=%p", fd);
+         goto out;
+     }
+
+     _fd = pfd->fd;
+
+     op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+     if (op_ret == -1) {
+         op_errno = errno;
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                "pre-operation fstat failed on fd=%p", fd);
+         goto out;
+     }
+
+     if (xdata) {
+         op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+                                       &rsp_xdata, _gf_false);
+         if (op_ret < 0) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                    "file state check failed, fd %p", fd);
+             op_errno = EIO;
+             goto out;
+         }
+     }
+
+     posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+     op_ret = sys_ftruncate(_fd, offset);
+
+     if (op_ret == -1) {
+         op_errno = errno;
+         /* PRId64 needs an int64_t argument; off_t is cast explicitly. */
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+                "ftruncate failed on fd=%p (%" PRId64 ")", fd,
+                (int64_t)offset);
+         goto out;
+     }
+
+     op_ret = posix_fdstat(this, fd->inode, _fd, &postop);
+     if (op_ret == -1) {
+         op_errno = errno;
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                "post-operation fstat failed on fd=%p", fd);
+         goto out;
+     }
+
+     posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);
+
+     op_ret = 0;
+
+ out:
+     SET_TO_OLD_FS_ID();
+
+     STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, &preop, &postop,
+                         NULL);
+
+     /* NOTE(review): rsp_xdata starts out NULL and is only ever filled by
+      * posix_cs_maintenance(), so any dict found here is presumably owned by
+      * this function (confirm against that helper).  It is not part of the
+      * reply, so release it instead of leaking it. */
+     if (rsp_xdata)
+         dict_unref(rsp_xdata);
+
+     return 0;
+ }
+
+/*
+ * fstat fop: stat the file behind an open fd.
+ *
+ * The stat result comes from posix_fdstat(). When xdata is supplied, a
+ * response dictionary is built with posix_xattr_fill(), then
+ * posix_cs_maintenance() and posix_cs_build_xattr_rsp() are run against it.
+ *
+ * The frame is always unwound here with the stat buffer and the response
+ * dictionary (which is unref'd afterwards). Always returns 0.
+ */
+int32_t
+posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int _fd = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct iatt buf = {
+ 0,
+ };
+ struct posix_fd *pfd = NULL;
+ dict_t *xattr_rsp = NULL;
+ int ret = -1;
+ struct posix_private *priv = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ /* priv is only checked for NULL here; it is not used afterwards. */
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ if (!xdata)
+ gf_msg_trace(this->name, 0, "null xdata passed, fd %p", fd);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &buf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (xdata) {
+ xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, &buf);
+
+ op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata,
+ &xattr_rsp, _gf_false);
+ /* A failed state check is only logged: there is no goto, and op_ret
+ is overwritten with 0 below, so the fop still succeeds. */
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ }
+ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL);
+ }
+
+ posix_update_iatt_buf(&buf, _fd, NULL, xdata);
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+ return 0;
+}
+
+/*
+ * lease fop stub.
+ *
+ * This translator does not implement leases: it logs a critical message on
+ * every call and unwinds with -1/ENOSYS and a zeroed gf_lease.
+ * Always returns 0.
+ */
+int32_t
+posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct gf_lease *lease, dict_t *xdata)
+{
+    struct gf_lease nullease = {
+        0,
+    };
+
+    /* Note the trailing space after "need": the adjacent literals are
+     * concatenated, and without it the log read "You needto use it". */
+    gf_msg(this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED,
+           "\"features/leases\" translator is not loaded. You need "
+           "to use it for proper functioning of your application");
+
+    STACK_UNWIND_STRICT(lease, frame, -1, ENOSYS, &nullease, NULL);
+    return 0;
+}
+
+/* Counter handed to GF_LOG_OCCASIONALLY by all the lock fop stubs below. */
+static int gf_posix_lk_log;
+
+/*
+ * lk fop stub.
+ *
+ * Locking is not implemented in this translator: log (occasionally) that
+ * features/locks is missing and unwind with -1/ENOSYS and a zeroed lock.
+ * Always returns 0.
+ */
+int32_t
+posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+         struct gf_flock *lock, dict_t *xdata)
+{
+    struct gf_flock empty_lock = {0};
+
+    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                        "\"features/locks\" translator is not loaded. "
+                        "You need to use it for proper functioning of "
+                        "your application.");
+
+    STACK_UNWIND_STRICT(lk, frame, -1, ENOSYS, &empty_lock, NULL);
+
+    return 0;
+}
+
+/*
+ * inodelk fop stub: log (occasionally) that features/locks is missing and
+ * unwind with -1/ENOSYS. Always returns 0.
+ */
+int32_t
+posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+              loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                        "\"features/locks\" translator is not loaded. "
+                        "You need to use it for proper functioning of "
+                        "your application.");
+
+    STACK_UNWIND_STRICT(inodelk, frame, -1, ENOSYS, NULL);
+
+    return 0;
+}
+
+/*
+ * finodelk fop stub: log (occasionally) that features/locks is missing and
+ * unwind with -1/ENOSYS. Always returns 0.
+ */
+int32_t
+posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+               fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                        "\"features/locks\" translator is not loaded. "
+                        "You need to use it for proper functioning of "
+                        "your application.");
+
+    STACK_UNWIND_STRICT(finodelk, frame, -1, ENOSYS, NULL);
+
+    return 0;
+}
+
+/*
+ * entrylk fop stub: log (occasionally) that features/locks is missing and
+ * unwind with -1/ENOSYS. Always returns 0.
+ */
+int32_t
+posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+              loc_t *loc, const char *basename, entrylk_cmd cmd,
+              entrylk_type type, dict_t *xdata)
+{
+    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                        "\"features/locks\" translator is not loaded. "
+                        "You need to use it for proper functioning of "
+                        "your application.");
+
+    STACK_UNWIND_STRICT(entrylk, frame, -1, ENOSYS, NULL);
+
+    return 0;
+}
+
+/*
+ * fentrylk fop stub: log (occasionally) that features/locks is missing and
+ * unwind with -1/ENOSYS. Always returns 0.
+ */
+int32_t
+posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+               fd_t *fd, const char *basename, entrylk_cmd cmd,
+               entrylk_type type, dict_t *xdata)
+{
+    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                        "\"features/locks\" translator is not loaded. "
+                        "You need to use it for proper functioning of "
+                        "your application.");
+
+    STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOSYS, NULL);
+
+    return 0;
+}
+
+/*
+ * Read directory entries from dir, starting at offset off, and append them
+ * to entries until roughly size bytes worth of entries have been collected.
+ *
+ * fd        - fd whose posix context (pfd) and inode gfid are consulted
+ * dir       - open directory stream to read from
+ * off       - 0 to rewind, otherwise an offset passed to seekdir()
+ * size      - byte budget; each entry is charged
+ *             max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + name + NUL
+ * entries   - list head that receives the new gf_dirent_t nodes
+ * skip_dirs - when non-zero, directory entries are left out
+ *
+ * Returns the number of entries appended. Returns -1 with errno set when
+ * the fd context or handle path cannot be obtained, or (non-Linux only)
+ * when a seekdir() did not land on the requested offset. A telldir(),
+ * EBADF or allocation failure inside the loop stops reading and returns the
+ * entries gathered so far. errno is set to ENOENT when the end of the
+ * directory has been reached.
+ */
+int
+posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size,
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
+{
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ struct posix_fd *pfd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
+ int op_errno = 0;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ count = -1;
+ errno = op_errno;
+ goto out;
+ }
+
+ /* When directories must be skipped, prepare "<handle path>/" so each
+ entry name can be appended and lstat'ed below. */
+ if (skip_dirs) {
+ hpath = alloca(PATH_MAX);
+ len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX);
+ if (len <= 0) {
+ errno = ESTALE;
+ count = -1;
+ goto out;
+ }
+ len = strlen(hpath);
+ hpath[len] = '/';
+ }
+
+ if (!off) {
+ rewinddir(dir);
+ } else {
+ seekdir(dir, off);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != off && off != pfd->dir_eof) {
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ off, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ }
+
+ while (filled <= size) {
+ /* Remember where this entry starts so the stream can be moved back
+ if the entry turns out not to fit in the remaining budget. */
+ in_case = (u_long)telldir(dir);
+
+ if (in_case == -1) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED,
+ "telldir failed on dir=%p", dir);
+ goto out;
+ }
+
+ /* Cleared so that a NULL return with errno still 0 can be told apart
+ from a read error. */
+ errno = 0;
+
+ entry = sys_readdir(dir, scratch);
+
+ if (!entry || errno != 0) {
+ if (errno == EBADF) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno,
+ P_MSG_DIR_OPERATION_FAILED, "readdir failed on dir=%p",
+ dir);
+ goto out;
+ }
+ break;
+ }
+
+#ifdef __NetBSD__
+ /*
+ * NetBSD with UFS1 backend uses backing files for
+ * extended attributes. They can be found in a
+ * .attribute file located at the root of the filesystem
+ * We hide it to glusterfs clients, since chaos will occur
+ * when the cluster/dht xlator decides to distribute
+ * extended attribute backing file across storage servers.
+ */
+ /* NOTE(review): the test below is "== 0" while the GF_HIDDEN_PATH
+ check further down uses the bare call for the root directory;
+ confirm which polarity matches the comment above. */
+ if (__is_root_gfid(fd->inode->gfid) == 0 &&
+ (!strcmp(entry->d_name, ".attribute")))
+ continue;
+#endif /* __NetBSD__ */
+
+ if (__is_root_gfid(fd->inode->gfid) &&
+ (!strcmp(GF_HIDDEN_PATH, entry->d_name))) {
+ continue;
+ }
+
+ if (skip_dirs) {
+ if (DT_ISDIR(entry->d_type)) {
+ continue;
+ } else if (hpath) {
+ /* d_type did not say "directory"; double-check with lstat.
+ NOTE(review): the copy into hpath is not bounds-checked
+ against PATH_MAX. */
+ strcpy(&hpath[len + 1], entry->d_name);
+ ret = sys_lstat(hpath, &stbuf);
+ if (!ret && S_ISDIR(stbuf.st_mode))
+ continue;
+ }
+ }
+
+ this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
+ strlen(entry->d_name) + 1;
+
+ /* Entry does not fit: move the stream back to its start so the next
+ call reads it again, then stop. */
+ if (this_size + filled > size) {
+ seekdir(dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case && in_case != pfd->dir_eof) {
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL,
+ P_MSG_DIR_OPERATION_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ in_case, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ break;
+ }
+
+ this_entry = gf_dirent_for_name(entry->d_name);
+
+ if (!this_entry) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ P_MSG_GF_DIRENT_CREATE_FAILED,
+ "could not create "
+ "gf_dirent for entry %s",
+ entry->d_name);
+ goto out;
+ }
+ /*
+ * we store the offset of next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
+ this_entry->d_ino = entry->d_ino;
+ this_entry->d_type = entry->d_type;
+
+ list_add_tail(&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count++;
+ }
+
+ /* Probe one more entry to detect end-of-directory.
+ NOTE(review): errno is not reset to 0 right before this call, so the
+ check relies on whatever value earlier calls left behind. */
+ if ((!sys_readdir(dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ pfd->dir_eof = (u_long)last_off;
+ }
+out:
+ return count;
+}
+
+/*
+ * Build the xattr response dictionary for a single directory entry.
+ *
+ * Thin wrapper over posix_xattr_fill(): the entry is described by a
+ * temporary loc that carries only the inode, and no fd is passed on
+ * (NULL / -1). Returns whatever posix_xattr_fill() returns.
+ */
+dict_t *
+posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd,
+                       char *entry_path, dict_t *dict, struct iatt *stbuf)
+{
+    /* Without the 'loc', open-fd-count would be a problem. */
+    loc_t entry_loc = {
+        .inode = inode,
+    };
+
+    return posix_xattr_fill(this, entry_path, &entry_loc, NULL, -1, dict,
+                            stbuf);
+}
+
+/*
+ * Fill in the readdirp details for every entry already in entries: stat
+ * data, an inode, and (when dict is given) a per-entry xattr dictionary.
+ *
+ * Entries whose posix_pstat() fails are left untouched and skipped.
+ *
+ * Returns 0 on success (including an empty list) and -1 if the handle path
+ * of the directory cannot be built.
+ */
+int
+posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ dict_t *dict)
+{
+ gf_dirent_t *entry = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ char *hpath = NULL;
+ int len = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ uuid_t gfid;
+ int ret = -1;
+
+ if (list_empty(&entries->list))
+ return 0;
+
+ itable = fd->inode->table;
+
+ /* Build "<handle path of the directory>/" once; each entry name is
+ appended after the slash inside the loop. */
+ hpath = alloca(PATH_MAX);
+ len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX);
+ if (len <= 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED,
+ "Failed to create handle path, fd=%p, gfid=%s", fd,
+ uuid_utoa(fd->inode->gfid));
+ return -1;
+ }
+ len = strlen(hpath);
+ hpath[len] = '/';
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ /* Use the gfid of an already known inode if there is one, otherwise
+ pass an all-zero gfid to posix_pstat(). */
+ inode = inode_grep(fd->inode->table, fd->inode, entry->d_name);
+ if (inode)
+ gf_uuid_copy(gfid, inode->gfid);
+ else
+ bzero(gfid, 16);
+
+ /* NOTE(review): not bounds-checked against PATH_MAX. */
+ strcpy(&hpath[len + 1], entry->d_name);
+
+ ret = posix_pstat(this, inode, gfid, hpath, &stbuf, _gf_false);
+
+ if (ret == -1) {
+ if (inode)
+ inode_unref(inode);
+ continue;
+ }
+
+ posix_update_iatt_buf(&stbuf, -1, hpath, dict);
+
+ if (!inode)
+ inode = inode_find(itable, stbuf.ia_gfid);
+
+ if (!inode)
+ inode = inode_new(itable);
+
+ /* The inode pointer obtained above is handed over to the entry; it
+ is not unref'd here. */
+ entry->inode = inode;
+
+ if (dict) {
+ entry->dict = posix_entry_xattr_fill(this, entry->inode, fd, hpath,
+ dict, &stbuf);
+ }
+
+ entry->d_stat = stbuf;
+ if (stbuf.ia_ino)
+ entry->d_ino = stbuf.ia_ino;
+
+ if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) {
+ /* The platform supports d_type but the underlying
+ filesystem doesn't. We set d_type to the correct
+ value from ia_type */
+ entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type);
+ }
+
+ inode = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * Common implementation behind the readdir and readdirp fops.
+ *
+ * frame/this/fd - the usual fop arguments; fd must be an opened directory
+ * size          - byte budget for the reply (see posix_fill_readdir())
+ * off           - directory offset to start reading from
+ * whichop       - GF_FOP_READDIR or GF_FOP_READDIRP; selects the unwind and
+ *                 whether per-entry stat/xattr data is filled in
+ * dict          - request xdata; GF_READDIR_SKIP_DIRS is read from it
+ *
+ * The frame is always unwound here. op_ret is the number of entries (or
+ * -1) and op_errno is the errno left by posix_fill_readdir(), where ENOENT
+ * signals end of directory. Always returns 0.
+ */
+int32_t
+posix_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t off, int whichop, dict_t *dict)
+{
+    struct posix_fd *pfd = NULL;
+    DIR *dir = NULL;
+    int ret = -1;
+    int count = 0;
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    gf_dirent_t entries;
+    int32_t skip_dirs = 0;
+
+    /* Must precede every "goto out": the exit path unwinds with this list
+     * and then frees it, so it has to be a valid empty list even when the
+     * argument validation below fails. */
+    INIT_LIST_HEAD(&entries.list);
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+               "pfd is NULL, fd=%p", fd);
+        goto out;
+    }
+
+    dir = pfd->dir;
+
+    if (!dir) {
+        gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL,
+               "dir is NULL for fd=%p", fd);
+        op_errno = EINVAL;
+        goto out;
+    }
+
+    /* When READDIR_FILTER option is set to on, we can filter out
+     * directory's entry from the entry->list.
+     */
+    ret = dict_get_int32(dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+    LOCK(&fd->lock);
+    {
+        /* posix_fill_readdir performs multiple separate individual
+           readdir() calls to fill up the buffer.
+
+           In case of NFS where the same anonymous FD is shared between
+           different applications, reading a common directory can
+           result in the anonymous fd getting re-used unsafely between
+           the two readdir requests (in two different io-threads).
+
+           It would also help, in the future, to replace the loop
+           around readdir() with a single large getdents() call.
+        */
+        count = posix_fill_readdir(fd, dir, off, size, &entries, this,
+                                   skip_dirs);
+    }
+    UNLOCK(&fd->lock);
+
+    /* pick ENOENT to indicate EOF */
+    op_errno = errno;
+    op_ret = count;
+
+    if (whichop != GF_FOP_READDIRP)
+        goto out;
+
+    posix_readdirp_fill(this, fd, &entries, dict);
+
+out:
+    if (whichop == GF_FOP_READDIR)
+        STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL);
+    else
+        STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+    gf_dirent_free(&entries);
+
+    return 0;
+}
+
+/*
+ * readdir fop entry point: delegates to posix_do_readdir() in
+ * GF_FOP_READDIR mode, which also unwinds the frame. Always returns 0.
+ */
+int32_t
+posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t off, dict_t *xdata)
+{
+    (void)posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIR, xdata);
+
+    return 0;
+}
+
+/*
+ * readdirp fop entry point.
+ *
+ * Normally delegates to posix_do_readdir() in GF_FOP_READDIRP mode. When
+ * the request dictionary carries GET_ANCESTRY_DENTRY_KEY, the reply is
+ * instead built by posix_get_ancestry() and op_ret is the number of
+ * entries it produced. The frame is unwound on both paths.
+ * Always returns 0.
+ */
+int32_t
+posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+               off_t off, dict_t *dict)
+{
+    gf_dirent_t entries;
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    gf_dirent_t *cursor = NULL;
+
+    /* Common case first: a plain readdirp request. */
+    if ((dict == NULL) || !dict_get(dict, GET_ANCESTRY_DENTRY_KEY)) {
+        posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIRP, dict);
+        return 0;
+    }
+
+    INIT_LIST_HEAD(&entries.list);
+
+    op_ret = posix_get_ancestry(this, fd->inode, &entries, NULL,
+                                POSIX_ANCESTRY_DENTRY, &op_errno, dict);
+    if (op_ret >= 0) {
+        /* Report how many ancestry entries were gathered. */
+        op_ret = 0;
+        list_for_each_entry(cursor, &entries.list, list)
+        {
+            op_ret++;
+        }
+    }
+
+    STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+    gf_dirent_free(&entries);
+
+    return 0;
+}
+
+/*
+ * rchecksum fop: read len bytes at offset from the file behind fd and
+ * return a weak (rsync rolling) checksum plus a strong checksum of the data
+ * that was actually read.
+ *
+ * The strong checksum comes from gf_rsync_strong_checksum() when
+ * priv->fips_mode_rchecksum is set (the reply xdata then carries
+ * "fips-mode-rchecksum"), otherwise from gf_rsync_md5_checksum(). When the
+ * request xdata contains "check-zero-filled", "buf-has-zeroes" is added to
+ * the reply.
+ *
+ * The frame is always unwound here; op_ret is 0 on success and negative
+ * (with op_errno) on failure. Always returns 0.
+ */
+int32_t
+posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                int32_t len, dict_t *xdata)
+{
+    char *alloc_buf = NULL;
+    char *buf = NULL;
+    int _fd = -1;
+    struct posix_fd *pfd = NULL;
+    int op_ret = -1;
+    int op_errno = 0;
+    int ret = 0;
+    ssize_t bytes_read = 0;
+    int32_t weak_checksum = 0;
+    int32_t zerofillcheck = 0;
+    /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH,
+       so this is used. */
+    unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0};
+    unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0};
+    unsigned char *checksum = NULL;
+    struct posix_private *priv = NULL;
+    dict_t *rsp_xdata = NULL;
+    gf_boolean_t buf_has_zeroes = _gf_false;
+    struct iatt preop = {
+        0,
+    };
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd, out);
+
+    /* priv is dereferenced below, so reject a NULL private pointer the
+     * same way the other fd based fops in this file do. */
+    priv = this->private;
+    VALIDATE_OR_GOTO(priv, out);
+
+    alloc_buf = _page_aligned_alloc(len, &buf);
+    if (!alloc_buf) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    rsp_xdata = dict_new();
+    if (!rsp_xdata) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+               "pfd is NULL, fd=%p", fd);
+        goto out;
+    }
+
+    _fd = pfd->fd;
+
+    if (xdata) {
+        op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+        if (op_ret == -1) {
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                   "pre-operation fstat failed on fd=%p", fd);
+            goto out;
+        }
+
+        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+                                      &rsp_xdata, _gf_false);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd);
+            op_errno = EIO;
+            goto out;
+        }
+    }
+
+    /* The pre-checks above leave op_ret >= 0. Put it back to "failed" so
+     * that any later "goto out" cannot be unwound as a success. */
+    op_ret = -1;
+
+    LOCK(&fd->lock);
+    {
+        if (priv->aio_capable && priv->aio_init_done)
+            __posix_fd_set_odirect(fd, pfd, 0, offset, len);
+
+        bytes_read = sys_pread(_fd, buf, len, offset);
+        if (bytes_read < 0) {
+            /* Capture errno before logging, which may overwrite it. */
+            op_errno = errno;
+            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PREAD_FAILED,
+                   "pread of %d bytes returned %zd", len, bytes_read);
+        }
+    }
+    UNLOCK(&fd->lock);
+
+    if (bytes_read < 0)
+        goto out;
+
+    if (xdata &&
+        dict_get_int32(xdata, "check-zero-filled", &zerofillcheck) == 0) {
+        buf_has_zeroes = (mem_0filled(buf, bytes_read)) ? _gf_false : _gf_true;
+        ret = dict_set_uint32(rsp_xdata, "buf-has-zeroes", buf_has_zeroes);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+                   "%s: Failed to set "
+                   "dictionary value for key: %s",
+                   uuid_utoa(fd->inode->gfid), "buf-has-zeroes");
+            op_errno = -ret;
+            goto out;
+        }
+    }
+
+    /* Checksum exactly the bytes that were read. 'ret' only holds the
+     * status of the last helper call and must not be used as a length. */
+    weak_checksum = gf_rsync_weak_checksum((unsigned char *)buf,
+                                           (size_t)bytes_read);
+
+    if (priv->fips_mode_rchecksum) {
+        ret = dict_set_int32(rsp_xdata, "fips-mode-rchecksum", 1);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+                   "%s: Failed to set "
+                   "dictionary value for key: %s",
+                   uuid_utoa(fd->inode->gfid), "fips-mode-rchecksum");
+            op_errno = -ret;
+            goto out;
+        }
+        checksum = strong_checksum;
+        gf_rsync_strong_checksum((unsigned char *)buf, (size_t)bytes_read,
+                                 (unsigned char *)checksum);
+    } else {
+        checksum = md5_checksum;
+        gf_rsync_md5_checksum((unsigned char *)buf, (size_t)bytes_read,
+                              (unsigned char *)checksum);
+    }
+    op_ret = 0;
+
+    posix_set_ctime(frame, this, NULL, _fd, fd->inode, NULL);
+
+out:
+    STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum,
+                        checksum, rsp_xdata);
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    GF_FREE(alloc_buf);
+
+    return 0;
+}
+
+/*
+ * forget callback: release the per-inode contexts this translator attached
+ * to inode.
+ *
+ * Both context slots are removed with inode_ctx_del2(). The first slot, if
+ * set, is a posix_inode_ctx_t: when it is flagged GF_UNLINK_TRUE the file
+ * at the inode's unlink path is removed, then its mutexes are destroyed and
+ * it is freed. The second slot, if set, is a posix_mdata_t and is freed.
+ *
+ * Returns 0 when the translator has no private data; otherwise the result
+ * of inode_ctx_del2(), replaced by the sys_unlink() result (or -1 if the
+ * unlink path could not be built) when an unlink was attempted.
+ */
+int
+posix_forget(xlator_t *this, inode_t *inode)
+{
+    int ret = 0;
+    char *unlink_path = NULL;
+    uint64_t ctx_uint1 = 0;
+    uint64_t ctx_uint2 = 0;
+    posix_inode_ctx_t *ctx = NULL;
+    posix_mdata_t *mdata = NULL;
+    struct posix_private *priv_posix = NULL;
+
+    priv_posix = (struct posix_private *)this->private;
+    if (priv_posix == NULL)
+        return 0;
+
+    ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2);
+
+    if (ctx_uint1) {
+        ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1;
+
+        if (ctx->unlink_flag == GF_UNLINK_TRUE) {
+            POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid,
+                                       unlink_path);
+            if (unlink_path) {
+                ret = sys_unlink(unlink_path);
+            } else {
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
+                       "Failed to remove gfid :%s", uuid_utoa(inode->gfid));
+                ret = -1;
+            }
+        }
+
+        /* The context is torn down whether or not the unlink worked. */
+        pthread_mutex_destroy(&ctx->xattrop_lock);
+        pthread_mutex_destroy(&ctx->write_atomic_lock);
+        pthread_mutex_destroy(&ctx->pgfid_lock);
+        GF_FREE(ctx);
+    }
+
+    if (ctx_uint2)
+        mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2;
+
+    /* mdata stays NULL when the second slot was empty. */
+    GF_FREE(mdata);
+
+    return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-handle.h b/xlators/storage/posix/src/posix-inode-handle.h
new file mode 100644
index 00000000000..36c47f2bebc
--- /dev/null
+++ b/xlators/storage/posix/src/posix-inode-handle.h
@@ -0,0 +1,118 @@
+/*
+ Copyright (c) 2011-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _POSIX_INODE_HANDLE_H
+#define _POSIX_INODE_HANDLE_H
+
+#include <limits.h>
+#include <sys/types.h>
+#include <glusterfs/gf-dirent.h>
+#include "posix.h"
+
+/* From Open Group Base Specifications Issue 6 */
+#ifndef _XOPEN_PATH_MAX
+#define _XOPEN_PATH_MAX 1024
+#endif
+
+#define TRASH_DIR "landfill"
+
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+#define SLEN(str) (sizeof(str) - 1)
+
+#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/'))
+#define LOC_IS_DIR(loc) \
+ (loc && (loc->inode) && (loc->inode->ia_type == IA_IFDIR))
+/*
+ * MAKE_REAL_PATH(var, this, path): set var to an alloca()'d backend path
+ * for path.
+ *
+ * Normally var is POSIX_BASE_PATH(this) followed by path. If
+ * POSIX_PATH_MAX(this) is not -1 and the combined length would reach it,
+ * var is instead a copy of path with one leading '/' dropped.
+ * NOTE(review): that relative form presumably relies on the process
+ * working directory being the base path - confirm.
+ *
+ * Because the buffer comes from alloca(), var must not be used after the
+ * function that expanded the macro returns. path is evaluated more than
+ * once.
+ */
+#define MAKE_REAL_PATH(var, this, path) \
+ do { \
+ size_t path_len = strlen(path); \
+ size_t var_len = path_len + POSIX_BASE_PATH_LEN(this) + 1; \
+ if (POSIX_PATH_MAX(this) != -1 && var_len >= POSIX_PATH_MAX(this)) { \
+ var = alloca(path_len + 1); \
+ strcpy(var, (path[0] == '/') ? path + 1 : path); \
+ } else { \
+ var = alloca(var_len); \
+ strcpy(var, POSIX_BASE_PATH(this)); \
+ strcpy(&var[POSIX_BASE_PATH_LEN(this)], path); \
+ } \
+ } while (0)
+
+/*
+ * MAKE_HANDLE_PATH(var, this, gfid, base): set var to an alloca()'d
+ * PATH_MAX buffer filled in by posix_handle_path() for gfid and base.
+ * var is set to NULL when posix_handle_path() returns a length <= 0.
+ *
+ * Because the buffer comes from alloca(), var must not be used after the
+ * function that expanded the macro returns.
+ */
+#define MAKE_HANDLE_PATH(var, this, gfid, base) \
+ do { \
+ int __len = 0; \
+ int tot = PATH_MAX; \
+ var = alloca(tot); \
+ __len = posix_handle_path(this, gfid, base, var, tot); \
+ if (__len <= 0) { \
+ var = NULL; \
+ } \
+ } while (0)
+
+/*
+ * MAKE_INODE_HANDLE(rpath, this, loc, iatt_p): resolve loc to a backend
+ * path in rpath and stat it into iatt_p.
+ *
+ * The macro assigns to a variable named op_ret that must exist in the
+ * expanding function. rpath is only assigned on some paths, so callers
+ * should initialise it to NULL and test it afterwards:
+ *  - this->private is NULL, or loc->gfid is null: op_ret = -1, rpath
+ *    untouched.
+ *  - loc is a directory with an absolute path: rpath comes from
+ *    MAKE_REAL_PATH and op_ret from posix_pstat().
+ *  - otherwise op_ret comes from posix_istat(); unless errno is ELOOP,
+ *    rpath comes from MAKE_HANDLE_PATH (op_ret = -1 if that yields NULL).
+ *  - errno == ELOOP after posix_istat(): op_ret = -1, rpath untouched.
+ */
+/* TODO: it is not a good idea to change a variable which
+ is not passed to the macro.. Fix it later */
+#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) \
+ do { \
+ if (!this->private) { \
+ op_ret = -1; \
+ gf_msg("make_inode_handle", GF_LOG_ERROR, 0, \
+ P_MSG_INODE_HANDLE_CREATE, \
+ "private is NULL, fini is already called"); \
+ break; \
+ } \
+ if (gf_uuid_is_null(loc->gfid)) { \
+ op_ret = -1; \
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INODE_HANDLE_CREATE, \
+ "null gfid for path %s", (loc)->path); \
+ break; \
+ } \
+ if (LOC_IS_DIR(loc) && LOC_HAS_ABSPATH(loc)) { \
+ MAKE_REAL_PATH(rpath, this, (loc)->path); \
+ op_ret = posix_pstat(this, (loc)->inode, (loc)->gfid, rpath, \
+ iatt_p, _gf_false); \
+ break; \
+ } \
+ errno = 0; \
+ op_ret = posix_istat(this, loc->inode, loc->gfid, NULL, iatt_p); \
+ if (errno != ELOOP) { \
+ MAKE_HANDLE_PATH(rpath, this, (loc)->gfid, NULL); \
+ if (!rpath) { \
+ op_ret = -1; \
+ gf_msg(this->name, GF_LOG_ERROR, errno, \
+ P_MSG_INODE_HANDLE_CREATE, \
+ "Failed to create inode handle " \
+ "for path %s", \
+ (loc)->path); \
+ } \
+ break; \
+ } /* errno == ELOOP after posix_istat() */ \
+ else { \
+ op_ret = -1; \
+ } \
+ } while (0)
+
+#define POSIX_ANCESTRY_PATH (1 << 0)
+#define POSIX_ANCESTRY_DENTRY (1 << 1)
+
+int
+posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *buf,
+ size_t len);
+
+int
+posix_make_ancestryfromgfid(xlator_t *this, char *path, int pathsize,
+ gf_dirent_t *head, int type, uuid_t gfid,
+ const size_t handle_size,
+ const char *priv_base_path, inode_table_t *table,
+ inode_t **parent, dict_t *xdata, int32_t *op_errno);
+
+int
+posix_handle_init(xlator_t *this);
+
+int
+posix_handle_trash_init(xlator_t *this);
+
+#endif /* !_POSIX_INODE_HANDLE_H */
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index 81752c17e78..2253f381ac5 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -10,18 +10,16 @@
#ifndef __POSIX_MEM_TYPES_H__
#define __POSIX_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_posix_mem_types_ {
- gf_posix_mt_dir_entry_t = gf_common_mt_end + 1,
- gf_posix_mt_posix_fd,
- gf_posix_mt_char,
- gf_posix_mt_posix_private,
- gf_posix_mt_int32_t,
- gf_posix_mt_posix_dev_t,
- gf_posix_mt_trash_path,
- gf_posix_mt_paiocb,
- gf_posix_mt_end
+ gf_posix_mt_posix_fd = gf_common_mt_end + 1,
+ gf_posix_mt_char,
+ gf_posix_mt_posix_private,
+ gf_posix_mt_trash_path,
+ gf_posix_mt_paiocb,
+ gf_posix_mt_inode_ctx_t,
+ gf_posix_mt_mdata_attr,
+ gf_posix_mt_end
};
#endif
-
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
new file mode 100644
index 00000000000..f5bede266da
--- /dev/null
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_MESSAGES_H_
+#define _POSIX_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED,
+ P_MSG_READV_FAILED, P_MSG_FSTAT_FAILED, P_MSG_PFD_NULL,
+ P_MSG_INVALID_ARGUMENT, P_MSG_IO_SUBMIT_FAILED, P_MSG_WRITEV_FAILED,
+ P_MSG_IO_GETEVENTS_FAILED, P_MSG_UNKNOWN_OP, P_MSG_AIO_UNAVAILABLE,
+ P_MSG_IO_SETUP_FAILED, P_MSG_ZEROFILL_FAILED, P_MSG_OPENDIR_FAILED,
+ P_MSG_DIRFD_FAILED, P_MSG_FD_PATH_SETTING_FAILED, P_MSG_LSTAT_FAILED,
+ P_MSG_READYLINK_FAILED, P_MSG_GFID_FAILED, P_MSG_CREATE_FAILED,
+ P_MSG_MKNOD_FAILED, P_MSG_LCHOWN_FAILED, P_MSG_ACL_FAILED,
+ P_MSG_MKDIR_NOT_PERMITTED, P_MSG_DIR_OF_SAME_ID, P_MSG_MKDIR_FAILED,
+ P_MSG_CHOWN_FAILED, P_MSG_UNLINK_FAILED, P_MSG_KEY_STATUS_INFO,
+ P_MSG_XATTR_STATUS, P_MSG_RMDIR_NOT_PERMITTED, P_MSG_RMDIR_FAILED,
+ P_MSG_DIR_OPERATION_FAILED, P_MSG_SYMLINK_FAILED, P_MSG_DIR_FOUND,
+ P_MSG_LINK_FAILED, P_MSG_TRUNCATE_FAILED, P_MSG_FILE_OP_FAILED,
+ P_MSG_READ_FAILED, P_MSG_DICT_SET_FAILED, P_MSG_STATVFS_FAILED,
+ P_MSG_DIR_NOT_NULL, P_MSG_FSYNC_FAILED, P_MSG_CLOSE_FAILED,
+ P_MSG_GETTING_FILENAME_FAILED, P_MSG_INODE_PATH_GET_FAILED,
+ P_MSG_GET_KEY_VALUE_FAILED, P_MSG_CHMOD_FAILED, P_MSG_FCHMOD_FAILED,
+ P_MSG_FCHOWN_FAILED, P_MSG_UTIMES_FAILED, P_MSG_FUTIMES_FAILED,
+ P_MSG_XATTR_NOT_REMOVED, P_MSG_PFD_GET_FAILED, P_MSG_ACCESS_FAILED,
+ P_MSG_PREAD_FAILED, P_MSG_UUID_NULL, P_MSG_EXPORT_DIR_MISSING,
+ P_MSG_SUBVOLUME_ERROR, P_MSG_VOLUME_DANGLING, P_MSG_INVALID_OPTION,
+ P_MSG_INVALID_VOLUME_ID, P_MSG_VOLUME_ID_ABSENT,
+ P_MSG_HOSTNAME_MISSING, P_MSG_SET_ULIMIT_FAILED,
+ P_MSG_SET_FILE_MAX_FAILED, P_MSG_MAX_FILE_OPEN, P_MSG_OPEN_FAILED,
+ P_MSG_LOOKUP_NOT_PERMITTED, P_MSG_RENAME_FAILED, P_MSG_WRITE_FAILED,
+ P_MSG_FILE_FAILED, P_MSG_THREAD_FAILED, P_MSG_HEALTHCHECK_FAILED,
+ P_MSG_GET_FDCTX_FAILED, P_MSG_HANDLEPATH_FAILED,
+ P_MSG_IPC_NOT_HANDLE, P_MSG_SET_XDATA_FAIL,
+ P_MSG_DURABILITY_REQ_NOT_SATISFIED, P_MSG_XATTR_NOTSUP,
+ P_MSG_GFID_SET_FAILED, P_MSG_ACL_NOTSUP, P_MSG_BASEPATH_CHDIR_FAILED,
+ P_MSG_INVALID_OPTION_VAL, P_MSG_INVALID_NODE_UUID,
+ P_MSG_FSYNCER_THREAD_CREATE_FAILED, P_MSG_GF_DIRENT_CREATE_FAILED,
+ P_MSG_VOLUME_ID_FETCH_FAILED, P_MSG_UNKNOWN_ARGUMENT,
+ P_MSG_INODE_HANDLE_CREATE, P_MSG_ENTRY_HANDLE_CREATE, P_MSG_PGFID_OP,
+ P_MSG_POSIX_AIO, P_MSG_HANDLE_CREATE_TRASH, P_MSG_HANDLE_CREATE,
+ P_MSG_HANDLE_PATH_CREATE, P_MSG_SET_FILE_CONTENTS,
+ P_MSG_XDATA_GETXATTR, P_MSG_STALE_HANDLE_REMOVE_FAILED,
+ P_MSG_HANDLE_PATH_CREATE_FAILED, P_MSG_HANDLE_TRASH_CREATE,
+ P_MSG_HANDLE_DELETE, P_MSG_READLINK_FAILED, P_MSG_BUFFER_OVERFLOW,
+ P_MSG_SEEK_UNKOWN, P_MSG_SEEK_FAILED, P_MSG_INODE_RESOLVE_FAILED,
+ P_MSG_PREOP_CHECK_FAILED, P_MSG_LEASE_DISABLED,
+ P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED,
+ P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED,
+ P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED,
+ P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED,
+ P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED, P_MSG_NOMEM,
+ P_MSG_PSTAT_FAILED, P_MSG_FDSTAT_FAILED);
+
+#endif /* !_POSIX_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix-metadata-disk.h b/xlators/storage/posix/src/posix-metadata-disk.h
new file mode 100644
index 00000000000..8833fbb5428
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata-disk.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_METADATA_DISK_H
+#define _POSIX_METADATA_DISK_H
+
+/* A timestamp as stored on disk: two 64-bit fields, seconds and
+ * nanoseconds. */
+typedef struct gf_timespec_disk {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+} gf_timespec_disk_t;
+
+/* posix_mdata_t on disk structure */
+
+/* Declared packed, so no padding is inserted between the one-byte version
+ * and the 64-bit flags that follow it. */
+typedef struct __attribute__((__packed__)) posix_mdata_disk {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ gf_timespec_disk_t ctime;
+ gf_timespec_disk_t mtime;
+ gf_timespec_disk_t atime;
+} posix_mdata_disk_t;
+
+#endif /* _POSIX_METADATA_DISK_H */
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
new file mode 100644
index 00000000000..b1889052f11
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.c
@@ -0,0 +1,916 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include "posix-metadata.h"
+#include "posix-metadata-disk.h"
+#include "posix-handle.h"
+#include "posix-messages.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+
+static int gf_posix_xattr_enotsup_log;
+
+/* posix_mdata_to_disk fills the on-disk record from the in-memory one.
+ * Every 64-bit field goes through htobe64() so the stored bytes do not
+ * depend on the byte order of the machine that wrote them.
+ */
+static inline void
+posix_mdata_to_disk(posix_mdata_disk_t *out, posix_mdata_t *in)
+{
+    /* version is a single byte and is copied unchanged */
+    out->version = in->version;
+    out->flags = htobe64(in->flags);
+
+    out->ctime = (gf_timespec_disk_t){
+        .tv_sec = htobe64(in->ctime.tv_sec),
+        .tv_nsec = htobe64(in->ctime.tv_nsec),
+    };
+
+    out->mtime = (gf_timespec_disk_t){
+        .tv_sec = htobe64(in->mtime.tv_sec),
+        .tv_nsec = htobe64(in->mtime.tv_nsec),
+    };
+
+    out->atime = (gf_timespec_disk_t){
+        .tv_sec = htobe64(in->atime.tv_sec),
+        .tv_nsec = htobe64(in->atime.tv_nsec),
+    };
+}
+
+/* posix_mdata_from_disk is the inverse of posix_mdata_to_disk: it copies the
+ * on-disk record into the in-memory one, passing every 64-bit field through
+ * be64toh().
+ */
+static inline void
+posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+    /* timestamps, one seconds/nanoseconds pair each */
+    out->atime.tv_sec = be64toh(in->atime.tv_sec);
+    out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+
+    out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
+    out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
+
+    out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
+    out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
+
+    /* header fields; version is one byte and is copied unchanged */
+    out->flags = be64toh(in->flags);
+    out->version = in->version;
+}
+
+/* posix_mdata_iatt_from_disk copies the three timestamps of an on-disk mdata
+ * record into a struct mdata_iatt, converting each field with be64toh().
+ * The version and flags of the record are not looked at.
+ */
+void
+posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in)
+{
+    out->ia_atime = be64toh(in->atime.tv_sec);
+    out->ia_atime_nsec = be64toh(in->atime.tv_nsec);
+
+    out->ia_mtime = be64toh(in->mtime.tv_sec);
+    out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec);
+
+    out->ia_ctime = be64toh(in->ctime.tv_sec);
+    out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec);
+}
+
+/* posix_mdata_getxattr reads GF_XATTR_MDATA_KEY into value (up to size
+ * bytes): through _fd when it is not -1, otherwise through path with
+ * sys_lgetxattr(). The sys_*getxattr() result is returned unchanged.
+ */
+static ssize_t
+posix_mdata_getxattr(int _fd, const char *path, void *value, size_t size)
+{
+    if (_fd != -1) {
+        return sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size);
+    }
+
+    return sys_lgetxattr(path, GF_XATTR_MDATA_KEY, value, size);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk.
+ *
+ * The xattr is read through _fd when it is not -1, else through
+ * real_path_arg, else through the handle path built from inode->gfid.
+ *
+ * Returns 0 with *metadata filled in host byte order, or -1 on failure.
+ * *op_errno is set on every failure except a NULL metadata or a NULL inode
+ * when the handle path is needed. An xattr shorter than posix_mdata_disk_t
+ * is rejected with EINVAL instead of being decoded from a partly filled
+ * buffer.
+ */
+static int
+posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+                        inode_t *inode, posix_mdata_t *metadata, int *op_errno)
+{
+    size_t size = 256;
+    ssize_t xattr_len = -1;
+    int op_ret = -1;
+    char *value = NULL;
+    char gfid_str[64] = {0};
+    char *real_path = NULL;
+    const char *path = NULL;
+
+    if (!metadata) {
+        goto out;
+    }
+
+    if ((_fd == -1) && !real_path_arg) {
+        GF_VALIDATE_OR_GOTO(this->name, inode, out);
+        /* NOTE(review): kept in this function rather than in a helper in
+         * case MAKE_HANDLE_PATH allocates from the caller's stack frame
+         * - confirm against the macro definition. */
+        MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL);
+        if (!real_path) {
+            *op_errno = errno;
+            uuid_utoa_r(inode->gfid, gfid_str);
+            gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_LSTAT_FAILED,
+                   "lstat on gfid %s failed", gfid_str);
+            goto out;
+        }
+    }
+
+    /* Only consulted when _fd is -1; non-NULL in that case. */
+    path = real_path_arg ? real_path_arg : real_path;
+
+    value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char);
+    if (!value) {
+        *op_errno = ENOMEM;
+        goto out;
+    }
+
+    xattr_len = posix_mdata_getxattr(_fd, path, value, size);
+    if (xattr_len == -1) {
+        *op_errno = errno;
+        GF_FREE(value);
+        value = NULL;
+
+        if ((*op_errno == ENOTSUP) || (*op_errno == ENOSYS)) {
+            GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+                                GF_LOG_WARNING,
+                                "Extended attributes not supported"
+                                " (try remounting brick with 'user xattr' "
+                                "flag)");
+        } else if (*op_errno == ENOATTR || *op_errno == ENODATA) {
+            gf_msg_debug(this->name, 0,
+                         "No such attribute:%s for file %s gfid: %s",
+                         GF_XATTR_MDATA_KEY, path ? path : "null",
+                         inode ? uuid_utoa(inode->gfid) : "null");
+            goto out;
+        }
+
+        /* The first attempt failed: ask for the stored length and retry
+         * with a buffer of exactly that size. */
+        xattr_len = posix_mdata_getxattr(_fd, path, NULL, 0);
+        if (xattr_len == -1) { /* give up now and exit with an error */
+            *op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED,
+                   "getxattr failed on %s gfid: %s key: %s ",
+                   path ? path : "null",
+                   inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY);
+            goto out;
+        }
+
+        /* A value too short to hold the record is rejected below; do not
+         * bother allocating for it. */
+        if ((size_t)xattr_len >= sizeof(posix_mdata_disk_t)) {
+            size = (size_t)xattr_len;
+            value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char);
+            if (!value) {
+                *op_errno = ENOMEM;
+                goto out;
+            }
+
+            xattr_len = posix_mdata_getxattr(_fd, path, value, size);
+            if (xattr_len == -1) {
+                *op_errno = errno;
+                gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED,
+                       "getxattr failed on %s gfid: %s key: %s ",
+                       path ? path : "null",
+                       inode ? uuid_utoa(inode->gfid) : "null",
+                       GF_XATTR_MDATA_KEY);
+                goto out;
+            }
+        }
+    }
+
+    /* Decoding reads sizeof(posix_mdata_disk_t) bytes from value, so a
+     * shorter xattr would expose uninitialised or out-of-bounds memory. */
+    if ((size_t)xattr_len < sizeof(posix_mdata_disk_t)) {
+        *op_errno = EINVAL;
+        gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_XATTR_FAILED,
+               "xattr %s on %s gfid: %s is shorter than the expected "
+               "%zu bytes",
+               GF_XATTR_MDATA_KEY, path ? path : "null",
+               inode ? uuid_utoa(inode->gfid) : "null",
+               sizeof(posix_mdata_disk_t));
+        goto out;
+    }
+
+    posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value);
+
+    op_ret = 0;
+out:
+    if (value)
+        GF_FREE(value);
+    return op_ret;
+}
+
+/* posix_store_mdata_xattr stores the posix_mdata_t on disk.
+ *
+ * The record is converted with posix_mdata_to_disk() and written under
+ * GF_XATTR_MDATA_KEY through fd when it is not -1, else through
+ * real_path_arg, else through the handle path built from inode->gfid.
+ *
+ * Returns the result of the setxattr call, or -1 when metadata is NULL or
+ * no path could be determined. On failure errno still holds the failure
+ * cause when the function returns, because callers read it afterwards.
+ */
+static int
+posix_store_mdata_xattr(xlator_t *this, const char *real_path_arg, int fd,
+                        inode_t *inode, posix_mdata_t *metadata)
+{
+    char *real_path = NULL;
+    int op_ret = 0;
+    int saved_errno = 0;
+    gf_boolean_t fd_based_fop = _gf_false;
+    char *key = GF_XATTR_MDATA_KEY;
+    char gfid_str[64] = {0};
+    posix_mdata_disk_t disk_metadata;
+
+    if (!metadata) {
+        errno = EINVAL;
+        op_ret = -1;
+        goto out;
+    }
+
+    if (fd != -1) {
+        fd_based_fop = _gf_true;
+    }
+    if (!(fd_based_fop || real_path_arg)) {
+        /* The handle path is derived from the gfid, so an inode is
+         * mandatory on this branch. */
+        if (!inode) {
+            errno = EINVAL;
+            op_ret = -1;
+            goto out;
+        }
+        MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL);
+        if (!real_path) {
+            uuid_utoa_r(inode->gfid, gfid_str);
+            gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_LSTAT_FAILED,
+                   "lstat on gfid %s failed", gfid_str);
+            op_ret = -1;
+            goto out;
+        }
+    }
+
+    /* Serialise into the byte-order independent on-disk layout */
+    posix_mdata_to_disk(&disk_metadata, metadata);
+
+    if (fd_based_fop) {
+        op_ret = sys_fsetxattr(fd, key, (void *)&disk_metadata,
+                               sizeof(posix_mdata_disk_t), 0);
+    } else if (real_path_arg) {
+        op_ret = sys_lsetxattr(real_path_arg, key, (void *)&disk_metadata,
+                               sizeof(posix_mdata_disk_t), 0);
+    } else if (real_path) {
+        op_ret = sys_lsetxattr(real_path, key, (void *)&disk_metadata,
+                               sizeof(posix_mdata_disk_t), 0);
+    }
+
+#ifdef GF_DARWIN_HOST_OS
+    /* NOTE(review): `value` is not declared anywhere in this function, so
+     * this branch cannot compile as written. It presumably should dump
+     * disk_metadata - confirm against the posix_dump_buffer() prototype. */
+    if (real_path_arg) {
+        posix_dump_buffer(this, real_path_arg, key, value, 0);
+    } else if (real_path) {
+        posix_dump_buffer(this, real_path, key, value, 0);
+    }
+#endif
+out:
+    if (op_ret < 0) {
+        /* Logging may overwrite errno; keep the failure cause for callers */
+        saved_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_XATTR_FAILED,
+               "file: %s: gfid: %s key:%s ",
+               real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
+               inode ? uuid_utoa(inode->gfid) : "null", key);
+        errno = saved_errno;
+    }
+    return op_ret;
+}
+
+/* __posix_get_mdata_xattr serves the time attributes from the posix_mdata_t
+ * cached in the inode context, falling back to the on-disk xattr when the
+ * context holds nothing. No lock is taken here; posix_get_mdata_xattr is the
+ * variant that wraps this call in inode->lock.
+ *
+ * Returns -1 only when the mdata allocation fails. A failed fetch from disk
+ * returns 0 and leaves stbuf untouched.
+ */
+int
+__posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+                        inode_t *inode, struct iatt *stbuf)
+{
+    uint64_t ctx;
+    posix_mdata_t *mdata = NULL;
+    int op_errno = 0;
+
+    /* Handle readdirp: inode might be null, time attributes should be served
+     * from xattr not from backend's file attributes */
+    if (inode && (__inode_ctx_get1(inode, this, &ctx) == 0)) {
+        mdata = (posix_mdata_t *)(uintptr_t)ctx;
+    }
+
+    if (!mdata) {
+        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+        if (!mdata) {
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
+                   "Could not allocate mdata. file: %s: gfid: %s",
+                   real_path ? real_path : "null",
+                   inode ? uuid_utoa(inode->gfid) : "null");
+            return -1;
+        }
+
+        if (posix_fetch_mdata_xattr(this, real_path, _fd, inode, mdata,
+                                    &op_errno) != 0) {
+            /* The xattr is missing, which happens for files created
+             * before ctime was enabled. That is expected when a stbuf was
+             * supplied and the error is not ENOENT; any other combination
+             * should not be hit, so it is logged - but never treated as a
+             * failure.
+             */
+            if (!stbuf || op_errno == ENOENT) {
+                gf_msg(this->name, GF_LOG_WARNING, op_errno,
+                       P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ",
+                       real_path ? real_path : "null",
+                       inode ? uuid_utoa(inode->gfid) : "null",
+                       GF_XATTR_MDATA_KEY);
+            }
+            GF_FREE(mdata);
+            return 0;
+        }
+
+        /* Got mdata from disk, cache it in the inode ctx. This case is
+         * hit when the in-memory state was lost, e.g. after the brick
+         * went down.
+         */
+        if (inode) {
+            ctx = (uint64_t)(uintptr_t)mdata;
+            __inode_ctx_set1(inode, this, &ctx);
+        }
+    }
+
+    if (stbuf) {
+        stbuf->ia_ctime = mdata->ctime.tv_sec;
+        stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+        stbuf->ia_mtime = mdata->mtime.tv_sec;
+        stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+        stbuf->ia_atime = mdata->atime.tv_sec;
+        stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+    }
+
+    /* Without an inode nothing was cached, so the record is ours to free */
+    if (!inode) {
+        GF_FREE(mdata);
+    }
+
+    return 0;
+}
+
+/* posix_get_mdata_xattr is the locking wrapper around
+ * __posix_get_mdata_xattr: it validates inode, then performs the lookup
+ * with inode->lock held. Returns -1 when inode fails validation, otherwise
+ * whatever the inner call returns.
+ */
+int
+posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+                      inode_t *inode, struct iatt *stbuf)
+{
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+    LOCK(&inode->lock);
+    ret = __posix_get_mdata_xattr(this, real_path, _fd, inode, stbuf);
+    UNLOCK(&inode->lock);
+
+out:
+    return ret;
+}
+
+/* posix_compare_timespec orders two timestamps.
+ *
+ * Returns a positive value when *first is later than *second, a negative
+ * value when it is earlier and 0 when both are equal. The result is derived
+ * from comparisons rather than from a subtraction, because narrowing a
+ * time_t/long difference to int can flip its sign or turn it into 0.
+ */
+static int
+posix_compare_timespec(struct timespec *first, struct timespec *second)
+{
+    if (first->tv_sec != second->tv_sec)
+        return (first->tv_sec > second->tv_sec) ? 1 : -1;
+
+    if (first->tv_nsec != second->tv_nsec)
+        return (first->tv_nsec > second->tv_nsec) ? 1 : -1;
+
+    return 0;
+}
+
+/* posix_set_mdata_xattr_legacy_files creates or heals the mdata xattr of a
+ * file from the times supplied in mdata_iatt.
+ *
+ * When the inode context or the on-disk xattr already carries times, each
+ * of ctime/mtime/atime is only moved forward to the supplied value.
+ * Otherwise a version 1 record is built from mdata_iatt. The record is
+ * cached in the inode context and written to disk, all under inode->lock.
+ *
+ * Returns 0 on success and non-zero with *op_errno set when the allocation
+ * or the store fails. A NULL this or inode fails validation and returns
+ * the initial value of ret (0).
+ */
+int
+posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+                                   const char *realpath,
+                                   struct mdata_iatt *mdata_iatt, int *op_errno)
+{
+    uint64_t ctx;
+    posix_mdata_t *mdata = NULL;
+    posix_mdata_t imdata = {
+        0,
+    };
+    int ret = 0;
+    gf_boolean_t mdata_already_set = _gf_false;
+
+    GF_VALIDATE_OR_GOTO("posix", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+    LOCK(&inode->lock);
+    {
+        ret = __inode_ctx_get1(inode, this, &ctx);
+        if (ret == 0 && ctx) {
+            mdata = (posix_mdata_t *)(uintptr_t)ctx;
+            mdata_already_set = _gf_true;
+        } else {
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+            if (!mdata) {
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
+                       "Could not allocate mdata. gfid: %s",
+                       uuid_utoa(inode->gfid));
+                ret = -1;
+                *op_errno = ENOMEM;
+                goto unlock;
+            }
+
+            ret = posix_fetch_mdata_xattr(this, realpath, -1, inode,
+                                          (void *)mdata, op_errno);
+            if (ret == 0) {
+                /* Got mdata from disk. This is a race, another client
+                 * has healed the xattr during lookup. So set it in inode
+                 * ctx */
+                ctx = (uint64_t)(uintptr_t)mdata;
+                __inode_ctx_set1(inode, this, &ctx);
+                mdata_already_set = _gf_true;
+            } else {
+                /* Nothing on disk yet: seed the record from the caller's
+                 * times and discard the fetch error. */
+                *op_errno = 0;
+                mdata->version = 1;
+                mdata->flags = 0;
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
+
+                ctx = (uint64_t)(uintptr_t)mdata;
+                __inode_ctx_set1(inode, this, &ctx);
+            }
+        }
+
+        if (mdata_already_set) {
+            /* Compare and update the larger time */
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
+
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
+                mdata->ctime = imdata.ctime;
+            }
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
+                mdata->mtime = imdata.mtime;
+            }
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
+                mdata->atime = imdata.atime;
+            }
+        }
+
+        ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata);
+        if (ret) {
+            /* Capture errno before logging gets a chance to change it */
+            *op_errno = errno;
+            gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+                   P_MSG_STOREMDATA_FAILED, "gfid: %s key:%s ",
+                   uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY);
+            goto unlock;
+        }
+    }
+unlock:
+    UNLOCK(&inode->lock);
+out:
+    return ret;
+}
+
+/* posix_set_mdata_xattr updates the posix_mdata_t based on the flag
+ * in inode context and stores it on disk
+ *
+ * time         - candidate time for the fields selected in flag
+ * u_atime      - explicit atime, used only when update_utime and flag->atime
+ * u_mtime      - explicit mtime, used only when update_utime and flag->mtime
+ * stbuf        - optional; receives the resulting times on success
+ * flag         - selects which of ctime/mtime/atime are touched
+ * update_utime - _gf_true for explicit utime style updates: mtime/atime are
+ *                set unconditionally from u_mtime/u_atime and no xattr is
+ *                created when none exists yet
+ *
+ * Returns 0 on success (including the "nothing to update" utime case) and
+ * non-zero on failure.
+ */
+static int
+posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+                      inode_t *inode, struct timespec *time,
+                      struct timespec *u_atime, struct timespec *u_mtime,
+                      struct iatt *stbuf, posix_mdata_flag_t *flag,
+                      gf_boolean_t update_utime)
+{
+    uint64_t ctx;
+    posix_mdata_t *mdata = NULL;
+    int ret = -1;
+    int op_errno = 0;
+
+    GF_VALIDATE_OR_GOTO("posix", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, inode, out);
+    GF_VALIDATE_OR_GOTO(this->name, time, out);
+
+    /* Either missing pointer would be dereferenced below, so one flagged
+     * field without its time is enough to refuse the update. */
+    if (update_utime &&
+        ((flag->atime && !u_atime) || (flag->mtime && !u_mtime))) {
+        goto out;
+    }
+
+    LOCK(&inode->lock);
+    {
+        ret = __inode_ctx_get1(inode, this, &ctx);
+        if (ret == 0) {
+            mdata = (posix_mdata_t *)(uintptr_t)ctx;
+        }
+        if (ret == -1 || !mdata) {
+            /*
+             * Do we need to fetch the data from xattr
+             * If we does we can compare the value and store
+             * the largest data in inode ctx.
+             */
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
+            if (!mdata) {
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
+                       "Could not allocate mdata. file: %s: gfid: %s",
+                       real_path ? real_path : "null", uuid_utoa(inode->gfid));
+                ret = -1;
+                goto unlock;
+            }
+
+            ret = posix_fetch_mdata_xattr(this, real_path, fd, inode,
+                                          (void *)mdata, &op_errno);
+            if (ret == 0) {
+                /* Got mdata from disk, set it in inode ctx. This case
+                 * is hit when in-memory status is lost due to brick
+                 * down scenario
+                 */
+                ctx = (uint64_t)(uintptr_t)mdata;
+                __inode_ctx_set1(inode, this, &ctx);
+            } else {
+                /*
+                 * This is the first time creating the time attr. This happens
+                 * when you activate this feature. On this code path, only new
+                 * files will create mdata xattr. The legacy files (files
+                 * created before ctime enabled) will not have any xattr set.
+                 * The xattr on legacy file will be set via lookup.
+                 */
+
+                /* Don't create xattr with utimes/utimensat, only update if
+                 * present. This otherwise causes issues during inservice
+                 * upgrade. It causes inconsistent xattr values with in replica
+                 * set. The scenario happens during upgrade where clients are
+                 * older versions (without the ctime feature) and the server is
+                 * upgraded to the new version (with the ctime feature which
+                 * is enabled by default).
+                 */
+
+                if (update_utime) {
+                    UNLOCK(&inode->lock);
+                    GF_FREE(mdata);
+                    return 0;
+                }
+
+                mdata->version = 1;
+                mdata->flags = 0;
+                mdata->ctime.tv_sec = time->tv_sec;
+                mdata->ctime.tv_nsec = time->tv_nsec;
+                mdata->atime.tv_sec = time->tv_sec;
+                mdata->atime.tv_nsec = time->tv_nsec;
+                mdata->mtime.tv_sec = time->tv_sec;
+                mdata->mtime.tv_nsec = time->tv_nsec;
+
+                ctx = (uint64_t)(uintptr_t)mdata;
+                __inode_ctx_set1(inode, this, &ctx);
+            }
+        }
+
+        /* In distributed systems, there could be races with fops
+         * updating mtime/atime which could result in different
+         * mtime/atime for same file. So this makes sure, only the
+         * highest time is retained. If the mtime/atime update comes
+         * from the explicit utime syscall, it is allowed to set to
+         * previous or future time but the ctime is always set to
+         * current time.
+         */
+        if (update_utime) {
+            if (flag->ctime &&
+                posix_compare_timespec(time, &mdata->ctime) > 0) {
+                mdata->ctime = *time;
+            }
+            if (flag->mtime) {
+                mdata->mtime = *u_mtime;
+            }
+            if (flag->atime) {
+                mdata->atime = *u_atime;
+            }
+        } else {
+            if (flag->ctime &&
+                posix_compare_timespec(time, &mdata->ctime) > 0) {
+                mdata->ctime = *time;
+            }
+            if (flag->mtime &&
+                posix_compare_timespec(time, &mdata->mtime) > 0) {
+                mdata->mtime = *time;
+            }
+            if (flag->atime &&
+                posix_compare_timespec(time, &mdata->atime) > 0) {
+                mdata->atime = *time;
+            }
+        }
+
+        if (inode->ia_type == IA_INVAL) {
+            /*
+             * TODO: This is non-linked inode. So we have to sync the
+             * data into backend. Because inode_link may return
+             * a different inode.
+             */
+            /*  ret = posix_store_mdata_xattr (this, loc, fd,
+             *                                 mdata); */
+        }
+        /*
+         * With this patch set, we are setting the xattr for each update
+         * We should evaluate the performance, and based on that we can
+         * decide on asynchronous updation.
+         */
+        ret = posix_store_mdata_xattr(this, real_path, fd, inode, mdata);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED,
+                   "file: %s: gfid: %s key:%s ", real_path ? real_path : "null",
+                   uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY);
+            goto unlock;
+        }
+
+        /* Report the stored times while the lock is still held: mdata is
+         * reachable from the inode ctx, so other updaters of this inode
+         * may change it once the lock is released. */
+        if (stbuf) {
+            stbuf->ia_ctime = mdata->ctime.tv_sec;
+            stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+            stbuf->ia_mtime = mdata->mtime.tv_sec;
+            stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+            stbuf->ia_atime = mdata->atime.tv_sec;
+            stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+        }
+    }
+unlock:
+    UNLOCK(&inode->lock);
+out:
+    return ret;
+}
+
+/* posix_update_utime_in_mdata updates the posix_mdata_t when mtime/atime
+ * is modified using syscall
+ *
+ * valid selects the fields (GF_SET_ATTR_ATIME / GF_SET_ATTR_MTIME) whose
+ * new values are taken from stbuf; ctime is the candidate change time.
+ * The fd argument is not used: the update is always issued with fd -1, i.e.
+ * through real_path or the gfid handle. Failures are logged, not returned.
+ */
+void
+posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+                            inode_t *inode, struct timespec *ctime,
+                            struct iatt *stbuf, int valid)
+{
+    int32_t ret = 0;
+    /* Always struct timespec, whatever HAVE_UTIMENSAT says: these are
+     * handed to posix_set_mdata_xattr(), which takes struct timespec
+     * pointers and copies them into the mdata record as such. */
+    struct timespec tv_atime = {
+        0,
+    };
+    struct timespec tv_mtime = {
+        0,
+    };
+    posix_mdata_flag_t flag = {
+        0,
+    };
+
+    struct posix_private *priv = NULL;
+
+    priv = this->private;
+
+    /* NOTE:
+     * This routine (utimes) is intentionally allowed for all internal and
+     * external clients even if ctime is not set. This is because AFR and
+     * WORM uses time attributes for its internal operations
+     */
+    if (inode && priv->ctime) {
+        if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+            tv_atime.tv_sec = stbuf->ia_atime;
+            tv_atime.tv_nsec = stbuf->ia_atime_nsec;
+
+            flag.ctime = 1;
+            flag.atime = 1;
+        }
+
+        if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+            tv_mtime.tv_sec = stbuf->ia_mtime;
+            tv_mtime.tv_nsec = stbuf->ia_mtime_nsec;
+
+            flag.ctime = 1;
+            flag.mtime = 1;
+        }
+
+        if (flag.mtime || flag.atime) {
+            ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime,
+                                        &tv_atime, &tv_mtime, NULL, &flag,
+                                        _gf_true);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                       "posix set mdata atime failed on file:"
+                       " %s gfid:%s",
+                       real_path, uuid_utoa(inode->gfid));
+            }
+        }
+    }
+    return;
+}
+
+/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs
+ * to be modified
+ *
+ * The new ctime is taken from stbuf->ia_ctime / ia_ctime_nsec. The ctime,
+ * fd and valid arguments are not consulted; the update is always issued
+ * with fd -1. Failures are logged, not returned.
+ */
+void
+posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
+                            inode_t *inode, struct timespec *ctime,
+                            struct iatt *stbuf, int valid)
+{
+    int32_t ret = 0;
+    /* Always struct timespec, whatever HAVE_UTIMENSAT says: it is handed
+     * to posix_set_mdata_xattr(), which takes a struct timespec pointer. */
+    struct timespec tv_ctime = {
+        0,
+    };
+    posix_mdata_flag_t flag = {
+        0,
+    };
+
+    struct posix_private *priv = NULL;
+    priv = this->private;
+
+    if (inode && priv->ctime) {
+        tv_ctime.tv_sec = stbuf->ia_ctime;
+        tv_ctime.tv_nsec = stbuf->ia_ctime_nsec;
+        flag.ctime = 1;
+
+        ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL,
+                                    NULL, NULL, &flag, _gf_true);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata ctime failed on file:"
+                   " %s gfid:%s",
+                   real_path, uuid_utoa(inode->gfid));
+        }
+    }
+    return;
+}
+
+/* posix_get_mdata_flag translates the MDATA_CTIME / MDATA_MTIME /
+ * MDATA_ATIME bits of flags into the one-bit fields of *flag. Every field is
+ * written, so bits that are not set come out as 0. A NULL flag is ignored.
+ */
+static void
+posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag)
+{
+    if (flag == NULL) {
+        return;
+    }
+
+    flag->ctime = (flags & MDATA_CTIME) ? 1 : 0;
+    flag->mtime = (flags & MDATA_MTIME) ? 1 : 0;
+    flag->atime = (flags & MDATA_ATIME) ? 1 : 0;
+}
+
+/* posix_get_parent_mdata_flag translates the MDATA_PAR_CTIME /
+ * MDATA_PAR_MTIME / MDATA_PAR_ATIME bits of flags into the one-bit fields of
+ * *flag. Every field is written, so bits that are not set come out as 0.
+ * A NULL flag is ignored.
+ */
+static void
+posix_get_parent_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag)
+{
+    if (flag == NULL) {
+        return;
+    }
+
+    flag->ctime = (flags & MDATA_PAR_CTIME) ? 1 : 0;
+    flag->mtime = (flags & MDATA_PAR_MTIME) ? 1 : 0;
+    flag->atime = (flags & MDATA_PAR_ATIME) ? 1 : 0;
+}
+
+/* posix_set_ctime applies the time flags carried in frame->root->flags to
+ * the file's mdata, using frame->root->ctime as the candidate time. It does
+ * nothing when priv->ctime is off or no flag is set. Failures are logged,
+ * not returned.
+ */
+void
+posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+                int fd, inode_t *inode, struct iatt *stbuf)
+{
+    struct posix_private *priv = this->private;
+    posix_mdata_flag_t flag = {
+        0,
+    };
+
+    if (!priv->ctime) {
+        return;
+    }
+
+    posix_get_mdata_flag(frame->root->flags, &flag);
+    if (!(flag.ctime || flag.mtime || flag.atime)) {
+        return;
+    }
+
+    if (posix_set_mdata_xattr(this, real_path, fd, inode, &frame->root->ctime,
+                              NULL, NULL, stbuf, &flag, _gf_false)) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+               "posix set mdata failed on file: %s gfid:%s", real_path,
+               inode ? uuid_utoa(inode->gfid) : "No inode");
+    }
+}
+
+/* posix_set_parent_ctime applies the parent time flags (MDATA_PAR_*) carried
+ * in frame->root->flags to the given inode's mdata, using frame->root->ctime
+ * as the candidate time. It does nothing when inode is NULL, priv->ctime is
+ * off or no flag is set. Failures are logged, not returned.
+ */
+void
+posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+                       const char *real_path, int fd, inode_t *inode,
+                       struct iatt *stbuf)
+{
+    struct posix_private *priv = this->private;
+    posix_mdata_flag_t flag = {
+        0,
+    };
+
+    if (!inode || !priv->ctime) {
+        return;
+    }
+
+    posix_get_parent_mdata_flag(frame->root->flags, &flag);
+    if (!(flag.ctime || flag.mtime || flag.atime)) {
+        return;
+    }
+
+    if (posix_set_mdata_xattr(this, real_path, fd, inode, &frame->root->ctime,
+                              NULL, NULL, stbuf, &flag, _gf_false)) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+               "posix set mdata failed on file: %s gfid:%s", real_path,
+               uuid_utoa(inode->gfid));
+    }
+}
+
+/* posix_set_ctime_cfr updates the mdata of both files involved in a copy
+ * between two files, according to the time flags in frame->root->flags:
+ *   - destination (the *_out arguments): ctime and mtime, never atime
+ *   - source (the *_in arguments): atime only
+ * It does nothing when priv->ctime is off, no flag is set or
+ * frame->root->ctime carries no time. Failures are logged, not returned.
+ */
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                    const char *real_path_in, int fd_in, inode_t *inode_in,
+                    struct iatt *stbuf_in, const char *real_path_out,
+                    int fd_out, inode_t *inode_out, struct iatt *stbuf_out)
+{
+    posix_mdata_flag_t flag = {
+        0,
+    };
+    posix_mdata_flag_t flag_dup = {
+        0,
+    };
+    int ret = 0;
+    struct posix_private *priv = NULL;
+    char in_uuid_str[64] = {0}, out_uuid_str[64] = {0};
+
+    priv = this->private;
+
+    if (priv->ctime) {
+        (void)posix_get_mdata_flag(frame->root->flags, &flag);
+        if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+            goto out;
+        }
+
+        if (frame->root->ctime.tv_sec == 0) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata failed, No ctime : in: %s gfid_in:%s "
+                   "out: %s gfid_out:%s",
+                   real_path_in,
+                   (inode_in ? uuid_utoa_r(inode_in->gfid, in_uuid_str)
+                             : "No inode"),
+                   real_path_out,
+                   (inode_out ? uuid_utoa_r(inode_out->gfid, out_uuid_str)
+                              : "No inode"));
+            goto out;
+        }
+
+        flag_dup = flag;
+
+        /*
+         * For the destination file, no need to update atime.
+         * It got modified. Hence the things that need to be
+         * changed are mtime and ctime (provided the utime
+         * xlator from the client has set those flags, which
+         * are just copied to flag_dup).
+         */
+        if (flag.atime)
+            flag_dup.atime = 0;
+
+        ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out,
+                                    &frame->root->ctime, NULL, NULL, stbuf_out,
+                                    &flag_dup, _gf_false);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata failed on file: %s gfid:%s", real_path_out,
+                   inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+        }
+
+        /*
+         * For the source file, no need to change the mtime and ctime.
+         * For source file, it is only read operation. So, if at all
+         * anything needs to be updated, it is only the atime.
+         */
+        if (flag.atime)
+            flag_dup.atime = flag.atime;
+        flag_dup.mtime = 0;
+        flag_dup.ctime = 0;
+
+        /* Source side: the fd, inode and stbuf must be the *_in ones, so
+         * the atime lands on the file that was read. */
+        ret = posix_set_mdata_xattr(this, real_path_in, fd_in, inode_in,
+                                    &frame->root->ctime, NULL, NULL, stbuf_in,
+                                    &flag_dup, _gf_false);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata failed on file: %s gfid:%s", real_path_in,
+                   inode_in ? uuid_utoa(inode_in->gfid) : "No inode");
+        }
+    }
+out:
+    return;
+}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
new file mode 100644
index 00000000000..d37014af93e
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_METADATA_H
+#define _POSIX_METADATA_H
+
+#include "posix-metadata-disk.h"
+
+/* In-memory representation of the posix metadata xattr. It carries the same
+ * fields as posix_mdata_disk_t, with the timestamps held as struct timespec.
+ */
+typedef struct {
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ /* the three timestamps kept for the file */
+ struct timespec ctime;
+ struct timespec mtime;
+ struct timespec atime;
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+
+ char _pad[7]; /* manual padding */
+} posix_mdata_t;
+
+/* One-bit selectors naming which of ctime/mtime/atime an mdata update
+ * should touch.
+ */
+typedef struct {
+ unsigned short ctime : 1;
+ unsigned short mtime : 1;
+ unsigned short atime : 1;
+} posix_mdata_flag_t;
+
+/* With inode lock */
+int
+posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+/* Without inode lock */
+int
+__posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+void
+posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct timespec *ctime,
+ struct iatt *stbuf, int valid);
+void
+posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct timespec *ctime,
+ struct iatt *stbuf, int valid);
+void
+posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+ int fd, inode_t *inode, struct iatt *stbuf);
+void
+posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+ const char *real_path, int fd, inode_t *inode,
+ struct iatt *stbuf);
+/* The *_in arguments describe the source file and the *_out arguments the
+ * destination file of the copy. */
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                    const char *real_path_in, int fd_in, inode_t *inode_in,
+                    struct iatt *stbuf_in, const char *real_path_out,
+                    int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
+int
+posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+ const char *realpath,
+ struct mdata_iatt *mdata_iatt,
+ int *op_errno);
+void
+posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in);
+
+#endif /* _POSIX_METADATA_H */
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 72c52339e5e..42b965434b9 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -7,5873 +7,95 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#define __XOPEN_SOURCE 500
-#include <openssl/md5.h>
-#include <stdint.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <errno.h>
-#include <libgen.h>
-#include <pthread.h>
-#include <ftw.h>
-#include <sys/stat.h>
-#include <signal.h>
-#include <sys/uio.h>
-
-#ifndef GF_BSD_HOST_OS
-#include <alloca.h>
-#endif /* GF_BSD_HOST_OS */
-
-#ifdef HAVE_LINKAT
-#include <fcntl.h>
-#endif /* HAVE_LINKAT */
-
-#include "glusterfs.h"
-#include "checksum.h"
-#include "dict.h"
-#include "logging.h"
-#include "posix.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "syscall.h"
-#include "statedump.h"
-#include "locking.h"
-#include "timer.h"
-#include "glusterfs3-xdr.h"
-#include "hashfn.h"
-#include "posix-aio.h"
-#include "glusterfs-acl.h"
-
-extern char *marker_xattrs[];
-#define ALIGN_SIZE 4096
-
-#undef HAVE_SET_FSID
-#ifdef HAVE_SET_FSID
-
-#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid;
-
-#define SET_FS_ID(uid, gid) do { \
- old_fsuid = setfsuid (uid); \
- old_fsgid = setfsgid (gid); \
- } while (0)
-
-#define SET_TO_OLD_FS_ID() do { \
- setfsuid (old_fsuid); \
- setfsgid (old_fsgid); \
- } while (0)
-
-#else
-
-#define DECLARE_OLD_FS_ID_VAR
-#define SET_FS_ID(uid, gid)
-#define SET_TO_OLD_FS_ID()
-
-#endif
-int
-posix_forget (xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_cache = 0;
- if (!inode_ctx_del (inode, this, &tmp_cache))
- dict_destroy ((dict_t *)(long)tmp_cache);
-
- return 0;
-}
-
-/* Regular fops */
-
-int32_t
-posix_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
-{
- struct iatt buf = {0, };
- int32_t op_ret = -1;
- int32_t entry_ret = 0;
- int32_t op_errno = 0;
- dict_t * xattr = NULL;
- char * real_path = NULL;
- char * par_path = NULL;
- struct iatt postparent = {0,};
- int32_t gfidless = 0;
- struct posix_private *priv = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
-
- /* The Hidden directory should be for housekeeping purpose and it
- should not get any gfid on it */
- if (__is_root_gfid (loc->pargfid) && loc->name
- && (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Lookup issued on %s, which is not permitted",
- GF_HIDDEN_PATH);
- op_errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless);
- op_ret = -1;
- if (uuid_is_null (loc->pargfid) || (loc->name == NULL)) {
- /* nameless lookup */
- MAKE_INODE_HANDLE (real_path, this, loc, &buf);
- } else {
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf);
-
- if (uuid_is_null (loc->inode->gfid)) {
- posix_gfid_heal (this, real_path, loc, xdata);
- MAKE_ENTRY_HANDLE (real_path, par_path, this,
- loc, &buf);
- }
- }
-
- op_errno = errno;
-
- if (op_ret == -1) {
- if (op_errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- real_path, strerror (op_errno));
- }
-
- entry_ret = -1;
- goto parent;
- }
-
- if (xdata && (op_ret == 0)) {
- xattr = posix_lookup_xattr_fill (this, real_path, loc,
- xdata, &buf);
- }
-
-parent:
- if (par_path) {
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- if (op_errno == ENOENT)
- /* If parent directory is missing in a lookup,
- errno should be ESTALE (bad handle) and not
- ENOENT (missing entry)
- */
- op_errno = ESTALE;
- goto out;
- }
- }
-
- op_ret = entry_ret;
-out:
- if (xattr)
- dict_ref (xattr);
-
- if (!op_ret && !gfidless && uuid_is_null (buf.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for "
- "%s", (real_path) ? real_path: "");
- op_ret = -1;
- op_errno = ENODATA;
- }
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &buf, xattr, &postparent);
-
- if (xattr)
- dict_unref (xattr);
-
- return 0;
-}
-
-
-int32_t
-posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- struct iatt buf = {0,};
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct posix_private *priv = NULL;
- char *real_path = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- MAKE_INODE_HANDLE (real_path, this, loc, &buf);
-
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, (op_errno == ENOENT)?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "lstat on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID();
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL);
-
- return 0;
-}
-
-static int
-posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf)
-{
- int32_t ret = -1;
- mode_t mode = 0;
- struct stat stat;
- int is_symlink = 0;
-
- ret = sys_lstat (path, &stat);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "lstat failed: %s (%s)", path, strerror (errno));
- goto out;
- }
-
- if (S_ISLNK (stat.st_mode))
- is_symlink = 1;
-
- mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- ret = lchmod (path, mode);
- if ((ret == -1) && (errno == ENOSYS)) {
- /* in Linux symlinks are always in mode 0777 and no
- such call as lchmod exists.
- */
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%s)", path, strerror (errno));
- if (is_symlink) {
- ret = 0;
- goto out;
- }
-
- ret = chmod (path, mode);
- }
-out:
- return ret;
-}
-
-static int
-posix_do_chown (xlator_t *this,
- const char *path,
- struct iatt *stbuf,
- int32_t valid)
-{
- int32_t ret = -1;
- uid_t uid = -1;
- gid_t gid = -1;
-
- if (valid & GF_SET_ATTR_UID)
- uid = stbuf->ia_uid;
-
- if (valid & GF_SET_ATTR_GID)
- gid = stbuf->ia_gid;
-
- ret = lchown (path, uid, gid);
-
- return ret;
-}
-
-static int
-posix_do_utimes (xlator_t *this,
- const char *path,
- struct iatt *stbuf)
-{
- int32_t ret = -1;
- struct timeval tv[2] = {{0,},{0,}};
- struct stat stat;
- int is_symlink = 0;
-
- ret = sys_lstat (path, &stat);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s (%s)", path, strerror (errno));
- goto out;
- }
-
- if (S_ISLNK (stat.st_mode))
- is_symlink = 1;
-
- tv[0].tv_sec = stbuf->ia_atime;
- tv[0].tv_usec = stbuf->ia_atime_nsec / 1000;
- tv[1].tv_sec = stbuf->ia_mtime;
- tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000;
-
- ret = lutimes (path, tv);
- if ((ret == -1) && (errno == ENOSYS)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%s)", path, strerror (errno));
- if (is_symlink) {
- ret = 0;
- goto out;
- }
-
- ret = utimes (path, tv);
- }
-
-out:
- return ret;
-}
-
-int
-posix_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = 0;
- struct iatt statpre = {0,};
- struct iatt statpost = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_INODE_HANDLE (real_path, this, loc, &statpre);
-
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (lstat) on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
- op_ret = posix_do_chown (this, real_path, stbuf, valid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (chown) on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
- }
-
- if (valid & GF_SET_ATTR_MODE) {
- op_ret = posix_do_chmod (this, real_path, stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (chmod) on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
- }
-
- if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
- op_ret = posix_do_utimes (this, real_path, stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (utimes) on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
- }
-
- if (!valid) {
- op_ret = lchown (real_path, -1, -1);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lchown (%s, -1, -1) failed => (%s)",
- real_path, strerror (op_errno));
-
- goto out;
- }
- }
-
- op_ret = posix_pstat (this, loc->gfid, real_path, &statpost);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "setattr (lstat) on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
- &statpre, &statpost, NULL);
-
- return 0;
-}
-
-int32_t
-posix_do_fchown (xlator_t *this,
- int fd,
- struct iatt *stbuf,
- int32_t valid)
-{
- int ret = -1;
- uid_t uid = -1;
- gid_t gid = -1;
-
- if (valid & GF_SET_ATTR_UID)
- uid = stbuf->ia_uid;
-
- if (valid & GF_SET_ATTR_GID)
- gid = stbuf->ia_gid;
-
- ret = fchown (fd, uid, gid);
-
- return ret;
-}
-
-
-int32_t
-posix_do_fchmod (xlator_t *this,
- int fd, struct iatt *stbuf)
-{
- mode_t mode = 0;
-
- mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- return fchmod (fd, mode);
-}
-
-static int
-posix_do_futimes (xlator_t *this,
- int fd,
- struct iatt *stbuf)
-{
- gf_log (this->name, GF_LOG_WARNING, "function not implemented fd(%d)", fd);
-
- errno = ENOSYS;
- return -1;
-}
-
-int
-posix_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct iatt statpre = {0,};
- struct iatt statpost = {0,};
- struct posix_fd *pfd = NULL;
- int32_t ret = -1;
-
- DECLARE_OLD_FS_ID_VAR;
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
-
- op_ret = posix_fdstat (this, pfd->fd, &statpre);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fstat) failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- if (valid & GF_SET_ATTR_MODE) {
- op_ret = posix_do_fchmod (this, pfd->fd, stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fchmod) failed on fd=%p: %s",
- fd, strerror (op_errno));
- goto out;
- }
- }
-
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fchown) failed on fd=%p: %s",
- fd, strerror (op_errno));
- goto out;
- }
-
- }
-
- if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
- op_ret = posix_do_futimes (this, pfd->fd, stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (futimes) on failed fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
- }
-
- if (!valid) {
- op_ret = fchown (pfd->fd, -1, -1);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fchown (%d, -1, -1) failed => (%s)",
- pfd->fd, strerror (op_errno));
-
- goto out;
- }
- }
-
- op_ret = posix_fdstat (this, pfd->fd, &statpost);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsetattr (fstat) failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- &statpre, &statpost, NULL);
-
- return 0;
-}
-
-#ifdef FALLOC_FL_KEEP_SIZE
-static int32_t
-posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- off_t offset, size_t len, struct iatt *statpre,
- struct iatt *statpost)
-{
- struct posix_fd *pfd = NULL;
- int32_t ret = -1;
-
- DECLARE_OLD_FS_ID_VAR;
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
-
- ret = posix_fdstat (this, pfd->fd, statpre);
- if (ret == -1) {
- ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fallocate (fstat) failed on fd=%p: %s", fd,
- strerror (errno));
- goto out;
- }
-
- ret = sys_fallocate(pfd->fd, flags, offset, len);
- if (ret == -1) {
- ret = -errno;
- goto out;
- }
-
- ret = posix_fdstat (this, pfd->fd, statpost);
- if (ret == -1) {
- ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fallocate (fstat) failed on fd=%p: %s", fd,
- strerror (errno));
- goto out;
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- return ret;
-}
-#endif /* FALLOC_FL_KEEP_SIZE */
-
-char*
-_page_aligned_alloc (size_t size, char **aligned_buf)
-{
- char *alloc_buf = NULL;
- char *buf = NULL;
-
- alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char);
- if (!alloc_buf)
- goto out;
- /* page aligned buffer */
- buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE);
- *aligned_buf = buf;
-out:
- return alloc_buf;
-}
-
-static int32_t
-_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct)
-{
- off_t num_vect = 0;
- off_t num_loop = 1;
- off_t idx = 0;
- int32_t op_ret = -1;
- int32_t vect_size = VECTOR_SIZE;
- off_t remain = 0;
- off_t extra = 0;
- struct iovec *vector = NULL;
- char *iov_base = NULL;
- char *alloc_buf = NULL;
-
- if (len == 0)
- return 0;
- if (len < VECTOR_SIZE)
- vect_size = len;
-
- num_vect = len / (vect_size);
- remain = len % vect_size ;
- if (num_vect > MAX_NO_VECT) {
- extra = num_vect % MAX_NO_VECT;
- num_loop = num_vect / MAX_NO_VECT;
- num_vect = MAX_NO_VECT;
- }
-
- vector = GF_CALLOC (num_vect, sizeof(struct iovec),
- gf_common_mt_iovec);
- if (!vector)
- return -1;
- if (o_direct) {
- alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
- if (!alloc_buf) {
- gf_log ("_posix_do_zerofill", GF_LOG_DEBUG,
- "memory alloc failed, vect_size %d: %s",
- vect_size, strerror(errno));
- GF_FREE(vector);
- return -1;
- }
- } else {
- iov_base = GF_CALLOC (vect_size, sizeof(char),
- gf_common_mt_char);
- if (!iov_base) {
- GF_FREE(vector);
- return -1;
- }
- }
-
- for (idx = 0; idx < num_vect; idx++) {
- vector[idx].iov_base = iov_base;
- vector[idx].iov_len = vect_size;
- }
- if (lseek(fd, offset, SEEK_SET) < 0) {
- op_ret = -1;
- goto err;
- }
-
- for (idx = 0; idx < num_loop; idx++) {
- op_ret = writev(fd, vector, num_vect);
- if (op_ret < 0)
- goto err;
- }
- if (extra) {
- op_ret = writev(fd, vector, extra);
- if (op_ret < 0)
- goto err;
- }
- if (remain) {
- vector[0].iov_len = remain;
- op_ret = writev(fd, vector , 1);
- if (op_ret < 0)
- goto err;
- }
-err:
- if (o_direct)
- GF_FREE(alloc_buf);
- else
- GF_FREE(iov_base);
- GF_FREE(vector);
- return op_ret;
-}
-
-static int32_t
-posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, off_t len, struct iatt *statpre,
- struct iatt *statpost)
-{
- struct posix_fd *pfd = NULL;
- int32_t ret = -1;
-
- DECLARE_OLD_FS_ID_VAR;
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
-
- ret = posix_fdstat (this, pfd->fd, statpre);
- if (ret == -1) {
- ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation fstat failed on fd = %p: %s", fd,
- strerror (errno));
- goto out;
- }
- ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
- if (ret < 0) {
- ret = -errno;
- gf_log(this->name, GF_LOG_ERROR,
- "zerofill failed on fd %d length %" PRId64 " %s",
- pfd->fd, len, strerror(errno));
- goto out;
- }
- if (pfd->flags & (O_SYNC|O_DSYNC)) {
- ret = fsync (pfd->fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "fsync() in writev on fd %d failed: %s",
- pfd->fd, strerror (errno));
- ret = -errno;
- goto out;
- }
- }
-
- ret = posix_fdstat (this, pfd->fd, statpost);
- if (ret == -1) {
- ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post operation fstat failed on fd=%p: %s", fd,
- strerror (errno));
- goto out;
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- return ret;
-}
-
-static int32_t
-_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
- off_t offset, size_t len, dict_t *xdata)
-{
- int32_t ret;
-#ifndef FALLOC_FL_KEEP_SIZE
- ret = EOPNOTSUPP;
-
-#else /* FALLOC_FL_KEEP_SIZE */
- int32_t flags = 0;
- struct iatt statpre = {0,};
- struct iatt statpost = {0,};
-
- if (keep_size)
- flags = FALLOC_FL_KEEP_SIZE;
-
- ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
- &statpre, &statpost);
- if (ret < 0)
- goto err;
-
- STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL);
- return 0;
-
-err:
-#endif /* FALLOC_FL_KEEP_SIZE */
- STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL);
- return 0;
-}
-
-static int32_t
-posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- int32_t ret;
-#ifndef FALLOC_FL_KEEP_SIZE
- ret = EOPNOTSUPP;
-
-#else /* FALLOC_FL_KEEP_SIZE */
- int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE;
- struct iatt statpre = {0,};
- struct iatt statpost = {0,};
-
- ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
- &statpre, &statpost);
- if (ret < 0)
- goto err;
-
- STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL);
- return 0;
-
-err:
-#endif /* FALLOC_FL_KEEP_SIZE */
- STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL);
- return 0;
-}
-
-static int32_t
-posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
-{
- int32_t ret = 0;
- struct iatt statpre = {0,};
- struct iatt statpost = {0,};
-
- ret = posix_do_zerofill(frame, this, fd, offset, len,
- &statpre, &statpost);
- if (ret < 0)
- goto err;
-
- STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
- return 0;
-
-err:
- STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
- return 0;
-
-}
-
-int32_t
-posix_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- char * real_path = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- DIR * dir = NULL;
- struct posix_fd * pfd = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (fd, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- op_ret = -1;
- dir = opendir (real_path);
-
- if (dir == NULL) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = dirfd (dir);
- if (op_ret < 0) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "dirfd() failed on %s: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
- if (!pfd) {
- op_errno = errno;
- goto out;
- }
-
- pfd->dir = dir;
- pfd->fd = dirfd (dir);
-
- op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
- if (op_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set the fd context path=%s fd=%p",
- real_path, fd);
-
- op_ret = 0;
-
-out:
- if (op_ret == -1) {
- if (dir) {
- closedir (dir);
- dir = NULL;
- }
- if (pfd) {
- GF_FREE (pfd);
- pfd = NULL;
- }
- }
-
- SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
- return 0;
-}
-
-int32_t
-posix_releasedir (xlator_t *this,
- fd_t *fd)
-{
- struct posix_fd * pfd = NULL;
- uint64_t tmp_pfd = 0;
- int ret = 0;
-
- struct posix_private *priv = NULL;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = fd_ctx_del (fd, this, &tmp_pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd from fd=%p is NULL", fd);
- goto out;
- }
-
- pfd = (struct posix_fd *)(long)tmp_pfd;
- if (!pfd->dir) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd->dir is NULL for fd=%p", fd);
- goto out;
- }
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->janitor_lock);
- {
- INIT_LIST_HEAD (&pfd->list);
- list_add_tail (&pfd->list, &priv->janitor_fds);
- pthread_cond_signal (&priv->janitor_cond);
- }
- pthread_mutex_unlock (&priv->janitor_lock);
-
-out:
- return 0;
-}
-
-
-int32_t
-posix_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata)
-{
- char * dest = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = NULL;
- struct iatt stbuf = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- dest = alloca (size + 1);
-
- MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = readlink (real_path, dest, size);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "readlink on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- dest[op_ret] = 0;
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL);
-
- return 0;
-}
-
-
-int
-posix_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
-{
- int tmp_fd = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = 0;
- char *par_path = 0;
- struct iatt stbuf = { 0, };
- char was_present = 1;
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- void * uuid_req = NULL;
- int32_t nlink_samepgfid = 0;
- char *pgfid_xattr_key = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
-
- gid = frame->root->gid;
-
- SET_FS_ID (frame->root->uid, gid);
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- if (preparent.ia_prot.sgid) {
- gid = preparent.ia_gid;
- }
-
- /* Check if the 'gfid' already exists, because this mknod may be an
- internal call from distribute for creating 'linkfile', and that
- linkfile may be for a hardlinked file */
- if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
- dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY);
- op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
- if (op_ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get the gfid from dict for %s",
- loc->path);
- goto real_op;
- }
- op_ret = posix_create_link_if_gfid_exists (this, uuid_req,
- real_path);
- if (!op_ret)
- goto post_op;
- }
-
-real_op:
-#ifdef __NetBSD__
- if (S_ISFIFO(mode))
- op_ret = mkfifo (real_path, mode);
- else
-#endif /* __NetBSD__ */
- op_ret = mknod (real_path, mode, dev);
-
- if (op_ret == -1) {
- op_errno = errno;
- if ((op_errno == EINVAL) && S_ISREG (mode)) {
- /* Over Darwin, mknod with (S_IFREG|mode)
- doesn't work */
- tmp_fd = creat (real_path, mode);
- if (tmp_fd == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s: %s",
- real_path, strerror (errno));
- goto out;
- }
- close (tmp_fd);
- } else {
-
- gf_log (this->name, GF_LOG_ERROR,
- "mknod on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
- }
-
- op_ret = posix_gfid_set (this, real_path, loc, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting gfid on %s failed", real_path);
- }
-
-#ifndef HAVE_SET_FSID
- op_ret = lchown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lchown on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-#endif
-
-post_op:
- op_ret = posix_acl_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting ACLs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- if (priv->update_pgfid_nlinks) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
- loc->pargfid);
- nlink_samepgfid = 1;
-
- SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
- XATTR_CREATE, op_ret, this, ignore);
- }
-
-ignore:
- op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting xattrs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- op_ret = posix_pstat (this, NULL, real_path, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "mknod on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent, NULL);
-
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_path);
- }
-
- return 0;
-}
-
-
-int
-posix_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- char *par_path = NULL;
- struct iatt stbuf = {0, };
- char was_present = 1;
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- /* The Hidden directory should be for housekeeping purpose and it
- should not get created from a user request */
- if (__is_root_gfid (loc->pargfid) &&
- (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
- gf_log (this->name, GF_LOG_WARNING,
- "mkdir issued on %s, which is not permitted",
- GF_HIDDEN_PATH);
- op_errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
-
- gid = frame->root->gid;
-
- op_ret = posix_pstat (this, NULL, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
- }
-
- SET_FS_ID (frame->root->uid, gid);
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- if (preparent.ia_prot.sgid) {
- gid = preparent.ia_gid;
- mode |= S_ISGID;
- }
-
- op_ret = mkdir (real_path, mode);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "mkdir of %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_gfid_set (this, real_path, loc, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting gfid on %s failed", real_path);
- }
-
-#ifndef HAVE_SET_FSID
- op_ret = chown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-#endif
- op_ret = posix_acl_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting ACLs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting xattrs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- op_ret = posix_pstat (this, NULL, real_path, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent, NULL);
-
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_path);
- }
-
- return 0;
-}
-
-
-int32_t
-posix_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- char *par_path = NULL;
- int32_t fd = -1;
- struct iatt stbuf = {0,};
- struct posix_private *priv = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- char *pgfid_xattr_key = NULL;
- int32_t nlink_samepgfid = 0;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- if (stbuf.ia_nlink == 1)
- posix_handle_unset (this, stbuf.ia_gfid, NULL);
-
- priv = this->private;
- if (priv->background_unlink) {
- if (IA_ISREG (loc->inode->ia_type)) {
- fd = open (real_path, O_RDONLY);
- if (fd == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
- }
- }
-
- if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
- loc->pargfid);
- LOCK (&loc->inode->lock);
- {
- UNLINK_MODIFY_PGFID_XATTR (real_path, pgfid_xattr_key,
- nlink_samepgfid, 0, op_ret,
- this, unlock);
- }
- unlock:
- UNLOCK (&loc->inode->lock);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "modification of "
- "parent gfid xattr failed (path:%s gfid:%s)",
- real_path, uuid_utoa (loc->inode->gfid));
- goto out;
- }
- }
-
- op_ret = sys_unlink (real_path);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "unlink of %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- &preparent, &postparent, NULL);
-
- if (fd != -1) {
- close (fd);
- }
-
- return 0;
-}
-
-
-int
-posix_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = NULL;
- char * par_path = NULL;
- char * gfid_str = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- struct iatt stbuf;
- struct posix_private *priv = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- /* The Hidden directory should be for housekeeping purpose and it
- should not get deleted from inside process */
- if (__is_root_gfid (loc->pargfid) &&
- (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
- gf_log (this->name, GF_LOG_WARNING,
- "rmdir issued on %s, which is not permitted",
- GF_HIDDEN_PATH);
- op_errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- priv = this->private;
-
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- if (flags) {
- gfid_str = uuid_utoa (stbuf.ia_gfid);
- char *tmp_path = alloca (strlen (priv->trash_path) +
- strlen ("/") +
- strlen (gfid_str) + 1);
-
- mkdir (priv->trash_path, 0755);
- sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str);
- op_ret = rename (real_path, tmp_path);
- } else {
- op_ret = rmdir (real_path);
- }
- op_errno = errno;
-
- if (op_ret == 0) {
- posix_handle_unset (this, stbuf.ia_gfid, NULL);
- }
-
- if (op_errno == EEXIST)
- /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
- op_errno = ENOTEMPTY;
-
- /* No need to log a common error as ENOTEMPTY */
- if (op_ret == -1 && op_errno != ENOTEMPTY) {
- gf_log (this->name, GF_LOG_ERROR,
- "rmdir of %s failed: %s", real_path,
- strerror (op_errno));
- }
-
- if (op_ret == -1) {
- gf_log (this->name,
- (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR,
- "%s on %s failed", (flags) ? "rename" : "rmdir",
- real_path);
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- &preparent, &postparent, NULL);
-
- return 0;
-}
-
-
-int
-posix_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = 0;
- char * par_path = 0;
- struct iatt stbuf = { 0, };
- struct posix_private *priv = NULL;
- gid_t gid = 0;
- char was_present = 1;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- char *pgfid_xattr_key = NULL;
- int32_t nlink_samepgfid = 0;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (linkname, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
-
- if ((op_ret == -1) && (errno == ENOENT)){
- was_present = 0;
- }
-
- SET_FS_ID (frame->root->uid, gid);
-
- gid = frame->root->gid;
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- if (preparent.ia_prot.sgid) {
- gid = preparent.ia_gid;
- }
-
- op_ret = symlink (linkname, real_path);
-
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink of %s --> %s failed: %s",
- real_path, linkname, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_gfid_set (this, real_path, loc, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting gfid on %s failed", real_path);
- }
-
-#ifndef HAVE_SET_FSID
- op_ret = lchown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lchown failed on %s: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-#endif
- op_ret = posix_acl_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting ACLs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- if (priv->update_pgfid_nlinks) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
- loc->pargfid);
- nlink_samepgfid = 1;
- SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
- XATTR_CREATE, op_ret, this, ignore);
- }
-ignore:
- op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting xattrs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- op_ret = posix_pstat (this, NULL, real_path, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat failed on %s: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent, NULL);
-
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_path);
- }
-
- return 0;
-}
-
-
-int
-posix_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_oldpath = NULL;
- char *real_newpath = NULL;
- char *par_oldpath = NULL;
- char *par_newpath = NULL;
- struct iatt stbuf = {0, };
- struct posix_private *priv = NULL;
- char was_present = 1;
- struct iatt preoldparent = {0, };
- struct iatt postoldparent = {0, };
- struct iatt prenewparent = {0, };
- struct iatt postnewparent = {0, };
- char olddirid[64];
- char newdirid[64];
- uuid_t victim = {0};
- int was_dir = 0;
- int nlink = 0;
- char *pgfid_xattr_key = NULL;
- int32_t nlink_samepgfid = 0;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (oldloc, out);
- VALIDATE_OR_GOTO (newloc, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
- MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf);
-
- op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent %s failed: %s",
- par_oldpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- par_newpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)){
- was_present = 0;
- } else {
- uuid_copy (victim, stbuf.ia_gfid);
- if (IA_ISDIR (stbuf.ia_type))
- was_dir = 1;
- nlink = stbuf.ia_nlink;
- }
-
- if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) {
- gf_log (this->name, GF_LOG_WARNING,
- "found directory at %s while expecting ENOENT",
- real_newpath);
- op_ret = -1;
- op_errno = EEXIST;
- goto out;
- }
-
- if (was_present && IA_ISDIR(stbuf.ia_type) &&
- uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "found directory %s at %s while renaming %s",
- uuid_utoa_r (newloc->inode->gfid, olddirid),
- real_newpath,
- uuid_utoa_r (stbuf.ia_gfid, newdirid));
- op_ret = -1;
- op_errno = EEXIST;
- goto out;
- }
-
- if (IA_ISDIR (oldloc->inode->ia_type))
- posix_handle_unset (this, oldloc->inode->gfid, NULL);
-
- LOCK (&oldloc->inode->lock);
- {
- if (!IA_ISDIR (oldloc->inode->ia_type)
- && priv->update_pgfid_nlinks) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
- PGFID_XATTR_KEY_PREFIX,
- oldloc->pargfid);
- UNLINK_MODIFY_PGFID_XATTR (real_oldpath,
- pgfid_xattr_key,
- nlink_samepgfid, 0,
- op_ret,
- this, unlock);
- }
-
- op_ret = sys_rename (real_oldpath, real_newpath);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name,
- (op_errno == ENOTEMPTY ? GF_LOG_DEBUG
- : GF_LOG_ERROR),
- "rename of %s to %s failed: %s",
- real_oldpath, real_newpath,
- strerror (op_errno));
-
- if (priv->update_pgfid_nlinks
- && !IA_ISDIR (oldloc->inode->ia_type)) {
- LINK_MODIFY_PGFID_XATTR (real_oldpath,
- pgfid_xattr_key,
- nlink_samepgfid, 0,
- op_ret,
- this, unlock);
- }
-
- goto unlock;
- }
-
- if (!IA_ISDIR (oldloc->inode->ia_type)
- && priv->update_pgfid_nlinks) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
- PGFID_XATTR_KEY_PREFIX,
- newloc->pargfid);
- LINK_MODIFY_PGFID_XATTR (real_newpath,
- pgfid_xattr_key,
- nlink_samepgfid, 0,
- op_ret,
- this, unlock);
- }
- }
-unlock:
- UNLOCK (&oldloc->inode->lock);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "modification of "
- "parent gfid xattr failed (gfid:%s)",
- uuid_utoa (oldloc->inode->gfid));
- goto out;
- }
-
- if (was_dir)
- posix_handle_unset (this, victim, NULL);
-
- if (was_present && !was_dir && nlink == 1)
- posix_handle_unset (this, victim, NULL);
-
- if (IA_ISDIR (oldloc->inode->ia_type)) {
- posix_handle_soft (this, real_newpath, newloc,
- oldloc->inode->gfid, NULL);
- }
-
- op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- real_newpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_oldpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_newpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
-
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf,
- &preoldparent, &postoldparent,
- &prenewparent, &postnewparent, NULL);
-
- if ((op_ret == -1) && !was_present) {
- unlink (real_newpath);
- }
-
- return 0;
-}
-
-
-int
-posix_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_oldpath = 0;
- char *real_newpath = 0;
- char *par_newpath = 0;
- struct iatt stbuf = {0, };
- struct posix_private *priv = NULL;
- char was_present = 1;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
- int32_t nlink_samepgfid = 0;
- char *pgfid_xattr_key = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (oldloc, out);
- VALIDATE_OR_GOTO (newloc, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
-
- MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
- }
-
- op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
- par_newpath, strerror (op_errno));
- goto out;
- }
-
-
- op_ret = sys_link (real_oldpath, real_newpath);
-
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "link %s to %s failed: %s",
- real_oldpath, real_newpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- real_newpath, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
- par_newpath, strerror (op_errno));
- goto out;
- }
-
- if (priv->update_pgfid_nlinks) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
- newloc->pargfid);
-
- LOCK (&newloc->inode->lock);
- {
- LINK_MODIFY_PGFID_XATTR (real_newpath, pgfid_xattr_key,
- nlink_samepgfid, 0, op_ret,
- this, unlock);
- }
- unlock:
- UNLOCK (&newloc->inode->lock);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "modification of "
- "parent gfid xattr failed (path:%s gfid:%s)",
- real_newpath, uuid_utoa (newloc->inode->gfid));
- goto out;
- }
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno,
- (oldloc)?oldloc->inode:NULL, &stbuf, &preparent,
- &postparent, NULL);
-
- if ((op_ret == -1) && (!was_present)) {
- unlink (real_newpath);
- }
-
- return 0;
-}
-
-
-int32_t
-posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = 0;
- struct posix_private *priv = NULL;
- struct iatt prebuf = {0,};
- struct iatt postbuf = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- MAKE_INODE_HANDLE (real_path, this, loc, &prebuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = truncate (real_path, offset);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "truncate on %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- &prebuf, &postbuf, NULL);
-
- return 0;
-}
-
-
-int
-posix_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t _fd = -1;
- int _flags = 0;
- char * real_path = NULL;
- char * par_path = NULL;
- struct iatt stbuf = {0, };
- struct posix_fd * pfd = NULL;
- struct posix_private * priv = NULL;
- char was_present = 1;
-
- gid_t gid = 0;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
-
- int nlink_samepgfid = 0;
- char * pgfid_xattr_key = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
-
- gid = frame->root->gid;
-
- SET_FS_ID (frame->root->uid, gid);
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- if (preparent.ia_prot.sgid) {
- gid = preparent.ia_gid;
- }
-
- if (!flags) {
- _flags = O_CREAT | O_RDWR | O_EXCL;
- }
- else {
- _flags = flags | O_CREAT;
- }
-
- op_ret = posix_pstat (this, NULL, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
- }
-
- if (priv->o_direct)
- _flags |= O_DIRECT;
-
- _fd = open (real_path, _flags, mode);
-
- if (_fd == -1) {
- op_errno = errno;
- op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "open on %s failed: %s", real_path,
- strerror (op_errno));
- goto out;
- }
-
- if (was_present)
- goto fill_stat;
-
- op_ret = posix_gfid_set (this, real_path, loc, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting gfid on %s failed", real_path);
- }
-
-#ifndef HAVE_SET_FSID
- op_ret = chown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s",
- real_path, strerror (op_errno));
- }
-#endif
- op_ret = posix_acl_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting ACLs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
- if (priv->update_pgfid_nlinks) {
- MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX,
- loc->pargfid);
- nlink_samepgfid = 1;
- SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid,
- XATTR_CREATE, op_ret, this, ignore);
- }
-ignore:
- op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting xattrs on %s failed (%s)", real_path,
- strerror (errno));
- }
-
-fill_stat:
- op_ret = posix_fdstat (this, _fd, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat on %d failed: %s", _fd, strerror (op_errno));
- goto out;
- }
-
- op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent %s failed: %s",
- par_path, strerror (op_errno));
- goto out;
- }
-
- op_ret = -1;
- pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
- if (!pfd) {
- op_errno = errno;
- goto out;
- }
-
- pfd->flags = flags;
- pfd->fd = _fd;
-
- op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
- if (op_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set the fd context path=%s fd=%p",
- real_path, fd);
-
- LOCK (&priv->lock);
- {
- priv->nr_files++;
- }
- UNLOCK (&priv->lock);
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- if ((-1 == op_ret) && (_fd != -1)) {
- close (_fd);
-
- if (!was_present) {
- unlink (real_path);
- }
- }
-
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,
- fd, (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent, xdata);
-
- return 0;
-}
-
-int32_t
-posix_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- int32_t _fd = -1;
- struct posix_fd *pfd = NULL;
- struct posix_private *priv = NULL;
- struct iatt stbuf = {0, };
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
-
- op_ret = -1;
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- if (priv->o_direct)
- flags |= O_DIRECT;
-
- _fd = open (real_path, flags, 0);
- if (_fd == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "open on %s: %s", real_path, strerror (op_errno));
- goto out;
- }
-
- pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
- if (!pfd) {
- op_errno = errno;
- goto out;
- }
-
- pfd->flags = flags;
- pfd->fd = _fd;
-
- op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
- if (op_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set the fd context path=%s fd=%p",
- real_path, fd);
-
- LOCK (&priv->lock);
- {
- priv->nr_files++;
- }
- UNLOCK (&priv->lock);
-
- op_ret = 0;
-
-out:
- if (op_ret == -1) {
- if (_fd != -1) {
- close (_fd);
- }
- }
-
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL);
-
- return 0;
-}
-
-int
-posix_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int _fd = -1;
- struct posix_private * priv = NULL;
- struct iobuf * iobuf = NULL;
- struct iobref * iobref = NULL;
- struct iovec vec = {0,};
- struct posix_fd * pfd = NULL;
- struct iatt stbuf = {0,};
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
-
- if (!size) {
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
- goto out;
- }
-
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (!iobuf) {
- op_errno = ENOMEM;
- goto out;
- }
-
- _fd = pfd->fd;
- op_ret = pread (_fd, iobuf->ptr, size, offset);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "read failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- LOCK (&priv->lock);
- {
- priv->read_value += op_ret;
- }
- UNLOCK (&priv->lock);
-
- vec.iov_base = iobuf->ptr;
- vec.iov_len = op_ret;
-
- iobref = iobref_new ();
-
- iobref_add (iobref, iobuf);
-
- /*
- * readv successful, and we need to get the stat of the file
- * we read from
- */
-
- op_ret = posix_fdstat (this, _fd, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- /* Hack to notify higher layers of EOF. */
- if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
- op_errno = ENOENT;
-
- op_ret = vec.iov_len;
-out:
-
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- &vec, 1, &stbuf, iobref, NULL);
-
- if (iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-
-int32_t
-__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset)
-{
- int32_t op_ret = 0;
- int idx = 0;
- int retval = 0;
- off_t internal_off = 0;
-
- if (!vector)
- return -EFAULT;
-
- internal_off = offset;
- for (idx = 0; idx < count; idx++) {
- retval = pwrite (fd, vector[idx].iov_base, vector[idx].iov_len,
- internal_off);
- if (retval == -1) {
- op_ret = -errno;
- goto err;
- }
- op_ret += retval;
- internal_off += retval;
- }
-
-err:
- return op_ret;
-}
-
-int32_t
-__posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
- int odirect)
-{
- int32_t op_ret = 0;
- int idx = 0;
- int max_buf_size = 0;
- int retval = 0;
- char *buf = NULL;
- char *alloc_buf = NULL;
- off_t internal_off = 0;
-
- /* Check for the O_DIRECT flag during open() */
- if (!odirect)
- return __posix_pwritev (fd, vector, count, startoff);
-
- for (idx = 0; idx < count; idx++) {
- if (max_buf_size < vector[idx].iov_len)
- max_buf_size = vector[idx].iov_len;
- }
-
- alloc_buf = _page_aligned_alloc (max_buf_size, &buf);
- if (!alloc_buf) {
- op_ret = -errno;
- goto err;
- }
-
- internal_off = startoff;
- for (idx = 0; idx < count; idx++) {
- memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);
-
- /* not sure whether writev works on O_DIRECT'd fd */
- retval = pwrite (fd, buf, vector[idx].iov_len, internal_off);
- if (retval == -1) {
- op_ret = -errno;
- goto err;
- }
-
- op_ret += retval;
- internal_off += retval;
- }
-
-err:
- GF_FREE (alloc_buf);
-
- return op_ret;
-}
-
-dict_t*
-_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
-{
- dict_t *rsp_xdata = NULL;
- int32_t ret = 0;
- inode_t *inode = NULL;
-
- if (fd)
- inode = fd->inode;
-
- if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: "
- "fd: %p inode: %p gfid:%s", fd, inode?inode:0,
- inode?uuid_utoa(inode->gfid):"N/A");
- goto out;
- }
-
- if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT))
- goto out;
-
- rsp_xdata = dict_new();
- if (!rsp_xdata)
- goto out;
-
- ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
- fd->inode->fd_count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
- "dictionary value for %s", uuid_utoa (fd->inode->gfid),
- GLUSTERFS_OPEN_FD_COUNT);
- }
-
- ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
- is_append);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
- "dictionary value for %s", uuid_utoa (fd->inode->gfid),
- GLUSTERFS_WRITE_IS_APPEND);
- }
-out:
- return rsp_xdata;
-}
-
-int32_t
-posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int _fd = -1;
- struct posix_private * priv = NULL;
- struct posix_fd * pfd = NULL;
- struct iatt preop = {0,};
- struct iatt postop = {0,};
- int ret = -1;
- dict_t *rsp_xdata = NULL;
- int is_append = 0;
- gf_boolean_t locked = _gf_false;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (vector, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- VALIDATE_OR_GOTO (priv, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- _fd = pfd->fd;
-
- if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
- /* The write_is_append check and write must happen
- atomically. Else another write can overtake this
- write after the check and get written earlier.
-
- So lock before preop-stat and unlock after write.
- */
- locked = _gf_true;
- LOCK(&fd->inode->lock);
- }
-
- op_ret = posix_fdstat (this, _fd, &preop);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- if (locked) {
- if (preop.ia_size == offset || (fd->flags & O_APPEND))
- is_append = 1;
- }
-
- op_ret = __posix_writev (_fd, vector, count, offset,
- (pfd->flags & O_DIRECT));
-
- if (locked) {
- UNLOCK (&fd->inode->lock);
- locked = _gf_false;
- }
-
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64
- ", %s", offset, strerror (op_errno));
- goto out;
- }
-
- LOCK (&priv->lock);
- {
- priv->write_value += op_ret;
- }
- UNLOCK (&priv->lock);
-
- if (op_ret >= 0) {
- rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append);
- /* wiretv successful, we also need to get the stat of
- * the file we wrote to
- */
-
- if (flags & (O_SYNC|O_DSYNC)) {
- ret = fsync (_fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "fsync() in writev on fd %d failed: %s",
- _fd, strerror (errno));
- op_ret = -1;
- op_errno = errno;
- goto out;
- }
- }
-
- ret = posix_fdstat (this, _fd, &postop);
- if (ret == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation fstat failed on fd=%p: %s",
- fd, strerror (op_errno));
- goto out;
- }
- }
-
-out:
-
- if (locked) {
- UNLOCK (&fd->inode->lock);
- locked = _gf_false;
- }
-
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop,
- rsp_xdata);
-
- if (rsp_xdata)
- dict_unref (rsp_xdata);
- return 0;
-}
-
-
-int32_t
-posix_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
-{
- char * real_path = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct statvfs buf = {0, };
- struct posix_private * priv = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- priv = this->private;
-
- op_ret = statvfs (real_path, &buf);
-
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "statvfs failed on %s: %s",
- real_path, strerror (op_errno));
- goto out;
- }
-
- if (!priv->export_statfs) {
- buf.f_blocks = 0;
- buf.f_bfree = 0;
- buf.f_bavail = 0;
- buf.f_files = 0;
- buf.f_ffree = 0;
- buf.f_favail = 0;
- }
-
- op_ret = 0;
-
-out:
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL);
- return 0;
-}
-
-
-int32_t
-posix_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- struct posix_fd *pfd = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL on fd=%p", fd);
- goto out;
- }
-
- op_ret = 0;
-
-out:
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-
-int32_t
-posix_release (xlator_t *this, fd_t *fd)
-{
- struct posix_private * priv = NULL;
- struct posix_fd * pfd = NULL;
- int ret = -1;
- uint64_t tmp_pfd = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
-
- ret = fd_ctx_del (fd, this, &tmp_pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
- pfd = (struct posix_fd *)(long)tmp_pfd;
-
- if (pfd->dir) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd->dir is %p (not NULL) for file fd=%p",
- pfd->dir, fd);
- }
-
- pthread_mutex_lock (&priv->janitor_lock);
- {
- INIT_LIST_HEAD (&pfd->list);
- list_add_tail (&pfd->list, &priv->janitor_fds);
- pthread_cond_signal (&priv->janitor_cond);
- }
- pthread_mutex_unlock (&priv->janitor_lock);
-
- LOCK (&priv->lock);
- {
- priv->nr_files--;
- }
- UNLOCK (&priv->lock);
-
-out:
- return 0;
-}
-
-
-int
-posix_batch_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync, dict_t *xdata)
-{
- call_stub_t *stub = NULL;
- struct posix_private *priv = NULL;
-
- priv = this->private;
-
- stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
- return 0;
- }
-
- pthread_mutex_lock (&priv->fsync_mutex);
- {
- list_add_tail (&stub->list, &priv->fsyncs);
- priv->fsync_queue_count++;
- pthread_cond_signal (&priv->fsync_cond);
- }
- pthread_mutex_unlock (&priv->fsync_mutex);
-
- return 0;
-}
-
-
-int32_t
-posix_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int _fd = -1;
- struct posix_fd * pfd = NULL;
- int ret = -1;
- struct iatt preop = {0,};
- struct iatt postop = {0,};
- struct posix_private *priv = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
-#ifdef GF_DARWIN_HOST_OS
- /* Always return success in case of fsync in MAC OS X */
- op_ret = 0;
- goto out;
-#endif
-
- priv = this->private;
- if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
- posix_batch_fsync (frame, this, fd, datasync, xdata);
- return 0;
- }
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd not found in fd's ctx");
- goto out;
- }
-
- _fd = pfd->fd;
-
- op_ret = posix_fdstat (this, _fd, &preop);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_WARNING,
- "pre-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- if (datasync) {
- ;
- op_ret = sys_fdatasync (_fd);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "fdatasync on fd=%p failed: %s",
- fd, strerror (errno));
- }
- } else {
- op_ret = sys_fsync (_fd);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "fsync on fd=%p failed: %s",
- fd, strerror (op_errno));
- goto out;
- }
- }
-
- op_ret = posix_fdstat (this, _fd, &postop);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_WARNING,
- "post-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop,
- NULL);
-
- return 0;
-}
-
-static int gf_posix_xattr_enotsup_log;
-static int
-_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
- void *tmp)
-{
- posix_xattr_filler_t *filler = NULL;
-
- filler = tmp;
-
- return posix_handle_pair (filler->this, filler->real_path, k, v,
- filler->flags);
-}
-
-#ifdef GF_DARWIN_HOST_OS
-static inline int
-map_xattr_flags(int flags)
-{
- /* DARWIN has different defines on XATTR_ flags.
- There do not seem to be a POSIX standard
- Parse any other flags over.
- */
- int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE);
- if (GF_XATTR_CREATE & flags)
- darwinflags |= XATTR_CREATE;
- if (GF_XATTR_REPLACE & flags)
- darwinflags |= XATTR_REPLACE;
- return darwinflags;
-}
-#endif
-
-int32_t
-posix_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = NULL;
-
- posix_xattr_filler_t filler = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (dict, out);
-
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- op_ret = -1;
- dict_del (dict, GFID_XATTR_KEY);
- dict_del (dict, GF_XATTR_VOL_ID_KEY);
-
- filler.real_path = real_path;
- filler.this = this;
-#ifdef GF_DARWIN_HOST_OS
- filler.flags = map_xattr_flags(flags);
-#else
- filler.flags = flags;
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
#endif
- op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair,
- &filler);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *key, dict_t *dict, dict_t *xdata)
-{
- char *real_path = NULL;
- struct dirent *dirent = NULL;
- DIR *fd = NULL;
- const char *fname = NULL;
- char *found = NULL;
- int ret = -1;
- int op_ret = -1;
-
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- fd = opendir (real_path);
- if (!fd)
- return -errno;
-
- fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY);
-
- while ((dirent = readdir (fd))) {
- if (strcasecmp (dirent->d_name, fname) == 0) {
- found = gf_strdup (dirent->d_name);
- if (!found) {
- closedir (fd);
- return -ENOMEM;
- }
- break;
- }
- }
-
- closedir (fd);
-
- if (!found)
- return -ENOENT;
-
- ret = dict_set_dynstr (dict, (char *)key, found);
- if (ret) {
- GF_FREE (found);
- return -ENOMEM;
- }
- ret = strlen (found) + 1;
-
- return ret;
-}
-
-int
-posix_get_ancestry_directory (xlator_t *this, inode_t *leaf_inode,
- gf_dirent_t *head, char **path, int type,
- int32_t *op_errno, dict_t *xdata)
-{
- ssize_t handle_size = 0;
- struct posix_private *priv = NULL;
- char dirpath[PATH_MAX+1] = {0,};
- inode_t *inode = NULL;
- int ret = -1;
-
- priv = this->private;
-
- handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
-
- ret = posix_make_ancestryfromgfid (this, dirpath, PATH_MAX + 1, head,
- type | POSIX_ANCESTRY_PATH,
- leaf_inode->gfid,
- handle_size, priv->base_path,
- leaf_inode->table, &inode, xdata);
- if (ret < 0)
- goto out;
-
-
- /* there is already a reference in loc->inode */
- inode_unref (inode);
-
- if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
- if (strcmp (dirpath, "/"))
- dirpath[strlen (dirpath) - 1] = '\0';
-
- *path = gf_strdup (dirpath);
- }
-
-out:
- return ret;
-}
-
-int32_t
-posix_links_in_same_directory (char *dirpath, int count, inode_t *leaf_inode,
- inode_t *parent, uint64_t ino,
- gf_dirent_t *head, char **path,
- int type, dict_t *xdata, int32_t *op_errno)
-{
- DIR *dirp = NULL;
- int op_ret = -1;
- struct dirent *entry = NULL;
- struct dirent *result = NULL;
- inode_t *linked_inode = NULL;
- gf_dirent_t *gf_entry = NULL;
- char temppath[PATH_MAX+1] = {0,};
- xlator_t *this = NULL;
- struct posix_private *priv = NULL;
- char *tempv = NULL;
-
- this = THIS;
-
- priv = this->private;
-
- dirp = opendir (dirpath);
- if (!dirp) {
- *op_errno = errno;
- gf_log (this->name, GF_LOG_WARNING,
- "could not opendir %s: %s", dirpath,
- strerror (*op_errno));
- goto out;
- }
-
- entry = alloca (offsetof(struct dirent, d_name) + NAME_MAX + 1);
- if (entry == NULL)
- goto out;
-
- while (count > 0) {
- *op_errno = readdir_r (dirp, entry, &result);
- if ((result == NULL) || *op_errno)
- break;
-
- if (entry->d_ino != ino)
- continue;
-
- linked_inode = inode_link (leaf_inode, parent,
- entry->d_name, NULL);
-
- GF_ASSERT (linked_inode == leaf_inode);
- inode_unref (linked_inode);
-
- if (type & POSIX_ANCESTRY_DENTRY) {
- loc_t loc = {0, };
-
- loc.inode = inode_ref (leaf_inode);
- uuid_copy (loc.gfid, leaf_inode->gfid);
-
- strcpy (temppath, dirpath);
- strcat (temppath, "/");
- strcat (temppath, entry->d_name);
-
- gf_entry = gf_dirent_for_name (entry->d_name);
- gf_entry->inode = inode_ref (leaf_inode);
- gf_entry->dict
- = posix_lookup_xattr_fill (this,
- temppath,
- &loc, xdata,
- NULL);
- list_add_tail (&gf_entry->list, &head->list);
- loc_wipe (&loc);
- }
-
- if (type & POSIX_ANCESTRY_PATH) {
- strcpy (temppath,
- &dirpath[priv->base_path_length]);
- strcat (temppath, "/");
- strcat (temppath, entry->d_name);
- if (!*path) {
- *path = gf_strdup (temppath);
- } else {
- /* creating a colon separated */
- /* list of hard links */
- tempv = GF_REALLOC (*path, strlen (*path)
- + 1 // ':'
- + strlen (temppath) + 1 );
- if (!tempv) {
- gf_log (this->name, GF_LOG_WARNING,
- "realloc failed on path");
- GF_FREE (*path);
- op_ret = -1;
- *op_errno = ENOMEM;
- goto out;
- }
-
- *path = tempv;
- strcat (*path, ":");
- strcat (*path, temppath);
- }
- }
-
- count--;
- }
-
-out:
- if (dirp) {
- op_ret = closedir (dirp);
- if (op_ret == -1) {
- *op_errno = errno;
- gf_log (this->name, GF_LOG_WARNING,
- "closedir failed: %s",
- strerror (*op_errno));
- }
- }
-
- return op_ret;
-}
-
-int
-posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode,
- gf_dirent_t *head, char **path, int type,
- int32_t *op_errno, dict_t *xdata)
-{
- size_t remaining_size = 0;
- char dirpath[PATH_MAX+1] = {0,}, *leaf_path = NULL;
- int op_ret = -1, pathlen = -1;
- ssize_t handle_size = 0;
- char pgfidstr[UUID_CANONICAL_FORM_LEN+1] = {0,};
- uuid_t pgfid = {0, };
- int nlink_samepgfid = 0;
- struct stat stbuf = {0,};
- char *list = NULL;
- int32_t list_offset = 0;
- char key[4096] = {0,};
- struct posix_private *priv = NULL;
- ssize_t size = 0;
- inode_t *parent = NULL;
- loc_t *loc = NULL;
-
- priv = this->private;
-
- loc = GF_CALLOC (1, sizeof (*loc), gf_posix_mt_char);
- if (loc == NULL) {
- op_ret = -1;
- *op_errno = ENOMEM;
- goto out;
- }
-
- uuid_copy (loc->gfid, leaf_inode->gfid);
-
- MAKE_INODE_HANDLE (leaf_path, this, loc, NULL);
-
- GF_FREE (loc);
-
- size = sys_llistxattr (leaf_path, NULL, 0);
- if (size == -1) {
- *op_errno = errno;
- if ((errno == ENOTSUP) || (errno == ENOSYS)) {
- GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
- this->name, GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting brick"
- " with 'user_xattr' flag)");
-
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "listxattr failed on %s: %s",
- leaf_path, strerror (*op_errno));
-
- }
-
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- list = alloca (size);
- if (!list) {
- *op_errno = errno;
- goto out;
- }
-
- size = sys_llistxattr (leaf_path, list, size);
- if (size < 0) {
- op_ret = -1;
- *op_errno = errno;
- goto out;
- }
- remaining_size = size;
- list_offset = 0;
-
- op_ret = sys_lstat (leaf_path, &stbuf);
- if (op_ret == -1) {
- *op_errno = errno;
- gf_log (this->name, GF_LOG_WARNING, "lstat failed"
- " on %s: %s", leaf_path,
- strerror (*op_errno));
- goto out;
- }
-
- while (remaining_size > 0) {
- strcpy (key, list + list_offset);
- if (strncmp (key, PGFID_XATTR_KEY_PREFIX,
- strlen (PGFID_XATTR_KEY_PREFIX)) != 0)
- goto next;
-
- op_ret = sys_lgetxattr (leaf_path, key,
- &nlink_samepgfid,
- sizeof(nlink_samepgfid));
- if (op_ret == -1) {
- *op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on "
- "%s: key = %s (%s)",
- leaf_path,
- key,
- strerror (*op_errno));
- goto out;
- }
-
- nlink_samepgfid = ntoh32 (nlink_samepgfid);
-
- strcpy (pgfidstr, key + strlen(PGFID_XATTR_KEY_PREFIX));
- uuid_parse (pgfidstr, pgfid);
-
- handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
-
- /* constructing the absolute real path of parent dir */
- strcpy (dirpath, priv->base_path);
- pathlen = PATH_MAX + 1 - priv->base_path_length;
-
- op_ret = posix_make_ancestryfromgfid (this,
- dirpath + priv->base_path_length,
- pathlen,
- head,
- type | POSIX_ANCESTRY_PATH,
- pgfid,
- handle_size,
- priv->base_path,
- leaf_inode->table,
- &parent, xdata);
- if (op_ret < 0) {
- goto next;
- }
-
- dirpath[strlen (dirpath) - 1] = '\0';
-
- posix_links_in_same_directory (dirpath, nlink_samepgfid,
- leaf_inode,
- parent, stbuf.st_ino, head,
- path, type, xdata, op_errno);
-
- if (parent != NULL) {
- inode_unref (parent);
- parent = NULL;
- }
-
- next:
- remaining_size -= strlen (key) + 1;
- list_offset += strlen (key) + 1;
- } /* while (remaining_size > 0) */
-
- op_ret = 0;
-
-out:
- return op_ret;
-}
-
-int
-posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
- gf_dirent_t *head, char **path, int type, int32_t *op_errno,
- dict_t *xdata)
-{
- int ret = -1;
- struct posix_private *priv = NULL;
-
- priv = this->private;
-
- if (!priv->update_pgfid_nlinks)
- goto out;
-
- if (IA_ISDIR (leaf_inode->ia_type)) {
- ret = posix_get_ancestry_directory (this, leaf_inode,
- head, path, type, op_errno,
- xdata);
- } else {
- ret = posix_get_ancestry_non_directory (this, leaf_inode,
- head, path, type,
- op_errno, xdata);
- }
-
-out:
- return ret;
-}
-
-/**
- * posix_getxattr - this function returns a dictionary with all the
- * key:value pair present as xattr. used for
- * both 'listxattr' and 'getxattr'.
- */
-int32_t
-posix_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- struct posix_private *priv = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char host_buf[1024] = {0,};
- char *value = NULL;
- char *real_path = NULL;
- dict_t *dict = NULL;
- char *file_contents = NULL;
- int ret = -1;
- char *path = NULL;
- char *rpath = NULL;
- char *dyn_rpath = NULL;
- ssize_t size = 0;
- char *list = NULL;
- int32_t list_offset = 0;
- size_t remaining_size = 0;
- char keybuffer[4096] = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- op_ret = -1;
- priv = this->private;
-
- if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name &&
- ZR_FILE_CONTENT_REQUEST(name)) {
- ret = posix_get_file_contents (this, loc->gfid, &name[15],
- &file_contents);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_ERROR,
- "getting file contents failed: %s",
- strerror (op_errno));
- goto out;
- }
- }
-
- dict = dict_new ();
- if (!dict) {
- op_errno = ENOMEM;
- goto out;
- }
-
- if (loc->inode && name &&
- (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY,
- strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
- ret = posix_xattr_get_real_filename (frame, this, loc,
- name, dict, xdata);
- if (ret < 0) {
- op_ret = -1;
- op_errno = -ret;
- gf_log (this->name, (op_errno == ENOENT) ?
- GF_LOG_DEBUG : GF_LOG_WARNING,
- "Failed to get real filename (%s, %s): %s",
- loc->path, name, strerror (op_errno));
- goto out;
- }
-
- size = ret;
- goto done;
- }
-
- if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
- if (!list_empty (&loc->inode->fd_list)) {
- ret = dict_set_uint32 (dict, (char *)name, 1);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- name);
- } else {
- ret = dict_set_uint32 (dict, (char *)name, 0);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- name);
- }
- goto done;
- }
- if (loc->inode && name && (XATTR_IS_PATHINFO (name))) {
- if (LOC_HAS_ABSPATH (loc))
- MAKE_REAL_PATH (rpath, this, loc->path);
- else
- rpath = real_path;
-
- (void) snprintf (host_buf, 1024,
- "<POSIX(%s):%s:%s>", priv->base_path,
- ((priv->node_uuid_pathinfo
- && !uuid_is_null(priv->glusterd_uuid))
- ? uuid_utoa (priv->glusterd_uuid)
- : priv->hostname),
- rpath);
-
- dyn_rpath = gf_strdup (host_buf);
- if (!dyn_rpath) {
- ret = -1;
- goto done;
- }
- size = strlen (dyn_rpath) + 1;
- ret = dict_set_dynstr (dict, (char *)name, dyn_rpath);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not set value (%s) in dictionary",
- dyn_rpath);
- GF_FREE (dyn_rpath);
- }
-
- goto done;
- }
-
- if (loc->inode && name &&
- (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0)
- && !uuid_is_null (priv->glusterd_uuid)) {
- (void) snprintf (host_buf, 1024, "%s",
- uuid_utoa (priv->glusterd_uuid));
-
- dyn_rpath = gf_strdup (host_buf);
- if (!dyn_rpath) {
- ret = -1;
- goto done;
- }
-
- size = strlen (dyn_rpath) + 1;
- ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY,
- dyn_rpath);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not set value (%s) in dictionary",
- dyn_rpath);
- GF_FREE (dyn_rpath);
- }
- goto done;
- }
-
- if (loc->inode && name &&
- (strcmp (name, GFID_TO_PATH_KEY) == 0)) {
- ret = inode_path (loc->inode, NULL, &path);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "%s: could not get "
- "inode path", uuid_utoa (loc->inode->gfid));
- goto done;
- }
-
- ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not set value (%s) in dictionary",
- host_buf);
- GF_FREE (path);
- }
- goto done;
- }
-
- if (loc->inode && name
- && (strcmp (name, GET_ANCESTRY_PATH_KEY) == 0)) {
- int type = POSIX_ANCESTRY_PATH;
-
- op_ret = posix_get_ancestry (this, loc->inode, NULL,
- &path, type, &op_errno,
- xdata);
- if (op_ret < 0) {
- op_ret = -1;
- op_errno = ENODATA;
- goto out;
- }
-
- op_ret = dict_set_dynstr (dict, GET_ANCESTRY_PATH_KEY, path);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "could not get "
- "value for key (%s)", GET_ANCESTRY_PATH_KEY);
- GF_FREE (path);
- op_errno = -op_ret;
- op_ret = -1;
- }
-
- goto done;
- }
-
- if (name) {
- strcpy (keybuffer, name);
- char *key = keybuffer;
-#if defined(GF_DARWIN_HOST_OS_DISABLED)
- if (priv->xattr_user_namespace == XATTR_STRIP) {
- if (strncmp(key, "user.",5) == 0) {
- key += 5;
- gf_log (this->name,
- GF_LOG_DEBUG,
- "getxattr for file %s"
- " stripping user key: %s -> %s",
- real_path, keybuffer, key);
- }
- }
-#endif
- size = sys_lgetxattr (real_path, key, NULL, 0);
- if (size <= 0) {
- op_errno = errno;
- if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
- GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
- this->name, GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting"
- " brick with 'user_xattr' "
- "flag)");
- } else if (op_errno == ENOATTR ||
- op_errno == ENODATA) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No such attribute:%s for file %s",
- key, real_path);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on %s: %s (%s)",
- real_path, key, strerror (op_errno));
- }
-
- goto done;
- }
- value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
- if (!value) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- size = sys_lgetxattr (real_path, key, value, size);
- if (size == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
- "%s: key = %s (%s)", real_path, key,
- strerror (op_errno));
- GF_FREE (value);
- goto out;
- }
- value [size] = '\0';
- op_ret = dict_set_dynptr (dict, key, value, size);
- if (op_ret < 0) {
- op_errno = -op_ret;
- gf_log (this->name, GF_LOG_ERROR, "dict set operation "
- "on %s for the key %s failed.", real_path, key);
- GF_FREE (value);
- goto out;
- }
-
- goto done;
- }
-
- size = sys_llistxattr (real_path, NULL, 0);
- if (size == -1) {
- op_errno = errno;
- if ((errno == ENOTSUP) || (errno == ENOSYS)) {
- GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
- this->name, GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting"
- " brick with 'user_xattr' "
- "flag)");
- }
- else {
- gf_log (this->name, GF_LOG_ERROR,
- "listxattr failed on %s: %s",
- real_path, strerror (op_errno));
- }
- goto out;
- }
-
- if (size == 0)
- goto done;
-
- list = alloca (size);
- if (!list) {
- op_errno = errno;
- goto out;
- }
-
- size = sys_llistxattr (real_path, list, size);
- if (size < 0) {
- op_ret = -1;
- op_errno = errno;
- goto out;
- }
-
- remaining_size = size;
- list_offset = 0;
- while (remaining_size > 0) {
- strcpy (keybuffer, list + list_offset);
- size = sys_lgetxattr (real_path, keybuffer, NULL, 0);
- if (size == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
- "%s: key = %s (%s)", real_path, keybuffer,
- strerror (op_errno));
- break;
- }
-
- value = GF_CALLOC (size + 1, sizeof(char),
- gf_posix_mt_char);
- if (!value) {
- op_errno = errno;
- goto out;
- }
-
- size = sys_lgetxattr (real_path, keybuffer, value, size);
- if (size == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
- "%s: key = %s (%s)", real_path, keybuffer,
- strerror (op_errno));
- GF_FREE (value);
- break;
- }
-
- value [size] = '\0';
-#ifdef GF_DARWIN_HOST_OS
- /* The protocol expect namespace for now */
- char *newkey = NULL;
- gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey);
- strcpy (keybuffer, newkey);
- GF_FREE (newkey);
-#endif
- op_ret = dict_set_dynptr (dict, keybuffer, value, size);
- if (op_ret < 0) {
- op_errno = -op_ret;
- gf_log (this->name, GF_LOG_ERROR, "dict set operation "
- "on %s for the key %s failed.", real_path,
- keybuffer);
- GF_FREE (value);
- goto out;
- }
-
- remaining_size -= strlen (keybuffer) + 1;
- list_offset += strlen (keybuffer) + 1;
-
- } /* while (remaining_size > 0) */
-
-done:
- op_ret = size;
-
- if (dict) {
- dict_del (dict, GFID_XATTR_KEY);
- dict_del (dict, GF_XATTR_VOL_ID_KEY);
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL);
-
- if (dict) {
- dict_unref (dict);
- }
-
- return 0;
-}
-
-
-int32_t
-posix_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOENT;
- struct posix_fd * pfd = NULL;
- int _fd = -1;
- int32_t list_offset = 0;
- ssize_t size = 0;
- size_t remaining_size = 0;
- char key[4096] = {0,};
- char * value = NULL;
- char * list = NULL;
- dict_t * dict = NULL;
- int ret = -1;
-
- DECLARE_OLD_FS_ID_VAR;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
-
- _fd = pfd->fd;
-
- /* Get the total size */
- dict = get_new_dict ();
- if (!dict) {
- goto out;
- }
-
- if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
- ret = dict_set_uint32 (dict, (char *)name, 1);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- name);
- goto done;
- }
-
- if (name) {
- strcpy (key, name);
-#ifdef GF_DARWIN_HOST_OS
- struct posix_private *priv = NULL;
- priv = this->private;
- if (priv->xattr_user_namespace == XATTR_STRIP) {
- char *newkey = NULL;
- gf_add_prefix (XATTR_USER_PREFIX, key, &newkey);
- strcpy (key, newkey);
- GF_FREE (newkey);
- }
-#endif
- size = sys_fgetxattr (_fd, key, NULL, 0);
- if (size <= 0) {
- op_errno = errno;
- gf_log (this->name, ((errno == ENODATA) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "fgetxattr failed on key %s (%s)", key,
- strerror (op_errno));
- goto done;
- }
-
- value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
- if (!value) {
- op_ret = -1;
- goto out;
- }
- size = sys_fgetxattr (_fd, key, value, size);
- if (size == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
- "fd %p for the key %s (%s)", fd, key,
- strerror (op_errno));
- GF_FREE (value);
- goto out;
- }
- value [size] = '\0';
- op_ret = dict_set_dynptr (dict, key, value, size);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict set operation "
- "on key %s failed", key);
- GF_FREE (value);
- goto out;
- }
- goto done;
- }
-
- size = sys_flistxattr (_fd, NULL, 0);
- if (size == -1) {
- op_errno = errno;
- if ((errno == ENOTSUP) || (errno == ENOSYS)) {
- GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
- this->name, GF_LOG_WARNING,
- "Extended attributes not "
- "supported (try remounting "
- "brick with 'user_xattr' flag)");
- }
- else {
- gf_log (this->name, GF_LOG_ERROR,
- "listxattr failed on %p: %s",
- fd, strerror (op_errno));
- }
- goto out;
- }
-
- if (size == 0)
- goto done;
-
- list = alloca (size + 1);
- if (!list) {
- op_errno = errno;
- goto out;
- }
-
- size = sys_flistxattr (_fd, list, size);
-
- remaining_size = size;
- list_offset = 0;
- while (remaining_size > 0) {
- if(*(list + list_offset) == '\0')
- break;
-
- strcpy (key, list + list_offset);
- size = sys_fgetxattr (_fd, key, NULL, 0);
- if (size == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
- "fd %p for the key %s (%s)", fd, key,
- strerror (op_errno));
- break;
- }
-
- value = GF_CALLOC (size + 1, sizeof(char),
- gf_posix_mt_char);
- if (!value) {
- op_ret = -1;
- op_errno = errno;
- goto out;
- }
-
- size = sys_fgetxattr (_fd, key, value, size);
- if (size == -1) {
- op_ret = -1;
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
- "the fd %p for the key %s (%s)", fd, key,
- strerror (op_errno));
- GF_FREE (value);
- break;
- }
-
- value [size] = '\0';
-
- op_ret = dict_set_dynptr (dict, key, value, size);
- if (op_ret) {
- gf_log (this->name, GF_LOG_ERROR, "dict set operation "
- "failed on key %s", key);
- GF_FREE (value);
- goto out;
- }
- remaining_size -= strlen (key) + 1;
- list_offset += strlen (key) + 1;
-
- } /* while (remaining_size > 0) */
-
-done:
- op_ret = size;
-
- if (dict) {
- dict_del (dict, GFID_XATTR_KEY);
- dict_del (dict, GF_XATTR_VOL_ID_KEY);
- dict_ref (dict);
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-static int
-_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
- void *tmp)
-{
- posix_xattr_filler_t *filler = NULL;
-
- filler = tmp;
-
- return posix_fhandle_pair (filler->this, filler->fd, k, v,
- filler->flags);
-}
-
-int32_t
-posix_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct posix_fd * pfd = NULL;
- int _fd = -1;
- int ret = -1;
-
- posix_xattr_filler_t filler = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (dict, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
- _fd = pfd->fd;
-
- dict_del (dict, GFID_XATTR_KEY);
- dict_del (dict, GF_XATTR_VOL_ID_KEY);
-
- filler.fd = _fd;
- filler.this = this;
-#ifdef GF_DARWIN_HOST_OS
- filler.flags = map_xattr_flags(flags);
-#else
- filler.flags = flags;
-#endif
- op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair,
- &filler);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- }
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-int
-_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data)
-{
- int32_t op_ret = 0;
- xlator_t *this = NULL;
- posix_xattr_filler_t *filler = NULL;
-
- filler = (posix_xattr_filler_t *) data;
- this = filler->this;
-#ifdef GF_DARWIN_HOST_OS
- struct posix_private *priv = NULL;
- priv = (struct posix_private *) this->private;
- char *newkey = NULL;
- if (priv->xattr_user_namespace == XATTR_STRIP) {
- gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey);
- gf_log("remove_xattr", GF_LOG_DEBUG, "key %s => %s" , key,
- newkey);
- key = newkey;
- }
-#endif
- op_ret = sys_lremovexattr (filler->real_path, key);
- if (op_ret == -1) {
- filler->op_errno = errno;
- if (errno != ENOATTR && errno != EPERM)
- gf_log (this->name, GF_LOG_ERROR,
- "removexattr failed on %s (for %s): %s",
- filler->real_path, key, strerror (errno));
- }
-#ifdef GF_DARWIN_HOST_OS
- GF_FREE(newkey);
-#endif
- return op_ret;
-}
-
-
-int32_t
-posix_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char * real_path = NULL;
- posix_xattr_filler_t filler = {0,};
-
- DECLARE_OLD_FS_ID_VAR;
-
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- if (!strcmp (GFID_XATTR_KEY, name)) {
- gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
- " on gfid for file %s", real_path);
- op_ret = -1;
- goto out;
- }
- if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) {
- gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
- " on volume-id for file %s", real_path);
- op_ret = -1;
- goto out;
- }
-
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- /**
- * sending an empty key name with xdata containing the
- * list of key(s) to be removed implies "bulk remove request"
- * for removexattr.
- */
- if (name && (strcmp (name, "") == 0) && xdata) {
- filler.real_path = real_path;
- filler.this = this;
- op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler);
- if (op_ret) {
- op_errno = filler.op_errno;
- }
-
- goto out;
- }
-
- op_ret = sys_lremovexattr (real_path, name);
- if (op_ret == -1) {
- op_errno = errno;
- if (op_errno != ENOATTR && op_errno != EPERM)
- gf_log (this->name, GF_LOG_ERROR,
- "removexattr on %s (for %s): %s", real_path,
- name, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-int32_t
-posix_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct posix_fd * pfd = NULL;
- int _fd = -1;
- int ret = -1;
-
- DECLARE_OLD_FS_ID_VAR;
-
- if (!strcmp (GFID_XATTR_KEY, name)) {
- gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
- " on gfid for file");
- goto out;
- }
- if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) {
- gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
- " on volume-id for file");
- goto out;
- }
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL from fd=%p", fd);
- goto out;
- }
- _fd = pfd->fd;
-
-
-
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- op_ret = sys_fremovexattr (_fd, name);
- if (op_ret == -1) {
- op_errno = errno;
- if (op_errno != ENOATTR && op_errno != EPERM)
- gf_log (this->name, GF_LOG_ERROR,
- "fremovexattr (for %s): %s",
- name, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-posix_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- struct posix_fd *pfd = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- op_errno = -ret;
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL, fd=%p", fd);
- goto out;
- }
-
- op_ret = 0;
-
-out:
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-
-void
-posix_print_xattr (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- gf_log ("posix", GF_LOG_DEBUG,
- "(key/val) = (%s/%d)", key, data_to_int32 (value));
-}
-
-
-/**
- * add_array - add two arrays of 32-bit numbers (stored in network byte order)
- * dest = dest + src
- * @count: number of 32-bit numbers
- * FIXME: handle overflow
- */
-
-static void
-__add_array (int32_t *dest, int32_t *src, int count)
-{
- int i = 0;
- int32_t destval = 0;
- for (i = 0; i < count; i++) {
- destval = ntoh32 (dest[i]);
- dest[i] = hton32 (destval + ntoh32 (src[i]));
- }
-}
-
-static void
-__add_long_array (int64_t *dest, int64_t *src, int count)
-{
- int i = 0;
- for (i = 0; i < count; i++) {
- dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i]));
- }
-}
-
-static int
-_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
- void *tmp)
-{
- int size = 0;
- int count = 0;
- int op_ret = 0;
- int op_errno = 0;
- gf_xattrop_flags_t optype = 0;
- char *array = NULL;
- inode_t *inode = NULL;
- xlator_t *this = NULL;
- posix_xattr_filler_t *filler = NULL;
-
- filler = tmp;
-
- optype = (gf_xattrop_flags_t)(filler->flags);
- this = filler->this;
- inode = filler->inode;
- count = v->len;
- array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char);
-
-#ifdef GF_DARWIN_HOST_OS
- struct posix_private *priv = NULL;
- priv = this->private;
- if (priv->xattr_user_namespace == XATTR_STRIP) {
- if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) {
- k += XATTR_USER_PREFIX_LEN;
- }
- }
-#endif
-
- LOCK (&inode->lock);
- {
- if (filler->real_path) {
- size = sys_lgetxattr (filler->real_path, k,
- (char *)array, v->len);
- } else {
- size = sys_fgetxattr (filler->fd, k, (char *)array,
- v->len);
- }
-
- op_errno = errno;
- if ((size == -1) && (op_errno != ENODATA) &&
- (op_errno != ENOATTR)) {
- if (op_errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name, GF_LOG_WARNING,
- "Extended attributes not "
- "supported by filesystem");
- } else if (op_errno != ENOENT ||
- !posix_special_xattr (marker_xattrs,
- k)) {
- if (filler->real_path)
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on %s while doing "
- "xattrop: Key:%s (%s)",
- filler->real_path,
- k, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fgetxattr failed on fd=%d while doing "
- "xattrop: Key:%s (%s)",
- filler->fd,
- k, strerror (op_errno));
- }
-
- op_ret = -1;
- goto unlock;
- }
-
- switch (optype) {
-
- case GF_XATTROP_ADD_ARRAY:
- __add_array ((int32_t *) array, (int32_t *) v->data,
- v->len / 4);
- break;
-
- case GF_XATTROP_ADD_ARRAY64:
- __add_long_array ((int64_t *) array, (int64_t *) v->data,
- v->len / 8);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unknown xattrop type (%d) on %s. Please send "
- "a bug report to gluster-devel@gluster.org",
- optype, filler->real_path);
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
-
- if (filler->real_path) {
- size = sys_lsetxattr (filler->real_path, k, array,
- v->len, 0);
- } else {
- size = sys_fsetxattr (filler->fd, k, (char *)array,
- v->len, 0);
- }
- }
-unlock:
- UNLOCK (&inode->lock);
-
- if (op_ret == -1)
- goto out;
-
- op_errno = errno;
- if (size == -1) {
- if (filler->real_path)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr failed on %s while doing xattrop: "
- "key=%s (%s)", filler->real_path,
- k, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr failed on fd=%d while doing xattrop: "
- "key=%s (%s)", filler->fd,
- k, strerror (op_errno));
-
- op_ret = -1;
- goto out;
- } else {
- size = dict_set_bin (d, k, array, v->len);
-
- if (size != 0) {
- if (filler->real_path)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (path=%s): "
- "key=%s (%s)", filler->real_path,
- k, strerror (-size));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (fd=%d): "
- "key=%s (%s)", filler->fd,
- k, strerror (-size));
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- array = NULL;
- }
-
- array = NULL;
-
-out:
- return op_ret;
-}
-
-/**
- * xattrop - xattr operations - for internal use by GlusterFS
- * @optype: ADD_ARRAY:
- * dict should contain:
- * "key" ==> array of 32-bit numbers
- */
-
-int
-do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- int op_ret = 0;
- int op_errno = 0;
- int _fd = -1;
- char *real_path = NULL;
- struct posix_fd *pfd = NULL;
- inode_t *inode = NULL;
- posix_xattr_filler_t filler = {0,};
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (xattr, out);
- VALIDATE_OR_GOTO (this, out);
-
- if (fd) {
- op_ret = posix_fd_ctx_get (fd, this, &pfd);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get pfd from fd=%p",
- fd);
- op_errno = EBADFD;
- goto out;
- }
- _fd = pfd->fd;
- }
-
- if (loc && !uuid_is_null (loc->gfid))
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- if (real_path) {
- inode = loc->inode;
- } else if (fd) {
- inode = fd->inode;
- }
-
- filler.this = this;
- filler.fd = _fd;
- filler.real_path = real_path;
- filler.flags = (int)optype;
- filler.inode = inode;
-
- op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair,
- &filler);
-
-out:
-
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL);
- return 0;
-}
-
-
-int
-posix_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- do_xattrop (frame, this, loc, NULL, optype, xattr);
- return 0;
-}
-
-
-int
-posix_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- do_xattrop (frame, this, NULL, fd, optype, xattr);
- return 0;
-}
-
-
-int
-posix_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
-
- MAKE_INODE_HANDLE (real_path, this, loc, NULL);
-
- op_ret = access (real_path, mask & 07);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s",
- real_path, strerror (op_errno));
- goto out;
- }
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-posix_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int _fd = -1;
- struct iatt preop = {0,};
- struct iatt postop = {0,};
- struct posix_fd *pfd = NULL;
- int ret = -1;
- struct posix_private *priv = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- _fd = pfd->fd;
-
- op_ret = posix_fdstat (this, _fd, &preop);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "pre-operation fstat failed on fd=%p: %s", fd,
- strerror (op_errno));
- goto out;
- }
-
- op_ret = ftruncate (_fd, offset);
-
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "ftruncate failed on fd=%p (%"PRId64": %s",
- fd, offset, strerror (errno));
- goto out;
- }
-
- op_ret = posix_fdstat (this, _fd, &postop);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "post-operation fstat failed on fd=%p: %s",
- fd, strerror (errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop,
- &postop, NULL);
-
- return 0;
-}
-
-
-int32_t
-posix_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- int _fd = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- struct iatt buf = {0,};
- struct posix_fd *pfd = NULL;
- int ret = -1;
- struct posix_private *priv = NULL;
-
- DECLARE_OLD_FS_ID_VAR;
- SET_FS_ID (frame->root->uid, frame->root->gid);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv, out);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- _fd = pfd->fd;
-
- op_ret = posix_fdstat (this, _fd, &buf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s",
- fd, strerror (op_errno));
- goto out;
- }
-
- op_ret = 0;
-
-out:
- SET_TO_OLD_FS_ID ();
-
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, NULL);
- return 0;
-}
-
-static int gf_posix_lk_log;
-
-int32_t
-posix_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- struct gf_flock nullock = {0, };
-
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
- "functioning of your application.");
-
- STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL);
- return 0;
-}
-
-int32_t
-posix_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
- "functioning of your application.");
-
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL);
- return 0;
-}
-
-int32_t
-posix_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
- "functioning of your application.");
-
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL);
- return 0;
-}
-
-
-int32_t
-posix_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
- "functioning of your application.");
-
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL);
- return 0;
-}
-
-int32_t
-posix_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
- "functioning of your application.");
-
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL);
- return 0;
-}
-
-
-int
-posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
- gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
-{
- off_t in_case = -1;
- size_t filled = 0;
- int count = 0;
- char entrybuf[sizeof(struct dirent) + 256 + 8];
- struct dirent *entry = NULL;
- int32_t this_size = -1;
- gf_dirent_t *this_entry = NULL;
- uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
- struct stat stbuf = {0,};
- char *hpath = NULL;
- int len = 0;
- int ret = 0;
-
- if (skip_dirs) {
- len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
- hpath = alloca (len + 256); /* NAME_MAX */
- posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
- len = strlen (hpath);
- hpath[len] = '/';
- }
-
- if (!off) {
- rewinddir (dir);
- } else {
- seekdir (dir, off);
- }
-
- while (filled <= size) {
- in_case = telldir (dir);
-
- if (in_case == -1) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "telldir failed on dir=%p: %s",
- dir, strerror (errno));
- goto out;
- }
-
- errno = 0;
- entry = NULL;
- readdir_r (dir, (struct dirent *)entrybuf, &entry);
-
- if (!entry) {
- if (errno == EBADF) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "readdir failed on dir=%p: %s",
- dir, strerror (errno));
- goto out;
- }
- break;
- }
-
-#ifdef __NetBSD__
- /*
- * NetBSD with UFS1 backend uses backing files for
- * extended attributes. They can be found in a
- * .attribute file located at the root of the filesystem
- * We hide it to glusterfs clients, since chaos will occur
- * when the cluster/dht xlator decides to distribute
- * exended attribute backing file accross storage servers.
- */
- if ((uuid_compare (fd->inode->gfid, rootgfid) == 0)
- && (!strcmp(entry->d_name, ".attribute")))
- continue;
-#endif /* __NetBSD__ */
-
- if ((uuid_compare (fd->inode->gfid, rootgfid) == 0)
- && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) {
- continue;
- }
-
- if (skip_dirs) {
- if (DT_ISDIR (entry->d_type)) {
- continue;
- } else if (hpath) {
- strcpy (&hpath[len+1],entry->d_name);
- ret = lstat (hpath, &stbuf);
- if (!ret && S_ISDIR (stbuf.st_mode))
- continue;
- }
- }
-
- this_size = max (sizeof (gf_dirent_t),
- sizeof (gfs3_dirplist))
- + strlen (entry->d_name) + 1;
-
- if (this_size + filled > size) {
- seekdir (dir, in_case);
- break;
- }
-
- this_entry = gf_dirent_for_name (entry->d_name);
-
- if (!this_entry) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "could not create gf_dirent for entry %s: (%s)",
- entry->d_name, strerror (errno));
- goto out;
- }
- this_entry->d_off = telldir (dir);
- this_entry->d_ino = entry->d_ino;
- this_entry->d_type = entry->d_type;
-
- list_add_tail (&this_entry->list, &entries->list);
-
- filled += this_size;
- count ++;
- }
-
- if ((!readdir (dir) && (errno == 0)))
- /* Indicate EOF */
- errno = ENOENT;
-out:
- return count;
-}
-
-dict_t *
-posix_entry_xattr_fill (xlator_t *this, inode_t *inode,
- fd_t *fd, char *name, dict_t *dict,
- struct iatt *stbuf)
-{
- loc_t tmp_loc = {0,};
- char *entry_path = NULL;
-
- /* if we don't send the 'loc', open-fd-count be a problem. */
- tmp_loc.inode = inode;
-
- MAKE_HANDLE_PATH (entry_path, this, fd->inode->gfid, name);
-
- return posix_lookup_xattr_fill (this, entry_path,
- &tmp_loc, dict, stbuf);
-
-}
-
-
-int
-posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict)
-{
- gf_dirent_t *entry = NULL;
- inode_table_t *itable = NULL;
- inode_t *inode = NULL;
- char *hpath = NULL;
- int len = 0;
- struct iatt stbuf = {0, };
- uuid_t gfid;
-
- if (list_empty(&entries->list))
- return 0;
-
- itable = fd->inode->table;
-
- len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
- hpath = alloca (len + 256); /* NAME_MAX */
- posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
- len = strlen (hpath);
- hpath[len] = '/';
-
- list_for_each_entry (entry, &entries->list, list) {
- memset (gfid, 0, 16);
- inode = inode_grep (fd->inode->table, fd->inode,
- entry->d_name);
- if (inode)
- uuid_copy (gfid, inode->gfid);
-
- strcpy (&hpath[len+1], entry->d_name);
-
- posix_pstat (this, gfid, hpath, &stbuf);
-
- if (!inode)
- inode = inode_find (itable, stbuf.ia_gfid);
-
- if (!inode)
- inode = inode_new (itable);
-
- entry->inode = inode;
-
- if (dict) {
- entry->dict =
- posix_entry_xattr_fill (this, entry->inode,
- fd, entry->d_name,
- dict, &stbuf);
- dict_ref (entry->dict);
- }
-
- entry->d_stat = stbuf;
- if (stbuf.ia_ino)
- entry->d_ino = stbuf.ia_ino;
- inode = NULL;
- }
-
- return 0;
-}
-
-
-int32_t
-posix_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict)
-{
- struct posix_fd *pfd = NULL;
- DIR *dir = NULL;
- int ret = -1;
- int count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- gf_dirent_t entries;
- int32_t skip_dirs = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- INIT_LIST_HEAD (&entries.list);
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- dir = pfd->dir;
-
- if (!dir) {
- gf_log (this->name, GF_LOG_WARNING,
- "dir is NULL for fd=%p", fd);
- op_errno = EINVAL;
- goto out;
- }
-
- /* When READDIR_FILTER option is set to on, we can filter out
- * directory's entry from the entry->list.
- */
- ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
-
- LOCK (&fd->lock);
- {
- /* posix_fill_readdir performs multiple separate individual
- readdir() calls to fill up the buffer.
-
- In case of NFS where the same anonymous FD is shared between
- different applications, reading a common directory can
- result in the anonymous fd getting re-used unsafely between
- the two readdir requests (in two different io-threads).
-
- It would also help, in the future, to replace the loop
- around readdir() with a single large getdents() call.
- */
- count = posix_fill_readdir (fd, dir, off, size, &entries, this,
- skip_dirs);
- }
- UNLOCK (&fd->lock);
-
- /* pick ENOENT to indicate EOF */
- op_errno = errno;
- op_ret = count;
-
- if (whichop != GF_FOP_READDIRP)
- goto out;
-
- posix_readdirp_fill (this, fd, &entries, dict);
-
-out:
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free (&entries);
-
- return 0;
-}
-
-
-int32_t
-posix_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, dict_t *xdata)
-{
- posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata);
- return 0;
-}
-
-
-int32_t
-posix_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, dict_t *dict)
-{
- gf_dirent_t entries;
- int32_t op_ret = -1, op_errno = 0;
- gf_dirent_t *entry = NULL;
-
-
- if ((dict != NULL) && (dict_get (dict, GET_ANCESTRY_DENTRY_KEY))) {
- INIT_LIST_HEAD (&entries.list);
-
- op_ret = posix_get_ancestry (this, fd->inode, &entries, NULL,
- POSIX_ANCESTRY_DENTRY,
- &op_errno, dict);
- if (op_ret >= 0) {
- op_ret = 0;
-
- list_for_each_entry (entry, &entries.list, list) {
- op_ret++;
- }
- }
-
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries,
- NULL);
-
- gf_dirent_free (&entries);
- return 0;
- }
-
- posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict);
- return 0;
-}
-
-int32_t
-posix_priv (xlator_t *this)
-{
- struct posix_private *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
-
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
- this->name);
- gf_proc_dump_add_section(key_prefix);
-
- if (!this)
- return 0;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- gf_proc_dump_write("base_path","%s", priv->base_path);
- gf_proc_dump_write("base_path_length","%d", priv->base_path_length);
- gf_proc_dump_write("max_read","%d", priv->read_value);
- gf_proc_dump_write("max_write","%d", priv->write_value);
- gf_proc_dump_write("nr_files","%ld", priv->nr_files);
-
- return 0;
-}
-
-int32_t
-posix_inode (xlator_t *this)
-{
- return 0;
-}
-
-
-int32_t
-posix_rchecksum (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
-{
- char *alloc_buf = NULL;
- char *buf = NULL;
- int _fd = -1;
- struct posix_fd *pfd = NULL;
- int op_ret = -1;
- int op_errno = 0;
- int ret = 0;
- int32_t weak_checksum = 0;
- unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0};
- struct posix_private *priv = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- memset (strong_checksum, 0, MD5_DIGEST_LENGTH);
-
- alloc_buf = _page_aligned_alloc (len, &buf);
- if (!alloc_buf) {
- op_errno = ENOMEM;
- goto out;
- }
-
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- _fd = pfd->fd;
-
- LOCK (&fd->lock);
- {
- if (priv->aio_capable && priv->aio_init_done)
- __posix_fd_set_odirect (fd, pfd, 0, offset, len);
-
- ret = pread (_fd, buf, len, offset);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pread of %d bytes returned %d (%s)",
- len, ret, strerror (errno));
-
- op_errno = errno;
- }
-
- }
- UNLOCK (&fd->lock);
-
- if (ret < 0)
- goto out;
-
- weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) ret);
- gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) ret, (unsigned char *) strong_checksum);
-
- op_ret = 0;
-out:
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
- weak_checksum, strong_checksum, NULL);
-
- GF_FREE (alloc_buf);
-
- return 0;
-}
-
-
-/**
- * notify - when parent sends PARENT_UP, send CHILD_UP event from here
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- switch (event)
- {
- case GF_EVENT_PARENT_UP:
- {
- /* Tell the parent that posix xlator is up */
- default_notify (this, GF_EVENT_CHILD_UP, data);
- }
- break;
- default:
- /* */
- break;
- }
- return 0;
-}
+#include <glusterfs/xlator.h>
+#include "posix.h"
int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-static int
-posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
-{
- struct posix_private *priv = NULL;
- int ret = -1;
- struct stat st = {0,};
-
- priv = this->private;
-
- ret = sys_lstat (priv->base_path, &st);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to stat "
- "brick path %s (%s)",
- priv->base_path, strerror (errno));
- return ret;
- }
-
- if ((uid == -1 || st.st_uid == uid) &&
- (gid == -1 || st.st_gid == gid))
- return 0;
-
- ret = sys_chown (priv->base_path, uid, gid);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Failed to set "
- "uid/gid for brick path %s, %s",
- priv->base_path, strerror (errno));
-
- return ret;
-}
-
-
-static int
-set_batch_fsync_mode (struct posix_private *priv, const char *str)
-{
- if (strcmp (str, "none") == 0)
- priv->batch_fsync_mode = BATCH_NONE;
- else if (strcmp (str, "syncfs") == 0)
- priv->batch_fsync_mode = BATCH_SYNCFS;
- else if (strcmp (str, "syncfs-single-fsync") == 0)
- priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC;
- else if (strcmp (str, "syncfs-reverse-fsync") == 0)
- priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC;
- else if (strcmp (str, "reverse-fsync") == 0)
- priv->batch_fsync_mode = BATCH_REVERSE_FSYNC;
- else
- return -1;
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-static int
-set_xattr_user_namespace_mode (struct posix_private *priv, const char *str)
-{
- if (strcmp (str, "none") == 0)
- priv->xattr_user_namespace = XATTR_NONE;
- else if (strcmp (str, "strip") == 0)
- priv->xattr_user_namespace = XATTR_STRIP;
- else if (strcmp (str, "append") == 0)
- priv->xattr_user_namespace = XATTR_APPEND;
- else if (strcmp (str, "both") == 0)
- priv->xattr_user_namespace = XATTR_BOTH;
- else
- return -1;
- return 0;
-}
-#endif
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- int ret = -1;
- struct posix_private *priv = NULL;
- int32_t uid = -1;
- int32_t gid = -1;
- char *batch_fsync_mode_str = NULL;
-
- priv = this->private;
-
- GF_OPTION_RECONF ("brick-uid", uid, options, int32, out);
- GF_OPTION_RECONF ("brick-gid", gid, options, int32, out);
- if (uid != -1 || gid != -1)
- posix_set_owner (this, uid, gid);
-
- GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
- options, uint32, out);
-
- GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str,
- options, str, out);
-
- if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
- batch_fsync_mode_str);
- goto out;
- }
-
-#ifdef GF_DARWIN_HOST_OS
-
- char *xattr_user_namespace_mode_str = NULL;
+mem_acct_init(xlator_t *this);
- GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str,
- options, str, out);
-
- if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Unknown xattr user namespace mode string: %s",
- xattr_user_namespace_mode_str);
- goto out;
- }
-
-#endif
-
- GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
- options, bool, out);
-
- if (priv->aio_configured)
- posix_aio_on (this);
- else
- posix_aio_off (this);
-
- GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks,
- options, bool, out);
-
- GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo,
- options, bool, out);
-
- if (priv->node_uuid_pathinfo &&
- (uuid_is_null (priv->glusterd_uuid))) {
- gf_log (this->name, GF_LOG_INFO,
- "glusterd uuid is NULL, pathinfo xattr would"
- " fallback to <hostname>:<export>");
- }
-
- GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
- options, uint32, out);
- posix_spawn_health_check_thread (this);
-
- ret = 0;
-out:
- return ret;
-}
-
-
-/**
- * init -
- */
-int
-init (xlator_t *this)
-{
- struct posix_private *_private = NULL;
- data_t *dir_data = NULL;
- data_t *tmp_data = NULL;
- struct stat buf = {0,};
- gf_boolean_t tmp_bool = 0;
- int dict_ret = 0;
- int ret = 0;
- int op_ret = -1;
- ssize_t size = -1;
- int32_t janitor_sleep = 0;
- uuid_t old_uuid = {0,};
- uuid_t dict_uuid = {0,};
- uuid_t gfid = {0,};
- uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
- char *guuid = NULL;
- int32_t uid = -1;
- int32_t gid = -1;
- char *batch_fsync_mode_str;
-
- dir_data = dict_get (this->options, "directory");
-
- if (this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: storage/posix cannot have subvolumes");
- ret = -1;
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. Please check the volume file.");
- }
-
- if (!dir_data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Export directory not specified in volume file.");
- ret = -1;
- goto out;
- }
-
- umask (000); // umask `masking' is done at the client side
-
- /* Check whether the specified directory exists, if not log it. */
- op_ret = stat (dir_data->data, &buf);
- if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Directory '%s' doesn't exist, exiting.",
- dir_data->data);
- ret = -1;
- goto out;
- }
-
- /* Check for Extended attribute support, if not present, log it */
- op_ret = sys_lsetxattr (dir_data->data,
- "trusted.glusterfs.test", "working", 8, 0);
- if (op_ret == 0) {
- sys_lremovexattr (dir_data->data, "trusted.glusterfs.test");
- } else {
- tmp_data = dict_get (this->options,
- "mandate-attribute");
- if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "wrong option provided for key "
- "\"mandate-attribute\"");
- ret = -1;
- goto out;
- }
- if (!tmp_bool) {
- gf_log (this->name, GF_LOG_WARNING,
- "Extended attribute not supported, "
- "starting as per option");
- } else {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Extended attribute not supported, "
- "exiting.");
- ret = -1;
- goto out;
- }
- } else {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Extended attribute not supported, exiting.");
- ret = -1;
- goto out;
- }
- }
-
- tmp_data = dict_get (this->options, "volume-id");
- if (tmp_data) {
- op_ret = uuid_parse (tmp_data->data, dict_uuid);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "wrong volume-id (%s) set in volume file",
- tmp_data->data);
- ret = -1;
- goto out;
- }
- size = sys_lgetxattr (dir_data->data,
- "trusted.glusterfs.volume-id", old_uuid, 16);
- if (size == 16) {
- if (uuid_compare (old_uuid, dict_uuid)) {
- gf_log (this->name, GF_LOG_ERROR,
- "mismatching volume-id (%s) received. "
- "already is a part of volume %s ",
- tmp_data->data, uuid_utoa (old_uuid));
- ret = -1;
- goto out;
- }
- } else if ((size == -1) && (errno == ENODATA)) {
-
- gf_log (this->name, GF_LOG_ERROR,
- "Extended attribute trusted.glusterfs."
- "volume-id is absent");
- ret = -1;
- goto out;
-
- } else if ((size == -1) && (errno != ENODATA)) {
- /* Wrong 'volume-id' is set, it should be error */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to fetch volume-id (%s)",
- dir_data->data, strerror (errno));
- ret = -1;
- goto out;
- } else {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to fetch proper volume id from export");
- goto out;
- }
- }
-
- /* Now check if the export directory has some other 'gfid',
- other than that of root '/' */
- size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16);
- if (size == 16) {
- if (!__is_root_gfid (gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid (%s) is not that of glusterfs '/' ",
- dir_data->data, uuid_utoa (gfid));
- ret = -1;
- goto out;
- }
- } else if (size != -1) {
- /* Wrong 'gfid' is set, it should be error */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: wrong value set as gfid",
- dir_data->data);
- ret = -1;
- goto out;
- } else if ((size == -1) && (errno != ENODATA) &&
- (errno != ENOATTR)) {
- /* Wrong 'gfid' is set, it should be error */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to fetch gfid (%s)",
- dir_data->data, strerror (errno));
- ret = -1;
- goto out;
- } else {
- /* First time volume, set the GFID */
- size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid,
- 16, XATTR_CREATE);
- if (size) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid (%s)",
- dir_data->data, strerror (errno));
- ret = -1;
- goto out;
- }
- }
-
- size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR,
- NULL, 0);
- if ((size < 0) && (errno == ENOTSUP))
- gf_log (this->name, GF_LOG_WARNING,
- "Posix access control list is not supported.");
-
- ret = 0;
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_posix_mt_posix_private);
- if (!_private) {
- ret = -1;
- goto out;
- }
-
- _private->base_path = gf_strdup (dir_data->data);
- _private->base_path_length = strlen (_private->base_path);
-
- LOCK_INIT (&_private->lock);
-
- ret = dict_get_str (this->options, "hostname", &_private->hostname);
- if (ret) {
- _private->hostname = GF_CALLOC (256, sizeof (char),
- gf_common_mt_char);
- if (!_private->hostname) {
- goto out;
- }
- ret = gethostname (_private->hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)", strerror (errno));
- }
- }
-
- _private->export_statfs = 1;
- tmp_data = dict_get (this->options, "export-statfs-size");
- if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->export_statfs) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "'export-statfs-size' takes only boolean "
- "options");
- goto out;
- }
- if (!_private->export_statfs)
- gf_log (this->name, GF_LOG_DEBUG,
- "'statfs()' returns dummy size");
- }
-
- _private->background_unlink = 0;
- tmp_data = dict_get (this->options, "background-unlink");
- if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->background_unlink) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "'background-unlink' takes only boolean "
- "options");
- goto out;
- }
-
- if (_private->background_unlink)
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinks will be performed in background");
- }
-
- tmp_data = dict_get (this->options, "o-direct");
- if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->o_direct) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "wrong option provided for 'o-direct'");
- goto out;
- }
- if (_private->o_direct)
- gf_log (this->name, GF_LOG_DEBUG,
- "o-direct mode is enabled (O_DIRECT "
- "for every open)");
- }
-
- tmp_data = dict_get (this->options, "update-link-count-parent");
- if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->update_pgfid_nlinks) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "wrong value provided for "
- "'update-link-count-parent'");
- goto out;
- }
- if (_private->update_pgfid_nlinks)
- gf_log (this->name, GF_LOG_DEBUG,
- "update-link-count-parent is enabled. Thus for each "
- "file an extended attribute representing the "
- "number of hardlinks for that file within the "
- "same parent directory is set.");
- }
-
- ret = dict_get_str (this->options, "glusterd-uuid", &guuid);
- if (!ret) {
- if (uuid_parse (guuid, _private->glusterd_uuid))
- gf_log (this->name, GF_LOG_WARNING, "Cannot parse "
- "glusterd (node) UUID, node-uuid xattr "
- "request would return - \"No such attribute\"");
- } else {
- gf_log (this->name, GF_LOG_DEBUG, "No glusterd (node) UUID "
- "passed - node-uuid xattr request will return "
- "\"No such attribute\"");
- }
- ret = 0;
-
- _private->janitor_sleep_duration = 600;
-
- dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration",
- &janitor_sleep);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting janitor sleep duration to %d.",
- janitor_sleep);
-
- _private->janitor_sleep_duration = janitor_sleep;
- }
- /* performing open dir on brick dir locks the brick dir
- * and prevents it from being unmounted
- */
- _private->mount_lock = opendir (dir_data->data);
- if (!_private->mount_lock) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Could not lock brick directory");
- goto out;
- }
-#ifndef GF_DARWIN_HOST_OS
- {
- struct rlimit lim;
- lim.rlim_cur = 1048576;
- lim.rlim_max = 1048576;
-
- if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set 'ulimit -n "
- " 1048576': %s", strerror(errno));
- lim.rlim_cur = 65536;
- lim.rlim_max = 65536;
-
- if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set maximum allowed open "
- "file descriptors to 64k: %s",
- strerror(errno));
- }
- else {
- gf_log (this->name, GF_LOG_INFO,
- "Maximum allowed open file descriptors "
- "set to 65536");
- }
- }
- }
-#endif
- this->private = (void *)_private;
-
- op_ret = posix_handle_init (this);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Posix handle setup failed");
- ret = -1;
- goto out;
- }
-
- op_ret = posix_handle_trash_init (this);
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Posix landfill setup failed");
- ret = -1;
- goto out;
- }
-
- _private->aio_init_done = _gf_false;
- _private->aio_capable = _gf_false;
-
- GF_OPTION_INIT ("brick-uid", uid, int32, out);
- GF_OPTION_INIT ("brick-gid", gid, int32, out);
- if (uid != -1 || gid != -1)
- posix_set_owner (this, uid, gid);
-
- GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);
-
- if (_private->aio_configured) {
- op_ret = posix_aio_on (this);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Posix AIO init failed");
- ret = -1;
- goto out;
- }
- }
-
- GF_OPTION_INIT ("node-uuid-pathinfo",
- _private->node_uuid_pathinfo, bool, out);
- if (_private->node_uuid_pathinfo &&
- (uuid_is_null (_private->glusterd_uuid))) {
- gf_log (this->name, GF_LOG_INFO,
- "glusterd uuid is NULL, pathinfo xattr would"
- " fallback to <hostname>:<export>");
- }
-
- _private->health_check_active = _gf_false;
- GF_OPTION_INIT ("health-check-interval",
- _private->health_check_interval, uint32, out);
- if (_private->health_check_interval)
- posix_spawn_health_check_thread (this);
-
- pthread_mutex_init (&_private->janitor_lock, NULL);
- pthread_cond_init (&_private->janitor_cond, NULL);
- INIT_LIST_HEAD (&_private->janitor_fds);
-
- posix_spawn_janitor_thread (this);
-
- pthread_mutex_init (&_private->fsync_mutex, NULL);
- pthread_cond_init (&_private->fsync_cond, NULL);
- INIT_LIST_HEAD (&_private->fsyncs);
-
- ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "fsyncer thread"
- " creation failed (%s)", strerror (errno));
- goto out;
- }
-
- GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out);
-
- if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
- batch_fsync_mode_str);
- goto out;
- }
-
-#ifdef GF_DARWIN_HOST_OS
-
- char *xattr_user_namespace_mode_str = NULL;
-
- GF_OPTION_INIT ("xattr-user-namespace-mode",
- xattr_user_namespace_mode_str, str, out);
-
- if (set_xattr_user_namespace_mode (_private,
- xattr_user_namespace_mode_str) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unknown xattr user namespace mode string: %s",
- xattr_user_namespace_mode_str);
- goto out;
- }
-#endif
-
- GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
- uint32, out);
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- struct posix_private *priv = this->private;
- if (!priv)
- return;
- this->private = NULL;
- /*unlock brick dir*/
- if (priv->mount_lock)
- closedir (priv->mount_lock);
- GF_FREE (priv);
- return;
-}
+extern struct volume_options posix_options[];
struct xlator_dumpops dumpops = {
- .priv = posix_priv,
- .inode = posix_inode,
+ .priv = posix_priv,
+ .inode = posix_inode,
};
struct xlator_fops fops = {
- .lookup = posix_lookup,
- .stat = posix_stat,
- .opendir = posix_opendir,
- .readdir = posix_readdir,
- .readdirp = posix_readdirp,
- .readlink = posix_readlink,
- .mknod = posix_mknod,
- .mkdir = posix_mkdir,
- .unlink = posix_unlink,
- .rmdir = posix_rmdir,
- .symlink = posix_symlink,
- .rename = posix_rename,
- .link = posix_link,
- .truncate = posix_truncate,
- .create = posix_create,
- .open = posix_open,
- .readv = posix_readv,
- .writev = posix_writev,
- .statfs = posix_statfs,
- .flush = posix_flush,
- .fsync = posix_fsync,
- .setxattr = posix_setxattr,
- .fsetxattr = posix_fsetxattr,
- .getxattr = posix_getxattr,
- .fgetxattr = posix_fgetxattr,
- .removexattr = posix_removexattr,
- .fremovexattr = posix_fremovexattr,
- .fsyncdir = posix_fsyncdir,
- .access = posix_access,
- .ftruncate = posix_ftruncate,
- .fstat = posix_fstat,
- .lk = posix_lk,
- .inodelk = posix_inodelk,
- .finodelk = posix_finodelk,
- .entrylk = posix_entrylk,
- .fentrylk = posix_fentrylk,
- .rchecksum = posix_rchecksum,
- .xattrop = posix_xattrop,
- .fxattrop = posix_fxattrop,
- .setattr = posix_setattr,
- .fsetattr = posix_fsetattr,
- .fallocate = _posix_fallocate,
- .discard = posix_discard,
- .zerofill = posix_zerofill,
+ .lookup = posix_lookup,
+ .stat = posix_stat,
+ .opendir = posix_opendir,
+ .readdir = posix_readdir,
+ .readdirp = posix_readdirp,
+ .readlink = posix_readlink,
+ .mknod = posix_mknod,
+ .mkdir = posix_mkdir,
+ .unlink = posix_unlink,
+ .rmdir = posix_rmdir,
+ .symlink = posix_symlink,
+ .rename = posix_rename,
+ .link = posix_link,
+ .truncate = posix_truncate,
+ .create = posix_create,
+ .open = posix_open,
+ .readv = posix_readv,
+ .writev = posix_writev,
+ .statfs = posix_statfs,
+ .flush = posix_flush,
+ .fsync = posix_fsync,
+ .setxattr = posix_setxattr,
+ .fsetxattr = posix_fsetxattr,
+ .getxattr = posix_getxattr,
+ .fgetxattr = posix_fgetxattr,
+ .removexattr = posix_removexattr,
+ .fremovexattr = posix_fremovexattr,
+ .fsyncdir = posix_fsyncdir,
+ .access = posix_access,
+ .ftruncate = posix_ftruncate,
+ .fstat = posix_fstat,
+ .lk = posix_lk,
+ .inodelk = posix_inodelk,
+ .finodelk = posix_finodelk,
+ .entrylk = posix_entrylk,
+ .fentrylk = posix_fentrylk,
+ .rchecksum = posix_rchecksum,
+ .xattrop = posix_xattrop,
+ .fxattrop = posix_fxattrop,
+ .setattr = posix_setattr,
+ .fsetattr = posix_fsetattr,
+ .fallocate = posix_glfallocate,
+ .discard = posix_discard,
+ .zerofill = posix_zerofill,
+ .ipc = posix_ipc,
+ .seek = posix_seek,
+ .lease = posix_lease,
+ .put = posix_put,
+ .copy_file_range = posix_copy_file_range,
};
struct xlator_cbks cbks = {
- .release = posix_release,
- .releasedir = posix_releasedir,
- .forget = posix_forget
+ .release = posix_release,
+ .releasedir = posix_releasedir,
+ .forget = posix_forget,
};
-struct volume_options options[] = {
- { .key = {"o-direct"},
- .type = GF_OPTION_TYPE_BOOL },
- { .key = {"directory"},
- .type = GF_OPTION_TYPE_PATH },
- { .key = {"hostname"},
- .type = GF_OPTION_TYPE_ANY },
- { .key = {"export-statfs-size"},
- .type = GF_OPTION_TYPE_BOOL },
- { .key = {"mandate-attribute"},
- .type = GF_OPTION_TYPE_BOOL },
- { .key = {"background-unlink"},
- .type = GF_OPTION_TYPE_BOOL },
- { .key = {"janitor-sleep-duration"},
- .type = GF_OPTION_TYPE_INT },
- { .key = {"volume-id"},
- .type = GF_OPTION_TYPE_ANY },
- { .key = {"glusterd-uuid"},
- .type = GF_OPTION_TYPE_STR },
- {
- .key = {"linux-aio"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Support for native Linux AIO"
- },
- {
- .key = {"brick-uid"},
- .type = GF_OPTION_TYPE_INT,
- .min = -1,
- .validate = GF_OPT_VALIDATE_MIN,
- .default_value = "-1",
- .description = "Support for setting uid of brick's owner"
- },
- {
- .key = {"brick-gid"},
- .type = GF_OPTION_TYPE_INT,
- .min = -1,
- .validate = GF_OPT_VALIDATE_MIN,
- .default_value = "-1",
- .description = "Support for setting gid of brick's owner"
- },
- { .key = {"node-uuid-pathinfo"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "return glusterd's node-uuid in pathinfo xattr"
- " string instead of hostname"
- },
- {
- .key = {"health-check-interval"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .default_value = "30",
- .validate = GF_OPT_VALIDATE_MIN,
- .description = "Interval in seconds for a filesystem health check, "
- "set to 0 to disable"
- },
- { .key = {"batch-fsync-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "reverse-fsync",
- .description = "Possible values:\n"
- "\t- syncfs: Perform one syncfs() on behalf oa batch"
- "of fsyncs.\n"
- "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
- " of fsyncs and one fsync() per batch.\n"
- "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch"
- " of fsyncs and fsync() each file in the batch in reverse order.\n"
- " in reverse order.\n"
- "\t- reverse-fsync: Perform fsync() of each file in the batch in"
- " reverse order."
- },
- { .key = {"batch-fsync-delay-usec"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- .description = "Num of usecs to wait for aggregating fsync"
- " requests",
- },
- { .key = {"update-link-count-parent"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Enable placeholders for gfid to path conversion"
- },
-#if GF_DARWIN_HOST_OS
- { .key = {"xattr-user-namespace-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "none",
- .description = "Option to control XATTR user namespace on the raw filesystem: "
- "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n"
- " The raw filesystem will not be compatible with OS X Finder.\n"
- "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n"
- },
-#endif
- { .key = {NULL} }
+xlator_api_t xlator_api = {
+ .init = posix_init,
+ .fini = posix_fini,
+ .notify = posix_notify,
+ .reconfigure = posix_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = posix_options,
+ .identifier = "posix",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index c9bfc984da5..b8db146eef2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -10,23 +10,14 @@
#ifndef _POSIX_H
#define _POSIX_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#include <time.h>
-#ifdef linux
-#ifdef __GLIBC__
+#ifdef HAVE_SET_FSID
#include <sys/fsuid.h>
-#else
-#include <unistd.h>
-#endif
#endif
#ifdef HAVE_SYS_XATTR_H
@@ -37,13 +28,10 @@
#include <sys/extattr.h>
#endif
-#include "xlator.h"
-#include "inode.h"
-#include "compat.h"
-#include "timer.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/timer.h>
#include "posix-mem-types.h"
-#include "posix-handle.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#ifdef HAVE_LIBAIO
#include <libaio.h>
@@ -53,177 +41,633 @@
#define VECTOR_SIZE 64 * 1024 /* vector size 64KB*/
#define MAX_NO_VECT 1024
-#define POSIX_GFID_HANDLE_SIZE(base_path_len) (base_path_len + SLEN("/") \
- + SLEN(GF_HIDDEN_PATH) + SLEN("/") \
- + SLEN("00/") \
- + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */;
+#define XATTR_KEY_BUF_SIZE 4096
+#define XATTR_VAL_BUF_SIZE 8192
+
+#define ACL_BUFFER_MAX 4096 /* size of character buffer */
+
+#define DHT_LINKTO "trusted.glusterfs.dht.linkto"
+
+#define POSIX_GFID_HANDLE_SIZE(base_path_len) \
+ (base_path_len + SLEN("/") + SLEN(GF_HIDDEN_PATH) + SLEN("/") + \
+ SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */;
+
+#define POSIX_GFID_HANDLE_RELSIZE \
+ SLEN("../") + SLEN("../") + SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1;
+
+#define GF_UNLINK_TRUE 0x0000000000000001
+#define GF_UNLINK_FALSE 0x0000000000000000
+
+#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out) \
+ do { \
+ if (frame->root->pid >= 0 && priv->disk_space_full && \
+ !dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \
+ op_ret = -1; \
+ op_errno = ENOSPC; \
+ gf_msg_debug("posix", ENOSPC, \
+ "disk space utilization reached limits" \
+ " for path %s ", \
+ priv->base_path); \
+ goto out; \
+ } \
+ } while (0)
+
+/* Setting microseconds or nanoseconds depending on what's supported:
+ The passed in `tv` can be
+ struct timespec
+ if supported (better, because it supports nanosecond resolution) or
+ struct timeval
+ otherwise. */
+#if HAVE_UTIMENSAT
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+ (sys_utimensat(AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW))
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+ tv.tv_usec = nanosecs / 1000
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) (lutimes(path, tv))
+#endif
+
+#define GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xattr_req, op_ret, \
+ op_errno, _uuid_req, out) \
+ do { \
+ int _ret = 0; \
+ /* TODO: Remove pid check once trash implements client side \
+ * logic to assign gfid for entry creations inside .trashcan \
+ */ \
+ if (frame->root->pid == GF_SERVER_PID_TRASH) \
+ break; \
+ _ret = dict_get_gfuuid(xattr_req, "gfid-req", &_uuid_req); \
+ if (_ret) { \
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, \
+ "failed to get the gfid from dict for %s", loc->path); \
+ op_ret = -1; \
+ op_errno = EINVAL; \
+ goto out; \
+ } \
+ if (gf_uuid_is_null(_uuid_req)) { \
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, \
+ "gfid is null for %s", loc->path); \
+ op_ret = -1; \
+ op_errno = EINVAL; \
+ goto out; \
+ } \
+ } while (0)
/**
* posix_fd - internal structure common to file and directory fd's
*/
struct posix_fd {
- int fd; /* fd returned by the kernel */
- int32_t flags; /* flags for open/creat */
- DIR * dir; /* handle returned by the kernel */
- int odirect;
- struct list_head list; /* to add to the janitor list */
+ int fd; /* fd returned by the kernel */
+ int32_t flags; /* flags for open/creat */
+ DIR *dir; /* handle returned by the kernel */
+ off_t dir_eof; /* offset at dir EOF */
+ struct list_head list; /* to add to the janitor list */
+ int odirect;
+ xlator_t *xl;
+ char _pad[4]; /* manual padding */
};
-
struct posix_private {
- char *base_path;
- int32_t base_path_length;
+ char *base_path;
+ int32_t base_path_length;
+ int32_t path_max;
- gf_lock_t lock;
+ gf_lock_t lock;
- char *hostname;
- /* Statistics, provides activity of the server */
+ char *hostname;
- struct timeval prev_fetch_time;
- struct timeval init_time;
+ time_t last_landfill_check;
- time_t last_landfill_check;
- int32_t janitor_sleep_duration;
- struct list_head janitor_fds;
- pthread_cond_t janitor_cond;
- pthread_mutex_t janitor_lock;
+ gf_atomic_t read_value; /* Total read, from init */
+ gf_atomic_t write_value; /* Total write, from init */
- int64_t read_value; /* Total read, from init */
- int64_t write_value; /* Total write, from init */
- int64_t nr_files;
-/*
- In some cases, two exported volumes may reside on the same
- partition on the server. Sending statvfs info for both
- the volumes will lead to erroneous df output at the client,
- since free space on the partition will be counted twice.
+ /* janitor task which cleans up /.trash (created by replicate) */
+ struct gf_tw_timer_list *janitor;
- In such cases, user can disable exporting statvfs info
- on one of the volumes by setting this option.
-*/
- gf_boolean_t export_statfs;
+ char *trash_path;
+ /* lock for brick dir */
+ int mount_lock;
- gf_boolean_t o_direct; /* always open files in O_DIRECT mode */
+ struct stat handledir;
+ /* uuid of glusterd that spawned the brick process */
+ uuid_t glusterd_uuid;
-/*
- decide whether posix_unlink does open (file), unlink (file), close (fd)
- instead of just unlink (file). with the former approach there is no lockout
- of access to parent directory during removal of very large files for the
- entire duration of freeing of data blocks.
-*/
- gf_boolean_t background_unlink;
+#ifdef HAVE_LIBAIO
+ io_context_t ctxp;
+ pthread_t aiothread;
+#endif
-/* janitor thread which cleans up /.trash (created by replicate) */
- pthread_t janitor;
- gf_boolean_t janitor_present;
- char * trash_path;
-/* lock for brick dir */
- DIR *mount_lock;
+ pthread_t fsyncer;
+ struct list_head fsyncs;
+ pthread_mutex_t fsync_mutex;
+ pthread_cond_t fsync_cond;
+ pthread_mutex_t janitor_mutex;
+ pthread_cond_t janitor_cond;
+ pthread_cond_t fd_cond;
+ int fsync_queue_count;
+ int32_t janitor_sleep_duration;
+
+ enum {
+ BATCH_NONE = 0,
+ BATCH_SYNCFS,
+ BATCH_SYNCFS_SINGLE_FSYNC,
+ BATCH_REVERSE_FSYNC,
+ BATCH_SYNCFS_REVERSE_FSYNC
+ } batch_fsync_mode;
+
+ uint32_t batch_fsync_delay_usec;
+ char gfid2path_sep[8];
+
+ /* seconds to sleep between health checks */
+ uint32_t health_check_interval;
+ /* seconds to wait for an aio write to finish during a health check */
+ uint32_t health_check_timeout;
+ pthread_t health_check;
+
+ double disk_reserve;
+ pthread_t disk_space_check;
+ uint32_t disk_space_full;
- struct stat handledir;
+#ifdef GF_DARWIN_HOST_OS
+ enum {
+ XATTR_NONE = 0,
+ XATTR_STRIP,
+ XATTR_APPEND,
+ XATTR_BOTH,
+ } xattr_user_namespace;
+#endif
-/* uuid of glusterd that swapned the brick process */
- uuid_t glusterd_uuid;
+ /* Option to handle the cases of multiple bricks exported from
+ same backend. Very much usable in brick-splitting feature. */
+ int32_t shared_brick_count;
+
+ /*Option to set mode bit permission that will always be set on
+ file/directory. */
+ mode_t force_create_mode;
+ mode_t force_directory_mode;
+ mode_t create_mask;
+ mode_t create_directory_mask;
+ uint32_t max_hardlinks;
+ int32_t arrdfd[256];
+ int dirfd;
+
+ /* This option controls whether landfill_purge is called or not */
+ gf_boolean_t disable_landfill_purge;
+
+ gf_boolean_t fips_mode_rchecksum;
+ gf_boolean_t ctime;
+ gf_boolean_t janitor_task_stop;
+
+ gf_boolean_t disk_space_check_active;
+ char disk_unit;
+ gf_boolean_t health_check_active;
+ gf_boolean_t update_pgfid_nlinks;
+ gf_boolean_t gfid2path;
+ /* node-uuid in pathinfo xattr */
+ gf_boolean_t node_uuid_pathinfo;
+ /*
+ In some cases, two exported volumes may reside on the same
+ partition on the server. Sending statvfs info for both
+ the volumes will lead to erroneous df output at the client,
+ since free space on the partition will be counted twice.
+
+ In such cases, user can disable exporting statvfs info
+ on one of the volumes by setting this option.
+ */
+ gf_boolean_t export_statfs;
+
+ gf_boolean_t o_direct; /* always open files in O_DIRECT mode */
+
+ /*
+ decide whether posix_unlink does open (file), unlink (file), close (fd)
+ instead of just unlink (file). with the former approach there is no
+ lockout of access to parent directory during removal of very large files
+ for the entire duration of freeing of data blocks.
+ */
+ gf_boolean_t background_unlink;
+ gf_boolean_t aio_configured;
+ gf_boolean_t aio_init_done;
+ gf_boolean_t aio_capable;
+ uint32_t rel_fdcount;
+};
- gf_boolean_t aio_configured;
- gf_boolean_t aio_init_done;
- gf_boolean_t aio_capable;
-#ifdef HAVE_LIBAIO
- io_context_t ctxp;
- pthread_t aiothread;
-#endif
+typedef struct {
+ call_frame_t *frame;
+ xlator_t *this;
+ const char *real_path;
+ dict_t *xattr;
+ struct iatt *stbuf;
+ loc_t *loc;
+ inode_t *inode; /* for all do_xattrop() key handling */
+ fd_t *fd;
+ int fdnum;
+ int flags;
+ char *list;
+ size_t list_size;
+ int32_t op_errno;
+
+ char _pad[4]; /* manual padding */
+} posix_xattr_filler_t;
- /* node-uuid in pathinfo xattr */
- gf_boolean_t node_uuid_pathinfo;
+typedef struct {
+ uint64_t unlink_flag;
+ pthread_mutex_t xattrop_lock;
+ pthread_mutex_t write_atomic_lock;
+ pthread_mutex_t pgfid_lock;
+} posix_inode_ctx_t;
+
+#define POSIX_BASE_PATH(this) \
+ (((struct posix_private *)this->private)->base_path)
+
+#define POSIX_BASE_PATH_LEN(this) \
+ (((struct posix_private *)this->private)->base_path_length)
+
+#define POSIX_PATH_MAX(this) (((struct posix_private *)this->private)->path_max)
+
+#define POSIX_GET_FILE_UNLINK_PATH(base_path, gfid, unlink_path) \
+ do { \
+ int path_len = 0; \
+ char gfid_str[64] = {0}; \
+ uuid_utoa_r(gfid, gfid_str); \
+ path_len = strlen(base_path) + 1 + SLEN(GF_UNLINK_PATH) + 1 + \
+ UUID_CANONICAL_FORM_LEN + 1; \
+ unlink_path = alloca(path_len); \
+ if (!unlink_path) { \
+ gf_msg("posix", GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, \
+ "Failed to get unlink_path"); \
+ break; \
+ } \
+ sprintf(unlink_path, "%s/%s/%s", base_path, GF_UNLINK_PATH, gfid_str); \
+ } while (0)
- pthread_t fsyncer;
- struct list_head fsyncs;
- pthread_mutex_t fsync_mutex;
- pthread_cond_t fsync_cond;
- int fsync_queue_count;
+/* Helper functions */
+int
+posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx);
- enum {
- BATCH_NONE = 0,
- BATCH_SYNCFS,
- BATCH_SYNCFS_SINGLE_FSYNC,
- BATCH_REVERSE_FSYNC,
- BATCH_SYNCFS_REVERSE_FSYNC
- } batch_fsync_mode;
+int
+posix_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+ posix_inode_ctx_t **ctx);
- uint32_t batch_fsync_delay_usec;
- gf_boolean_t update_pgfid_nlinks;
+int
+__posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx);
- /* seconds to sleep between health checks */
- uint32_t health_check_interval;
- pthread_t health_check;
- gf_boolean_t health_check_active;
+int
+__posix_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+ posix_inode_ctx_t **ctx);
-#ifdef GF_DARWIN_HOST_OS
- enum {
- XATTR_NONE = 0,
- XATTR_STRIP,
- XATTR_APPEND,
- XATTR_BOTH,
- } xattr_user_namespace;
-#endif
+int
+posix_gfid_set(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req,
+ pid_t pid, int *op_errno);
+int
+posix_fdstat(xlator_t *this, inode_t *inode, int fd, struct iatt *stbuf_p);
+int
+posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename,
+ struct iatt *iatt);
+int
+posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *real_path,
+ struct iatt *iatt, gf_boolean_t inode_locked);
-};
+dict_t *
+posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd,
+ int fdnum, dict_t *xattr, struct iatt *buf);
+int
+posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+ data_t *value, int flags, struct iatt *stbuf);
+int
+posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key,
+ data_t *value, int flags, struct iatt *stbuf, fd_t *_fd);
+void
+posix_janitor_timer_start(xlator_t *this);
+int
+posix_acl_xattr_set(xlator_t *this, const char *path, dict_t *xattr_req);
+int
+posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc,
+ dict_t *xattr_req);
+int
+posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
+ dict_t *dict);
-typedef struct {
- xlator_t *this;
- const char *real_path;
- dict_t *xattr;
- struct iatt *stbuf;
- loc_t *loc;
- inode_t *inode; /* for all do_xattrop() key handling */
- int fd;
- int flags;
- int32_t op_errno;
-} posix_xattr_filler_t;
+int
+posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd,
+ int *op_errno);
+void
+posix_fill_ino_from_gfid(xlator_t *this, struct iatt *buf);
+gf_boolean_t
+posix_special_xattr(char **pattern, char *key);
-#define POSIX_BASE_PATH(this) (((struct posix_private *)this->private)->base_path)
+void
+__posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size);
+int
+posix_spawn_health_check_thread(xlator_t *this);
-#define POSIX_BASE_PATH_LEN(this) (((struct posix_private *)this->private)->base_path_length)
+int
+posix_spawn_disk_space_check_thread(xlator_t *this);
-/* Helper functions */
-int posix_gfid_set (xlator_t *this, const char *path, loc_t *loc,
- dict_t *xattr_req);
-int posix_fdstat (xlator_t *this, int fd, struct iatt *stbuf_p);
-int posix_istat (xlator_t *this, uuid_t gfid, const char *basename,
- struct iatt *iatt);
-int posix_pstat (xlator_t *this, uuid_t gfid, const char *real_path,
- struct iatt *iatt);
-dict_t *posix_lookup_xattr_fill (xlator_t *this, const char *path,
- loc_t *loc, dict_t *xattr, struct iatt *buf);
-int posix_handle_pair (xlator_t *this, const char *real_path, char *key,
- data_t *value, int flags);
-int posix_fhandle_pair (xlator_t *this, int fd, char *key, data_t *value,
- int flags);
-void posix_spawn_janitor_thread (xlator_t *this);
-int posix_get_file_contents (xlator_t *this, uuid_t pargfid,
- const char *name, char **contents);
-int posix_set_file_contents (xlator_t *this, const char *path, char *key,
- data_t *value, int flags);
-int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req);
-int posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req);
-int posix_entry_create_xattr_set (xlator_t *this, const char *path,
- dict_t *dict);
-
-int posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd);
-void posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf);
-
-gf_boolean_t posix_special_xattr (char **pattern, char *key);
+void *
+posix_fsyncer(void *);
+int
+posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head,
+ char **path, int type, int32_t *op_errno, dict_t *xdata);
+int
+posix_handle_mdata_xattr(call_frame_t *frame, const char *name, int *op_errno);
+int
+posix_handle_georep_xattrs(call_frame_t *, const char *, int *, gf_boolean_t);
+int32_t
+posix_resolve_dirgfid_to_path(const uuid_t dirgfid, const char *brick_path,
+ const char *bname, char **path);
+void
+posix_gfid_unset(xlator_t *this, dict_t *xdata);
+
+int
+posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s);
+
+int32_t
+posix_get_objectsignature(char *, dict_t *);
+
+int32_t
+posix_fdget_objectsignature(int, dict_t *);
+
+gf_boolean_t
+posix_is_bulk_removexattr(char *name, dict_t *dict);
+
+int32_t
+posix_set_iatt_in_dict(dict_t *, struct iatt *, struct iatt *);
+
+mode_t posix_override_umask(mode_t, mode_t);
+
+int32_t
+posix_priv(xlator_t *this);
+
+int32_t
+posix_inode(xlator_t *this);
void
-__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
- off_t offset, size_t size);
-void posix_spawn_health_check_thread (xlator_t *this);
+posix_fini(xlator_t *this);
+
+int
+posix_init(xlator_t *this);
+
+int
+posix_reconfigure(xlator_t *this, dict_t *options);
+
+int32_t
+posix_notify(xlator_t *this, int32_t event, void *data, ...);
+
+/* posix-entry-ops.c FOP signatures */
+int32_t
+posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int
+posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+posix_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
-void *posix_fsyncer (void *);
int
-posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
- gf_dirent_t *head, char **path, int type, int32_t *op_errno,
- dict_t *xdata);
+posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata);
+
+int
+posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+/* posix-inode-fd-ops.c FOP signatures */
+int
+posix_forget(xlator_t *this, inode_t *inode);
+
+int32_t
+posix_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int
+posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+
+int32_t
+posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata);
+
+int32_t
+posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
+
+int32_t
+posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+int32_t
+posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+posix_releasedir(xlator_t *this, fd_t *fd);
+
+int32_t
+posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int32_t
+posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
+
+int32_t
+posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t
+posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+posix_release(xlator_t *this, fd_t *fd);
+
+int32_t
+posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int flags, dict_t *xdata);
+
+int
+posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type,
+ int32_t *op_errno, dict_t *xdata);
+
+int
+posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head,
+ char **path, int type, int32_t *op_errno, dict_t *xdata);
+
+int32_t
+posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int32_t
+posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int flags, dict_t *xdata);
+
+int32_t
+posix_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata);
+
+int
+posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int
+posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int
+posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int32_t
+posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int32_t
+posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
+
+int32_t
+posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata);
+
+int32_t
+posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict,
+ struct iatt *in_stbuf);
+
+gf_cs_obj_state
+posix_cs_check_status(xlator_t *this, const char *realpath, int *fd,
+ struct iatt *buf);
+
+int
+posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state,
+ char const *path, int *fd);
+
+gf_cs_obj_state
+posix_cs_heal_state(xlator_t *this, const char *path, int *fd,
+ struct iatt *stbuf);
+int
+posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
+ struct iatt *buf, const char *realpath, dict_t *xattr_req,
+ dict_t **xattr_rsp, gf_boolean_t ignore_failure);
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
+
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this);
+
+void
+posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+
+gf_boolean_t
+posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
+
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
#endif /* _POSIX_H */