diff options
Diffstat (limited to 'xlators/storage')
28 files changed, 16870 insertions, 13878 deletions
diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am index c08e8e41bca..5e3ed0eb93b 100644 --- a/xlators/storage/Makefile.am +++ b/xlators/storage/Makefile.am @@ -1,7 +1,3 @@ SUBDIRS = posix -if ENABLE_BD_XLATOR -SUBDIRS += bd -endif - CLEANFILES = diff --git a/xlators/storage/bd/Makefile.am b/xlators/storage/bd/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/storage/bd/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/storage/bd/src/Makefile.am b/xlators/storage/bd/src/Makefile.am deleted file mode 100644 index 60ceff31b20..00000000000 --- a/xlators/storage/bd/src/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -if ENABLE_BD_XLATOR -xlator_LTLIBRARIES = bd.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage - -bd_la_LDFLAGS = -module -avoid-version -LIBBD = -llvm2app -lrt -bd_la_SOURCES = bd.c bd-helper.c bd-aio.c -bd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD) $(LIBAIO) - -noinst_HEADERS = bd.h bd-aio.h bd-mem-types.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src \ - -I$(top_srcdir)/rpc/rpc-lib/src - -AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) - -CLEANFILES = - -endif diff --git a/xlators/storage/bd/src/bd-aio.c b/xlators/storage/bd/src/bd-aio.c deleted file mode 100644 index 9dc13b3ec60..00000000000 --- a/xlators/storage/bd/src/bd-aio.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - Copyright IBM, Corp. 2013 - - This file is part of GlusterFS. - - Author: M. Mohan Kumar <mohan@in.ibm.com> - - Based on posix-aio.c - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <lvm2app.h> -#include <sys/uio.h> - -#include "xlator.h" -#include "glusterfs.h" -#include "defaults.h" -#include "bd.h" -#include "bd-aio.h" - -#ifdef HAVE_LIBAIO -#include <libaio.h> -#include "bd-mem-types.h" - -struct bd_aio_cb { - struct iocb iocb; - call_frame_t *frame; - struct iobuf *iobuf; - struct iobref *iobref; - struct iatt prebuf; - int op; - off_t offset; - fd_t *fd; -}; - -void -__bd_fd_set_odirect (fd_t *fd, bd_fd_t *bd_fd, int opflags, - off_t offset, size_t size) -{ - int odirect = 0; - int flags = 0; - int ret = 0; - - odirect = bd_fd->odirect; - - if ((fd->flags|opflags) & O_DIRECT) { - /* if instructed, use O_DIRECT always */ - odirect = 1; - } else { - /* else use O_DIRECT when feasible */ - if ((offset|size) & 0xfff) - odirect = 0; - else - odirect = 1; - } - - if (!odirect && bd_fd->odirect) { - flags = fcntl (bd_fd->fd, F_GETFL); - ret = fcntl (bd_fd->fd, F_SETFL, (flags & (~O_DIRECT))); - bd_fd->odirect = 0; - } - - if (odirect && !bd_fd->odirect) { - flags = fcntl (bd_fd->fd, F_GETFL); - ret = fcntl (bd_fd->fd, F_SETFL, (flags | O_DIRECT)); - bd_fd->odirect = 1; - } - - if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, - "fcntl() failed (%s). 
fd=%d flags=%d pfd->odirect=%d", - strerror (errno), bd_fd->fd, flags, bd_fd->odirect); - } -} - -int -bd_aio_readv_complete (struct bd_aio_cb *paiocb, int res, int res2) -{ - call_frame_t *frame = NULL; - xlator_t *this = NULL; - struct iobuf *iobuf = NULL; - struct iatt postbuf = {0,}; - int op_ret = -1; - int op_errno = 0; - struct iovec iov; - struct iobref *iobref = NULL; - off_t offset = 0; - bd_attr_t *bdatt = NULL; - - frame = paiocb->frame; - this = frame->this; - iobuf = paiocb->iobuf; - offset = paiocb->offset; - - if (res < 0) { - op_ret = -1; - op_errno = -res; - gf_log (this->name, GF_LOG_ERROR, - "readv(async) failed fd=%p,size=%lu,offset=%llu (%d/%s)", - paiocb->fd, paiocb->iocb.u.c.nbytes, - (unsigned long long) paiocb->offset, - res, strerror (op_errno)); - goto out; - } - - bd_inode_ctx_get (paiocb->fd->inode, this, &bdatt); - memcpy (&postbuf, &bdatt->iatt, sizeof (struct iatt)); - - op_ret = res; - op_errno = 0; - - iobref = iobref_new (); - if (!iobref) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - iobref_add (iobref, iobuf); - - iov.iov_base = iobuf_ptr (iobuf); - iov.iov_len = op_ret; - - /* Hack to notify higher layers of EOF. 
*/ - if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size) - op_errno = ENOENT; - -out: - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1, - &postbuf, iobref, NULL); - if (iobuf) - iobuf_unref (iobuf); - if (iobref) - iobref_unref (iobref); - - GF_FREE (paiocb); - - return 0; -} - -int -bd_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, uint32_t flags, dict_t *xdata) -{ - int32_t op_errno = EINVAL; - int _fd = -1; - struct iobuf *iobuf = NULL; - bd_fd_t *bd_fd = NULL; - int ret = -1; - struct bd_aio_cb *paiocb = NULL; - bd_priv_t *priv = NULL; - struct iocb *iocb = NULL; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - priv = this->private; - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd) { - STACK_WIND (frame, default_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, offset, - flags, xdata); - return 0; - } - _fd = bd_fd->fd; - bd_inode_ctx_get (fd->inode, this, &bdatt); - if (!size) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); - goto err; - } - - iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); - if (!iobuf) { - op_errno = ENOMEM; - goto err; - } - - paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_bd_aio_cb); - if (!paiocb) { - op_errno = ENOMEM; - goto err; - } - - paiocb->frame = frame; - paiocb->iobuf = iobuf; - paiocb->offset = offset; - paiocb->op = GF_FOP_READ; - paiocb->fd = fd; - - paiocb->iocb.data = paiocb; - paiocb->iocb.aio_fildes = _fd; - paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD; - paiocb->iocb.aio_reqprio = 0; - paiocb->iocb.u.c.buf = iobuf_ptr (iobuf); - paiocb->iocb.u.c.nbytes = size; - paiocb->iocb.u.c.offset = offset; - - iocb = &paiocb->iocb; - - LOCK (&fd->lock); - { - __bd_fd_set_odirect (fd, bd_fd, flags, offset, size); - - ret = io_submit (priv->ctxp, 1, &iocb); - } - UNLOCK (&fd->lock); - - if (ret != 1) { - 
gf_log (this->name, GF_LOG_ERROR, - "io_submit() returned %d", ret); - op_errno = -ret; - goto err; - } - - return 0; -err: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0); - if (iobuf) - iobuf_unref (iobuf); - - if (paiocb) - GF_FREE (paiocb); - - return 0; -} - -int -bd_aio_writev_complete (struct bd_aio_cb *paiocb, int res, int res2) -{ - call_frame_t *frame = NULL; - xlator_t *this = NULL; - struct iatt prebuf = {0,}; - struct iatt postbuf = {0,}; - int op_ret = -1; - int op_errno = 0; - bd_attr_t *bdatt = NULL; - - frame = paiocb->frame; - prebuf = paiocb->prebuf; - this = frame->this; - - if (res < 0) { - op_ret = -1; - op_errno = -res; - gf_log (this->name, GF_LOG_ERROR, - "writev(async) failed fd=%p,offset=%llu (%d/%s)", - paiocb->fd, (unsigned long long) paiocb->offset, res, - strerror (op_errno)); - - goto out; - } - - bd_inode_ctx_get (paiocb->fd->inode, this, &bdatt); - bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME); - memcpy (&postbuf, &bdatt->iatt, sizeof (struct iatt)); - - op_ret = res; - op_errno = 0; - -out: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf, - NULL); - - if (paiocb) { - if (paiocb->iobref) - iobref_unref (paiocb->iobref); - GF_FREE (paiocb); - } - - return 0; -} - -int -bd_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *iov, int count, off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) -{ - int32_t op_errno = EINVAL; - int _fd = -1; - bd_fd_t *bd_fd = NULL; - int ret = -1; - struct bd_aio_cb *paiocb = NULL; - bd_priv_t *priv = NULL; - struct iocb *iocb = NULL; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - priv = this->private; - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd) { - STACK_WIND (frame, default_writev_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - fd, iov, count, offset, flags, iobref, xdata); - return 0; - } - - 
bd_inode_ctx_get (fd->inode, this, &bdatt); - - _fd = bd_fd->fd; - - paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_bd_aio_cb); - if (!paiocb) { - op_errno = ENOMEM; - goto err; - } - - - paiocb->frame = frame; - paiocb->offset = offset; - paiocb->op = GF_FOP_WRITE; - paiocb->fd = fd; - - paiocb->iocb.data = paiocb; - paiocb->iocb.aio_fildes = _fd; - paiocb->iobref = iobref_ref (iobref); - paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV; - paiocb->iocb.aio_reqprio = 0; - paiocb->iocb.u.v.vec = iov; - paiocb->iocb.u.v.nr = count; - paiocb->iocb.u.v.offset = offset; - - iocb = &paiocb->iocb; - - memcpy (&paiocb->prebuf, &bdatt->iatt, sizeof (struct iatt)); - LOCK (&fd->lock); - { - __bd_fd_set_odirect (fd, bd_fd, flags, offset, - iov_length (iov, count)); - - ret = io_submit (priv->ctxp, 1, &iocb); - } - UNLOCK (&fd->lock); - - if (ret != 1) { - gf_log (this->name, GF_LOG_ERROR, - "io_submit() returned %d", ret); - op_errno = -ret; - goto err; - } - - return 0; -err: - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0); - - if (paiocb) { - if (paiocb->iobref) - iobref_unref (paiocb->iobref); - GF_FREE (paiocb); - } - - return 0; -} - -void * -bd_aio_thread (void *data) -{ - xlator_t *this = NULL; - bd_priv_t *priv = NULL; - int ret = 0; - int i = 0; - struct io_event *event = NULL; - struct bd_aio_cb *paiocb = NULL; - struct io_event events[BD_AIO_MAX_NR_GETEVENTS]; - struct timespec ts = {0, }; - - this = data; - THIS = this; - priv = this->private; - - ts.tv_sec = 5; - for (;;) { - memset (&events[0], 0, sizeof (events)); - ret = io_getevents (priv->ctxp, 1, BD_AIO_MAX_NR_GETEVENTS, - &events[0], &ts); - if (ret < 0) { - if (ret == -EINTR) - continue; - gf_log (this->name, GF_LOG_ERROR, - "io_getevents() returned %d, exiting", ret); - break; - } - - for (i = 0; i < ret; i++) { - event = &events[i]; - - paiocb = event->data; - - switch (paiocb->op) { - case GF_FOP_READ: - bd_aio_readv_complete (paiocb, event->res, - event->res2); - break; - case GF_FOP_WRITE: - 
bd_aio_writev_complete (paiocb, event->res, - event->res2); - break; - default: - gf_log (this->name, GF_LOG_ERROR, - "unknown op %d found in piocb", - paiocb->op); - break; - } - } - } - - return NULL; -} - -int -bd_aio_init (xlator_t *this) -{ - bd_priv_t *priv = NULL; - int ret = 0; - - priv = this->private; - - ret = io_setup (BD_AIO_MAX_NR_EVENTS, &priv->ctxp); - if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) { - gf_log (this->name, GF_LOG_WARNING, - "Linux AIO not available at run-time." - " Continuing with synchronous IO"); - ret = 0; - goto out; - } - - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "io_setup() failed. ret=%d, errno=%d", - ret, errno); - goto out; - } - - ret = pthread_create (&priv->aiothread, NULL, - bd_aio_thread, this); - if (ret != 0) { - io_destroy (priv->ctxp); - goto out; - } - - this->fops->readv = bd_aio_readv; - this->fops->writev = bd_aio_writev; -out: - return ret; -} - - -int -bd_aio_on (xlator_t *this) -{ - bd_priv_t *priv = NULL; - int ret = 0; - - priv = this->private; - - if (!priv->aio_init_done) { - ret = bd_aio_init (this); - if (ret == 0) - priv->aio_capable = _gf_true; - else - priv->aio_capable = _gf_false; - priv->aio_init_done = _gf_true; - } - - if (priv->aio_capable) { - this->fops->readv = bd_aio_readv; - this->fops->writev = bd_aio_writev; - } - - return ret; -} - -int -bd_aio_off (xlator_t *this) -{ - this->fops->readv = bd_readv; - this->fops->writev = bd_writev; - - return 0; -} - -#else - -int -bd_aio_on (xlator_t *this) -{ - gf_log (this->name, GF_LOG_INFO, - "Linux AIO not available at build-time." - " Continuing with synchronous IO"); - return 0; -} - -int -bd_aio_off (xlator_t *this) -{ - gf_log (this->name, GF_LOG_INFO, - "Linux AIO not available at build-time." 
- " Continuing with synchronous IO"); - return 0; -} - -void -__bd_fd_set_odirect (fd_t *fd, struct bd_fd *pfd, int opflags, - off_t offset, size_t size) -{ - xlator_t *this = THIS; - gf_log (this->name, GF_LOG_INFO, - "Linux AIO not available at build-time." - " Continuing with synchronous IO"); - return; -} -#endif diff --git a/xlators/storage/bd/src/bd-aio.h b/xlators/storage/bd/src/bd-aio.h deleted file mode 100644 index 16f686a4caa..00000000000 --- a/xlators/storage/bd/src/bd-aio.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - Copyright IBM, Corp. 2013 - - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#ifndef _BD_AIO_H -#define _BD_AIO_H - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "xlator.h" -#include "glusterfs.h" - -/* - * Maximum number of concurrently submitted IO events. 
The heaviest load - * GlusterFS has been able to handle had 60-80 concurrent calls - */ -#define BD_AIO_MAX_NR_EVENTS 256 - -/* Maximum number of completed IO operations to reap per getevents syscall */ -#define BD_AIO_MAX_NR_GETEVENTS 16 - -int bd_aio_on (xlator_t *this); -int bd_aio_off (xlator_t *this); - -int bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata); - -int bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata); - -#endif /* !_BD_AIO_H */ diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c deleted file mode 100644 index 8aaffa6b27a..00000000000 --- a/xlators/storage/bd/src/bd-helper.c +++ /dev/null @@ -1,1023 +0,0 @@ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif -#include <lvm2app.h> -#ifdef HAVE_LIBAIO -#include <libaio.h> -#endif -#include <linux/fs.h> -#include <sys/ioctl.h> -#include "bd.h" -#include "bd-mem-types.h" -#include "run.h" -#include "lvm-defaults.h" - -int -bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx) -{ - int ret = -1; - uint64_t ctx_int = 0; - - GF_VALIDATE_OR_GOTO (this->name, inode, out); - GF_VALIDATE_OR_GOTO (this->name, ctx, out); - - ctx_int = (long)ctx; - ret = inode_ctx_set (inode, this, &ctx_int); -out: - return ret; -} - -int -bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx) -{ - int ret = -1; - uint64_t ctx_int = 0; - - GF_VALIDATE_OR_GOTO (this->name, inode, out); - ret = inode_ctx_get (inode, this, &ctx_int); - if (ret) - return ret; - if (ctx) - *ctx = (bd_attr_t *) ctx_int; -out: - return ret; -} - -void -bd_local_free (xlator_t *this, bd_local_t *local) -{ - if (!local) - return; - if (local->fd) - fd_unref (local->fd); - else if (local->loc.path) - loc_wipe (&local->loc); - if (local->dict) - dict_unref (local->dict); - if (local->inode) - inode_unref 
(local->inode); - if (local->bdatt) { - GF_FREE (local->bdatt->type); - GF_FREE (local->bdatt); - } - mem_put (local); - local = NULL; -} - -bd_local_t * -bd_local_init (call_frame_t *frame, xlator_t *this) -{ - frame->local = mem_get0 (this->local_pool); - if (!frame->local) - return NULL; - - return frame->local; -} - -/* - * VG are set with the tag in GF_XATTR_VOL_ID_KEY:<uuid> format. - * This function validates this tag agains volume-uuid. Also goes - * through LV list to find out if a thin-pool is configured or not. - */ -int bd_scan_vg (xlator_t *this, bd_priv_t *priv) -{ - vg_t brick = NULL; - data_t *tmp_data = NULL; - struct dm_list *tags = NULL; - int op_ret = -1; - uuid_t dict_uuid = {0, }; - uuid_t vg_uuid = {0, }; - gf_boolean_t uuid = _gf_false; - lvm_str_list_t *strl = NULL; - struct dm_list *lv_dm_list = NULL; - lv_list_t *lv_list = NULL; - struct dm_list *dm_seglist = NULL; - lvseg_list_t *seglist = NULL; - lvm_property_value_t prop = {0, }; - gf_boolean_t thin = _gf_false; - const char *lv_name = NULL; - - brick = lvm_vg_open (priv->handle, priv->vg, "w", 0); - if (!brick) { - gf_log (this->name, GF_LOG_CRITICAL, "VG %s is not found", - priv->vg); - return ENOENT; - } - - lv_dm_list = lvm_vg_list_lvs (brick); - if (!lv_dm_list) - goto check; - - dm_list_iterate_items (lv_list, lv_dm_list) { - dm_seglist = lvm_lv_list_lvsegs (lv_list->lv); - if (!dm_seglist) - continue; - dm_list_iterate_items (seglist, dm_seglist) { - prop = lvm_lvseg_get_property (seglist->lvseg, - "segtype"); - if (!prop.is_valid || !prop.value.string) - continue; - if (!strcmp (prop.value.string, "thin-pool")) { - thin = _gf_true; - lv_name = lvm_lv_get_name (lv_list->lv); - priv->pool = gf_strdup (lv_name); - gf_log (THIS->name, GF_LOG_INFO, "Thin Pool " - "\"%s\" will be used for thin LVs", - lv_name); - break; - } - } - } - -check: - /* If there is no volume-id set in dict, we cant validate */ - tmp_data = dict_get (this->options, "volume-id"); - if (!tmp_data) { - op_ret = 
0; - goto out; - } - - op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "wrong volume-id (%s) set in volume file", - tmp_data->data); - op_ret = -1; - goto out; - } - - tags = lvm_vg_get_tags (brick); - if (!tags) { /* no tags in the VG */ - gf_log (this->name, GF_LOG_ERROR, - "Extended attribute trusted.glusterfs." - "volume-id is absent"); - op_ret = -1; - goto out; - } - dm_list_iterate_items (strl, tags) { - if (!strncmp (strl->str, GF_XATTR_VOL_ID_KEY, - strlen (GF_XATTR_VOL_ID_KEY))) { - uuid = _gf_true; - break; - } - } - /* UUID tag is not set in VG */ - if (!uuid) { - gf_log (this->name, GF_LOG_ERROR, - "Extended attribute trusted.glusterfs." - "volume-id is absent"); - op_ret = -1; - goto out; - } - - op_ret = gf_uuid_parse (strl->str + strlen (GF_XATTR_VOL_ID_KEY) + 1, - vg_uuid); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "wrong volume-id (%s) set in VG", strl->str); - op_ret = -1; - goto out; - } - if (gf_uuid_compare (dict_uuid, vg_uuid)) { - gf_log (this->name, GF_LOG_ERROR, - "mismatching volume-id (%s) received. 
" - "already is a part of volume %s ", - tmp_data->data, vg_uuid); - op_ret = -1; - goto out; - } - - op_ret = 0; - -out: - lvm_vg_close (brick); - - if (!thin) - gf_log (THIS->name, GF_LOG_WARNING, "No thin pool found in " - "VG %s\n", priv->vg); - else - priv->caps |= BD_CAPS_THIN; - - return op_ret; -} - -/* FIXME: Move this code to common place, so posix and bd xlator can use */ -char * -page_aligned_alloc (size_t size, char **aligned_buf) -{ - char *alloc_buf = NULL; - char *buf = NULL; - - alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_common_mt_char); - if (!alloc_buf) - return NULL; - /* page aligned buffer */ - buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); - *aligned_buf = buf; - - return alloc_buf; -} - -static int -__bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd_p) -{ - int ret = -1; - int _fd = -1; - char *devpath = NULL; - bd_fd_t *bdfd = NULL; - uint64_t tmp_bdfd = 0; - bd_priv_t *priv = this->private; - bd_gfid_t gfid = {0, }; - bd_attr_t *bdatt = NULL; - - /* not bd file */ - if (fd->inode->ia_type != IA_IFREG || - bd_inode_ctx_get (fd->inode, this, &bdatt)) - return 0; - - ret = __fd_ctx_get (fd, this, &tmp_bdfd); - if (ret == 0) { - bdfd = (void *)(long) tmp_bdfd; - *bdfd_p = bdfd; - return 0; - } - - uuid_utoa_r (fd->inode->gfid, gfid); - gf_asprintf (&devpath, "/dev/%s/%s", priv->vg, gfid); - if (!devpath) - goto out; - - _fd = open (devpath, O_RDWR | O_LARGEFILE, 0); - if (_fd < 0) { - ret = errno; - gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", devpath, - strerror (ret)); - goto out; - } - bdfd = GF_CALLOC (1, sizeof(bd_fd_t), gf_bd_fd); - BD_VALIDATE_MEM_ALLOC (bdfd, ret, out); - - bdfd->fd = _fd; - bdfd->flag = O_RDWR | O_LARGEFILE; - if (__fd_ctx_set (fd, this, (uint64_t)(long)bdfd) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set the fd context fd=%p", fd); - goto out; - } - - *bdfd_p = bdfd; - - ret = 0; -out: - GF_FREE (devpath); - if (ret) { - if (_fd >= 0) - close (_fd); - GF_FREE (bdfd); - } - return ret; 
-} - -int -bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd) -{ - int ret; - - /* FIXME: Is it ok to fd->lock here ? */ - LOCK (&fd->lock); - { - ret = __bd_fd_ctx_get (this, fd, bdfd); - } - UNLOCK (&fd->lock); - - return ret; -} - -/* - * Validates if LV exists for given inode or not. - * Returns 0 if LV exists and size also matches. - * If LV does not exist -1 returned - * If LV size mismatches, returnes 1 also lv_size is updated with actual - * size - */ -int -bd_validate_bd_xattr (xlator_t *this, char *bd, char **type, - uint64_t *lv_size, uuid_t uuid) -{ - char *path = NULL; - int ret = -1; - bd_gfid_t gfid = {0, }; - bd_priv_t *priv = this->private; - struct stat stbuf = {0, }; - uint64_t size = 0; - vg_t vg = NULL; - lv_t lv = NULL; - char *bytes = NULL; - - bytes = strrchr (bd, ':'); - if (bytes) { - *bytes = '\0'; - bytes++; - gf_string2bytesize (bytes, &size); - } - - if (strcmp (bd, BD_LV) && strcmp (bd, BD_THIN)) { - gf_log (this->name, GF_LOG_WARNING, - "invalid xattr %s", bd); - return -1; - } - *type = gf_strdup (bd); - - /* - * Check if LV really exist, there could be a failure - * after setxattr and successful LV creation - */ - uuid_utoa_r (uuid, gfid); - gf_asprintf (&path, "/dev/%s/%s", priv->vg, gfid); - if (!path) { - gf_log (this->name, GF_LOG_WARNING, - "insufficient memory"); - return 0; - } - - /* Destination file does not exist */ - if (stat (path, &stbuf)) { - gf_log (this->name, GF_LOG_WARNING, - "lstat failed for path %s", path); - return -1; - } - - vg = lvm_vg_open (priv->handle, priv->vg, "r", 0); - if (!vg) { - gf_log (this->name, GF_LOG_WARNING, - "VG %s does not exist?", priv->vg); - ret = -1; - goto out; - } - - lv = lvm_lv_from_name (vg, gfid); - if (!lv) { - gf_log (this->name, GF_LOG_WARNING, - "LV %s does not exist", gfid); - ret = -1; - goto out; - } - - *lv_size = lvm_lv_get_size (lv); - if (size == *lv_size) { - ret = 0; - goto out; - } - - ret = 1; - -out: - if (vg) - lvm_vg_close (vg); - - GF_FREE (path); - 
return ret; -} - -static int -create_thin_lv (char *vg, char *pool, char *lv, uint64_t extent) -{ - int ret = -1; - runner_t runner = {0, }; - char *path = NULL; - struct stat stat = {0, }; - - runinit (&runner); - runner_add_args (&runner, LVM_CREATE, NULL); - runner_add_args (&runner, "--thin", NULL); - runner_argprintf (&runner, "%s/%s", vg, pool); - runner_add_args (&runner, "--name", NULL); - runner_argprintf (&runner, "%s", lv); - runner_add_args (&runner, "--virtualsize", NULL); - runner_argprintf (&runner, "%ldB", extent); - runner_start (&runner); - runner_end (&runner); - - gf_asprintf (&path, "/dev/%s/%s", vg, lv); - if (!path) { - ret = ENOMEM; - goto out; - } - if (lstat (path, &stat) < 0) - ret = EAGAIN; - else - ret = 0; -out: - GF_FREE (path); - return ret; -} - -int -bd_create (uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv) -{ - int ret = 0; - vg_t vg = NULL; - bd_gfid_t gfid = {0, }; - - uuid_utoa_r (uuid, gfid); - - if (!strcmp (type, BD_THIN)) - return create_thin_lv (priv->vg, priv->pool, gfid, - size); - - vg = lvm_vg_open (priv->handle, priv->vg, "w", 0); - if (!vg) { - gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed", - priv->vg); - return ENOENT; - } - - if (!lvm_vg_create_lv_linear (vg, gfid, size)) { - gf_log (THIS->name, GF_LOG_WARNING, "lvm_vg_create_lv_linear " - "failed"); - ret = errno; - } - - lvm_vg_close (vg); - - return ret; -} - -int32_t -bd_resize (bd_priv_t *priv, uuid_t uuid, size_t size) -{ - uint64_t new_size = 0; - runner_t runner = {0, }; - bd_gfid_t gfid = {0, }; - int ret = 0; - vg_t vg = NULL; - lv_t lv = NULL; - - uuid_utoa_r (uuid, gfid); - - runinit (&runner); - - runner_add_args (&runner, LVM_RESIZE, NULL); - runner_argprintf (&runner, "%s/%s", priv->vg, gfid); - runner_argprintf (&runner, "-L%ldb", size); - runner_add_args (&runner, "-f", NULL); - - runner_start (&runner); - runner_end (&runner); - - vg = lvm_vg_open (priv->handle, priv->vg, "w", 0); - if (!vg) { - gf_log (THIS->name, 
GF_LOG_WARNING, "opening VG %s failed", - priv->vg); - return EAGAIN; - } - - lv = lvm_lv_from_name (vg, gfid); - if (!lv) { - gf_log (THIS->name, GF_LOG_WARNING, "LV %s not found", gfid); - ret = EIO; - goto out; - } - new_size = lvm_lv_get_size (lv); - - if (new_size != size) { - gf_log (THIS->name, GF_LOG_WARNING, - "resized LV size %" PRIu64 " does " - "not match requested size %zd", new_size, size); - ret = EIO; - } - -out: - lvm_vg_close (vg); - return ret; -} - -uint64_t -bd_get_default_extent (bd_priv_t *priv) -{ - vg_t vg = NULL; - uint64_t size = 0; - - vg = lvm_vg_open (priv->handle, priv->vg, "w", 0); - if (!vg) { - gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed", - priv->vg); - return 0; - } - - size = lvm_vg_get_extent_size (vg); - - lvm_vg_close (vg); - - return size; -} - -/* - * Adjusts the user specified size to VG specific extent size - */ -uint64_t -bd_adjust_size (bd_priv_t *priv, size_t size) -{ - uint64_t extent = 0; - uint64_t nr_ex = 0; - - extent = bd_get_default_extent (priv); - if (!extent) - return 0; - - nr_ex = size / extent; - if (size % extent) - nr_ex++; - - size = extent * nr_ex; - - return size; -} - -int -bd_delete_lv (bd_priv_t *priv, const char *lv_name, int *op_errno) -{ - vg_t vg = NULL; - lv_t lv = NULL; - int ret = -1; - - *op_errno = 0; - vg = lvm_vg_open (priv->handle, priv->vg, "w", 0); - if (!vg) { - gf_log (THIS->name, GF_LOG_WARNING, "opening VG %s failed", - priv->vg); - *op_errno = ENOENT; - return -1; - } - lv = lvm_lv_from_name (vg, lv_name); - if (!lv) { - gf_log (THIS->name, GF_LOG_WARNING, "No such LV %s", lv_name); - *op_errno = ENOENT; - goto out; - } - ret = lvm_vg_remove_lv (lv); - if (ret < 0) { - gf_log (THIS->name, GF_LOG_WARNING, "removing LV %s failed", - lv_name); - *op_errno = errno; - goto out; - } -out: - lvm_vg_close (vg); - - return ret; -} - -inline void -bd_update_amtime(struct iatt *iatt, int flag) -{ - struct timespec ts = {0, }; - - clock_gettime (CLOCK_REALTIME, &ts); - if (flag 
& GF_SET_ATTR_ATIME) { - iatt->ia_atime = ts.tv_sec; - iatt->ia_atime_nsec = ts.tv_nsec; - } - if (flag & GF_SET_ATTR_MTIME) { - iatt->ia_mtime = ts.tv_sec; - iatt->ia_mtime_nsec = ts.tv_nsec; - } -} - -int -bd_snapshot_create (bd_local_t *local, bd_priv_t *priv) -{ - char *path = NULL; - bd_gfid_t dest = {0, }; - bd_gfid_t origin = {0, }; - int ret = 0; - runner_t runner = {0, }; - struct stat stat = {0, }; - - uuid_utoa_r (local->dloc->gfid, dest); - uuid_utoa_r (local->loc.gfid, origin); - - gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest); - if (!path) { - gf_log (THIS->name, GF_LOG_WARNING, - "Insufficient memory"); - return ENOMEM; - } - - runinit (&runner); - runner_add_args (&runner, LVM_CREATE, NULL); - runner_add_args (&runner, "--snapshot", NULL); - runner_argprintf (&runner, "/dev/%s/%s", priv->vg, origin); - runner_add_args (&runner, "--name", NULL); - runner_argprintf (&runner, "%s", dest); - if (strcmp (local->bdatt->type, BD_THIN)) - runner_argprintf (&runner, "-L%ldB", local->size); - runner_start (&runner); - runner_end (&runner); - - if (lstat (path, &stat) < 0) - ret = EIO; - - GF_FREE (path); - return ret; -} - -int -bd_clone (bd_local_t *local, bd_priv_t *priv) -{ - int ret = ENOMEM; - int fd1 = -1; - int fd2 = -1; - int i = 0; - char *buff = NULL; - ssize_t bytes = 0; - char *spath = NULL; - char *dpath = NULL; - struct iovec *vec = NULL; - bd_gfid_t source = {0, }; - bd_gfid_t dest = {0, }; - void *bufp[IOV_NR] = {0, }; - - vec = GF_CALLOC (IOV_NR, sizeof (struct iovec), gf_common_mt_iovec); - if (!vec) - return ENOMEM; - - for (i = 0; i < IOV_NR; i++) { - bufp[i] = page_aligned_alloc (IOV_SIZE, &buff); - if (!buff) - goto out; - vec[i].iov_base = buff; - vec[i].iov_len = IOV_SIZE; - } - - uuid_utoa_r (local->loc.gfid, source); - uuid_utoa_r (local->dloc->gfid, dest); - - gf_asprintf (&spath, "/dev/%s/%s", priv->vg, source); - gf_asprintf (&dpath, "/dev/%s/%s", priv->vg, dest); - if (!spath || !dpath) - goto out; - - ret = bd_create 
(local->dloc->gfid, local->size, - local->bdatt->type, priv); - if (ret) - goto out; - - fd1 = open (spath, O_RDONLY | O_DIRECT); - if (fd1 < 0) { - ret = errno; - goto out; - } - fd2 = open (dpath, O_WRONLY | O_DIRECT); - if (fd2 < 0) { - ret = errno; - goto out; - } - - while (1) { - bytes = readv (fd1, vec, IOV_NR); - if (bytes < 0) { - ret = errno; - gf_log (THIS->name, GF_LOG_WARNING, "read failed: %s", - strerror (ret)); - goto out; - } - if (!bytes) - break; - bytes = writev (fd2, vec, IOV_NR); - if (bytes < 0) { - ret = errno; - gf_log (THIS->name, GF_LOG_WARNING, - "write failed: %s", strerror (ret)); - goto out; - } - } - ret = 0; - -out: - for (i = 0; i < IOV_NR; i++) - GF_FREE (bufp[i]); - GF_FREE (vec); - - if (fd1 != -1) - close (fd1); - if (fd2 != -1) - close (fd2); - - GF_FREE (spath); - GF_FREE (dpath); - - return ret; -} - -/* - * Merges snapshot LV to origin LV and returns status - */ -int -bd_merge (bd_priv_t *priv, uuid_t gfid) -{ - bd_gfid_t dest = {0, }; - char *path = NULL; - struct stat stat = {0, }; - runner_t runner = {0, }; - int ret = 0; - - uuid_utoa_r (gfid, dest); - gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest); - - runinit (&runner); - runner_add_args (&runner, LVM_CONVERT, NULL); - runner_add_args (&runner, "--merge", NULL); - runner_argprintf (&runner, "%s", path); - runner_start (&runner); - runner_end (&runner); - - if (!lstat (path, &stat)) - ret = EIO; - - GF_FREE (path); - - return ret; -} - -int -bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict) -{ - vg_t brick = NULL; - lvm_property_value_t prop = {0, }; - lv_t lv = NULL; - int ret = -1; - bd_gfid_t gfid = {0, }; - inode_t *inode = NULL; - char *origin = NULL; - - brick = lvm_vg_open (priv->handle, priv->vg, "w", 0); - if (!brick) { - gf_log (THIS->name, GF_LOG_CRITICAL, "VG %s is not found", - priv->vg); - return ENOENT; - } - - if (fd) - inode = fd->inode; - else - inode = loc->inode; - - uuid_utoa_r (inode->gfid, gfid); - lv = lvm_lv_from_name 
(brick, gfid); - if (!lv) { - gf_log (THIS->name, GF_LOG_CRITICAL, "LV %s not found", gfid); - ret = ENOENT; - goto out; - } - - prop = lvm_lv_get_property (lv, "origin"); - if (!prop.is_valid || !prop.value.string) { - ret = ENODATA; - goto out; - } - - origin = gf_strdup (prop.value.string); - ret = dict_set_dynstr (dict, BD_ORIGIN, origin); - -out: - lvm_vg_close (brick); - return ret; -} - -#ifndef BLKZEROOUT - -int -bd_do_manual_zerofill (int fd, off_t offset, off_t len, int o_direct) -{ - off_t num_vect = 0; - off_t num_loop = 1; - int idx = 0; - int op_ret = -1; - int vect_size = IOV_SIZE; - off_t remain = 0; - off_t extra = 0; - struct iovec *vector = NULL; - char *iov_base = NULL; - char *alloc_buf = NULL; - - if (len == 0) - return 0; - - if (len < IOV_SIZE) - vect_size = len; - - num_vect = len / (vect_size); - remain = len % vect_size ; - - if (num_vect > MAX_NO_VECT) { - extra = num_vect % MAX_NO_VECT; - num_loop = num_vect / MAX_NO_VECT; - num_vect = MAX_NO_VECT; - } - - vector = GF_CALLOC (num_vect, sizeof(struct iovec), - gf_common_mt_iovec); - if (!vector) - return -1; - - if (o_direct) { - alloc_buf = page_aligned_alloc (vect_size, &iov_base); - if (!alloc_buf) { - gf_log ("bd_do_manual_zerofill", GF_LOG_DEBUG, - "memory alloc failed, vect_size %d: %s", - vect_size, strerror (errno)); - GF_FREE (vector); - return -1; - } - } else { - iov_base = GF_CALLOC (vect_size, sizeof(char), - gf_common_mt_char); - if (!iov_base) { - GF_FREE (vector); - return -1; - } - } - - for (idx = 0; idx < num_vect; idx++) { - vector[idx].iov_base = iov_base; - vector[idx].iov_len = vect_size; - } - - if (lseek (fd, offset, SEEK_SET) < 0) { - op_ret = -1; - goto err; - } - - for (idx = 0; idx < num_loop; idx++) { - op_ret = writev (fd, vector, num_vect); - if (op_ret < 0) - goto err; - } - if (extra) { - op_ret = writev (fd, vector, extra); - if (op_ret < 0) - goto err; - } - if (remain) { - vector[0].iov_len = remain; - op_ret = writev (fd, vector , 1); - if (op_ret < 
0) - goto err; - } - op_ret = 0; -err: - if (o_direct) - GF_FREE (alloc_buf); - else - GF_FREE (iov_base); - GF_FREE (vector); - return op_ret; -} - -#else - -/* - * Issue Linux ZEROOUT ioctl to write '0' to a scsi device at given offset - * and number of bytes. Each SCSI device's maximum write same bytes are exported - * in sysfs file. Sending ioctl request greater than this bytes results in slow - * performance. Read this file to get the maximum bytes and break down single - * ZEROOUT request into multiple ZEROOUT request not exceeding maximum bytes. - * From VG & LV name of device mapper identified and sysfs file read. - * /sys/block/<block-device>/queue/write_same_max_bytes - */ -int -bd_do_ioctl_zerofill (bd_priv_t *priv, bd_attr_t *bdatt, int fd, char *vg, - off_t offset, off_t len) -{ - char *dm = NULL; - char dmname[4096] = {0, }; - char lvname[4096] = {0, }; - char sysfs[4096] = {0, }; - bd_gfid_t uuid = {0, }; - char *p = NULL; - off_t max_bytes = 0; - int sysfd = -1; - uint64_t param[2] = {0, 0}; - off_t nr_loop = 0; - char buff[16] = {0, }; - - uuid_utoa_r (bdatt->iatt.ia_gfid, uuid); - sprintf (lvname, "/dev/%s/%s", vg, uuid); - - readlink (lvname, dmname, sizeof (dmname) - 1); - - p = strrchr (dmname, '/'); - if (p) - dm = p + 1; - else - dm = dmname; - - sprintf(sysfs, "/sys/block/%s/queue/write_same_max_bytes", dm); - sysfd = open (sysfs, O_RDONLY); - if (sysfd < 0) { - gf_log ("bd_do_ioctl_zerofill", GF_LOG_DEBUG, - "sysfs file %s does not exist", lvname); - goto skip; - } - - read (sysfd, buff, sizeof (buff)); - close (sysfd); - - max_bytes = atoll (buff); - -skip: - /* - * If requested len is less than write_same_max_bytes, - * issue single ioctl to zeroout. 
Otherwise split the ioctls - */ - if (!max_bytes || len <= max_bytes) { - param[0] = offset; - param[1] = len; - - if (ioctl (fd, BLKZEROOUT, param) < 0) - return errno; - return 0; - } - - /* Split ioctls to max write_same_max_bytes */ - nr_loop = len / max_bytes; - for (; nr_loop; nr_loop--) { - param[0] = offset; - param[1] = max_bytes; - - if (ioctl (fd, BLKZEROOUT, param) < 0) - return errno; - - offset += max_bytes; - } - - if (!(len % max_bytes)) - return 0; - - param[0] = offset; - param[1] = len % max_bytes; - - if (ioctl (fd, BLKZEROOUT, param) < 0) - return errno; - - return 0; -} -#endif - -int -bd_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, size_t len, struct iatt *prebuf, - struct iatt *postbuf) -{ - int ret = -1; - bd_fd_t *bd_fd = NULL; - bd_priv_t *priv = this->private; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (priv, out); - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "bd_fd is NULL from fd=%p", fd); - goto out; - } - - bd_inode_ctx_get (fd->inode, this, &bdatt); -#ifndef BLKZEROOUT - ret = bd_do_manual_zerofill(bd_fd->fd, offset, len, - bd_fd->flag & O_DIRECT); -#else - ret = bd_do_ioctl_zerofill(priv, bdatt, bd_fd->fd, priv->vg, offset, - len); -#endif - if (ret) { - gf_log(this->name, GF_LOG_ERROR, - "zerofill failed on fd %d length %zu %s", - bd_fd->fd, len, strerror (ret)); - goto out; - } - - if (bd_fd->flag & (O_SYNC|O_DSYNC)) { - ret = fsync (bd_fd->fd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "fsync() in writev on fd %d failed: %s", - bd_fd->fd, strerror (errno)); - return errno; - } - } - - memcpy (&prebuf, &bdatt->iatt, sizeof (prebuf)); - bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME); - memcpy (&postbuf, &bdatt->iatt, sizeof (postbuf)); - -out: - - return ret; -} diff --git a/xlators/storage/bd/src/bd-mem-types.h 
b/xlators/storage/bd/src/bd-mem-types.h deleted file mode 100644 index 58b44834247..00000000000 --- a/xlators/storage/bd/src/bd-mem-types.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - - -#ifndef __BD_MEM_TYPES_H__ -#define __BD_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gf_bd_mem_types_ { - gf_bd_private = gf_common_mt_end + 1, - gf_bd_attr, - gf_bd_fd, - gf_bd_loc_t, - gf_bd_int32_t, - gf_bd_aio_cb, - gf_bd_mt_end -}; - -#endif diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c deleted file mode 100644 index 4ef94810f99..00000000000 --- a/xlators/storage/bd/src/bd.c +++ /dev/null @@ -1,2452 +0,0 @@ -/* - BD translator V2 - Exports Block devices on server side as regular - files to client - - Now only exporting Logical volumes supported. - - Copyright IBM, Corp. 2013 - - This file is part of GlusterFS. - - Author: - M. Mohan Kumar <mohan@in.ibm.com> - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif -#include <lvm2app.h> -#include <openssl/md5.h> -#include <time.h> -#include <linux/fs.h> -#include <sys/ioctl.h> -#ifdef HAVE_LIBAIO -#include <libaio.h> -#endif - -#include "bd.h" -#include "bd-aio.h" -#include "bd-mem-types.h" -#include "defaults.h" -#include "glusterfs3-xdr.h" -#include "run.h" -#include "protocol-common.h" -#include "checksum.h" -#include "syscall.h" -#include "lvm-defaults.h" - -/* - * Call back function for setxattr and removexattr. - * does not do anything. FIXME: How to handle remove/setxattr failure - */ -int -bd_null_rmsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - STACK_DESTROY (frame->root); - return 0; -} - -/* - * returns 0 if a file is mapped to BD or not. - */ -int -bd_get_bd_info (call_frame_t *frame, xlator_t *this, dict_t *xattr, uuid_t gfid, - char **type, uint64_t *size) -{ - char *bd_xattr = NULL; - char *bd = NULL; - int ret = -1; - loc_t loc = {0, }; - dict_t *dict = NULL; - char *p = NULL; - call_frame_t *bd_frame = NULL; - - if (!xattr) - return 1; - - if (dict_get_str (xattr, BD_XATTR, &p)) - return 1; - - bd_xattr = gf_strdup (p); - - memcpy (loc.gfid, gfid, sizeof (uuid_t)); - - bd_frame = copy_frame (frame); - BD_VALIDATE_MEM_ALLOC (bd_frame, ret, out); - - ret = bd_validate_bd_xattr (this, bd_xattr, type, size, gfid); - if (ret < 0) {/* LV does not exist */ - STACK_WIND (bd_frame, bd_null_rmsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->removexattr, &loc, - BD_XATTR, NULL); - - gf_log (this->name, GF_LOG_WARNING, - "Mapped LV not available for posix file <gfid:%s>, " - "deleting mapping", uuid_utoa (gfid)); - } else if (ret == 1) { - /* BD_XATTR size and LV size mismatch. 
Update BD_XATTR */ - gf_asprintf (&bd, "%s:%ld", *type, *size); - - dict = dict_new (); - BD_VALIDATE_MEM_ALLOC (dict, ret, out); - - ret = dict_set_dynstr (dict, BD_XATTR, bd); - if (ret) - goto out; - - STACK_WIND (bd_frame, bd_null_rmsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setxattr, &loc, dict, 0, - NULL); - } - -out: - dict_del (xattr, BD_XATTR); - GF_FREE (bd_xattr); - GF_FREE (bd); - return ret; -} - -/* - * bd_lookup_cbk: Call back from posix_lookup. - */ -int32_t -bd_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - int ret = -1; - bd_attr_t *bdatt = NULL; - uint64_t size = 0; - char *type = BD_TYPE_NONE; - - /* only regular files are part of BD object */ - if (op_ret < 0 || buf->ia_type != IA_IFREG) - goto out; - - /* iatt already cached */ - if (!bd_inode_ctx_get (inode, this, &bdatt)) - goto next; - - if (bd_get_bd_info (frame, this, xattr, buf->ia_gfid, &type, &size)) - goto out; - - /* BD file, update buf */ - bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr); - if (!bdatt) { - op_errno = ENOMEM; - goto out; - } - memcpy (&bdatt->iatt, buf, sizeof (struct iatt)); - bdatt->type = type; - - /* Cache LV size in inode_ctx */ - ret = bd_inode_ctx_set (inode, this, bdatt); - if (ret < 0) { - GF_FREE (bdatt); - op_errno = EINVAL; - goto out; - } - - bdatt->iatt.ia_size = size; - bdatt->iatt.ia_blocks = size / 512; - -next: - dict_del (xattr, GF_CONTENT_KEY); - memcpy (buf, &bdatt->iatt, sizeof (struct iatt)); - -out: - BD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, - xattr, postparent); - return 0; -} - -/* - * bd_lookup: Issues posix_lookup to find out if file is mapped to BD - * bd_lookup -> posix_lookup -> bd_lookup_cbk -*/ -int32_t -bd_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) -{ - dict_t *bd_xattr = NULL; - bd_attr_t *bdatt = NULL; - int op_errno = EINVAL; - - 
VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); - VALIDATE_OR_GOTO (this->private, out); - - if (bd_inode_ctx_get (loc->inode, this, &bdatt) < 0) { - if (!xattr_req) { - bd_xattr = dict_new (); - BD_VALIDATE_MEM_ALLOC (bd_xattr, op_errno, out); - xattr_req = bd_xattr; - } - if (dict_set_int8 (xattr_req, BD_XATTR, 1) < 0) - goto out; - } - - STACK_WIND (frame, bd_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, loc, xattr_req); - - if (bd_xattr) - dict_unref (bd_xattr); - return 0; -out: - BD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - - return 0; -} - -int -bd_forget (xlator_t *this, inode_t *inode) -{ - int ret = -1; - uint64_t ctx = 0; - bd_attr_t *bdatt = NULL; - - ret = bd_inode_ctx_get (inode, this, &bdatt); - if (!ret) { - inode_ctx_del (inode, this, &ctx); - GF_FREE (bdatt); - } - return 0; -} - -int -bd_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, gf_dirent_t *entries, dict_t *xdata) -{ - gf_dirent_t *entry = NULL; - uint64_t size = 0; - char *type = NULL; - - if (op_ret < 0) - goto out; - - list_for_each_entry (entry, &entries->list, list) { - if (entry->d_type != DT_REG) - continue; - if (!bd_get_bd_info (frame, this, entry->dict, - entry->d_stat.ia_gfid, &type, &size)) { - entry->d_stat.ia_size = size; - entry->d_stat.ia_blocks = size / 512; - GF_FREE (type); - } - } - -out: - BD_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; -} - -/* - * bd_readdirp: In bd_readdirp_cbk if the file and BD_XATTR_SIZE is set - * ia_size is updated with the LV(BD_XATTR_SIZE) size - */ -int32_t -bd_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *dict) -{ - int op_errno = EINVAL; - bd_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO 
(this->private, out); - - if (!dict) { - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - local->dict = dict_new (); - BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out); - dict = local->dict; - } - - if (dict_set_int8 (dict, BD_XATTR, 0)) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set key %s", BD_XATTR); - goto out; - } - - STACK_WIND (frame, bd_readdirp_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict); - - return 0; -out: - BD_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, dict); - return 0; -} - -int -bd_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *buf, dict_t *xdata) -{ - bd_local_t *local = frame->local; - bd_attr_t *bdatt = NULL; - - /* only regular files are part of BD object */ - if (op_ret < 0 || buf->ia_type != IA_IFREG) - goto out; - - BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out); - - /* update buf with LV size */ - if (!bd_inode_ctx_get (local->inode, this, &bdatt)) - memcpy (buf, bdatt, sizeof (struct iatt)); - -out: - BD_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata); - return 0; -} - -int -bd_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - int op_errno = EINVAL; - bd_local_t *local = NULL; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); - VALIDATE_OR_GOTO (this->private, out); - - if (!bd_inode_ctx_get (loc->inode, this, &bdatt)) { - BD_STACK_UNWIND (stat, frame, 0, 0, &bdatt->iatt, xdata); - return 0; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - local->inode = inode_ref (loc->inode); - - STACK_WIND(frame, bd_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; -out: - BD_STACK_UNWIND (stat, frame, -1, op_errno, NULL, xdata); - return 0; -} - -int -bd_statfs_cbk (call_frame_t 
*frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct statvfs *buff, dict_t *xdata) -{ - uint64_t size = 0; - uint64_t fr_size = 0; - bd_priv_t *priv = NULL; - vg_t vg = NULL; - - if (op_ret < 0) - goto out; - - priv = this->private; - - vg = lvm_vg_open (priv->handle, priv->vg, "r", 0); - if (!vg) { - gf_log (this->name, GF_LOG_WARNING, "opening VG %s failed", - priv->vg); - op_ret = -1; - op_errno = EAGAIN; - goto out; - } - size = lvm_vg_get_size (vg); - fr_size = lvm_vg_get_free_size (vg); - lvm_vg_close (vg); - - buff->f_blocks += size / buff->f_frsize; - buff->f_bfree += fr_size / buff->f_frsize; - buff->f_bavail += fr_size / buff->f_frsize; - -out: - BD_STACK_UNWIND (statfs, frame, op_ret, op_errno, buff, xdata); - return 0; -} - -/* - * bd_statfs: Mimics statfs by returning used/free extents in the VG - */ -int -bd_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - - STACK_WIND (frame, bd_statfs_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, loc, xdata); - return 0; -out: - BD_STACK_UNWIND (statfs, frame, -1, EINVAL, NULL, NULL); - return 0; -} - -int -bd_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *buf, dict_t *xdata) -{ - bd_attr_t *bdatt = NULL; - bd_local_t *local = frame->local; - - /* only regular files are part of BD object */ - if (op_ret < 0 || buf->ia_type != IA_IFREG) - goto out; - - BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out); - - /* update buf with LV size */ - if (!bd_inode_ctx_get (local->inode, this, &bdatt)) - memcpy (buf, &bdatt->iatt, sizeof (struct iatt)); - -out: - BD_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata); - return 0; -} - -int -bd_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - int op_errno = EINVAL; - bd_local_t *local = NULL; - 
bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (this->private, out); - - /* if its already cached return it */ - if (!bd_inode_ctx_get (fd->inode, this, &bdatt)) { - BD_STACK_UNWIND (fstat, frame, 0, 0, &bdatt->iatt, xdata); - return 0; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->inode = inode_ref (fd->inode); - - STACK_WIND (frame, bd_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - - return 0; -out: - BD_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, xdata); - return 0; -} - -/* - * bd_readv: If posix file, invokes posix_readv otherwise reads from the BD - * file - */ -int -bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - int ret = -1; - int _fd = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - bd_fd_t *bd_fd = NULL; - struct iovec vec = {0, }; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - uint64_t bd_size = 0; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (this->private, out); - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd) { - STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, - fd, size, offset, flags, xdata); - return 0; - } - if (!size) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); - goto out; - } - iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); - if (!iobuf) { - op_errno = ENOMEM; - goto out; - } - _fd = bd_fd->fd; - op_ret = pread (_fd, iobuf->ptr, size, offset); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "read failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - vec.iov_base = iobuf->ptr; - vec.iov_len = op_ret; - - iobref = 
iobref_new (); - iobref_add (iobref, iobuf); - - if (bd_inode_ctx_get (fd->inode, this, &bdatt)) { - op_errno = EINVAL; - op_ret = -1; - goto out; - } - bd_size = bdatt->iatt.ia_size; - if (!bd_size || (offset + vec.iov_len) >= bd_size) - op_errno = ENOENT; - - op_ret = vec.iov_len; - bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_ATIME); - -out: - BD_STACK_UNWIND (readv, frame, op_ret, op_errno, - &vec, 1, &bdatt->iatt, iobref, NULL); - - if (iobref) - iobref_unref (iobref); - if (iobuf) - iobuf_unref (iobuf); - - return 0; -} - -#ifdef BLKDISCARD -/* - * bd_discard: Sends BLKDISCARD ioctl to the block device - */ -int -bd_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) -{ - int ret = -1; - int op_errno = EINVAL; - bd_fd_t *bd_fd = NULL; - uint64_t param[2] = {0, }; - bd_attr_t *bdatt = NULL; - struct iatt prebuf = {0, }; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (fd, out); - - /* posix */ - if (bd_inode_ctx_get (fd->inode, this, &bdatt)) { - STACK_WIND (frame, default_discard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->discard, - fd, offset, len, xdata); - return 0; - } - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd) { - op_errno = EINVAL; - goto out; - } - - param[0] = offset; - param[1] = len; - ret = ioctl (bd_fd->fd, BLKDISCARD, param); - if (ret < 0) { - if (errno == ENOTTY) - op_errno = ENOSYS; - else - op_errno = errno; - goto out; - } - memcpy (&prebuf, &bdatt->iatt, sizeof (prebuf)); - bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME); - - BD_STACK_UNWIND (discard, frame, ret, op_errno, &prebuf, - &bdatt->iatt, xdata); - return 0; - -out: - BD_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} -#else - -int -bd_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) -{ - BD_STACK_UNWIND (discard, frame, -1, ENOSYS, NULL, NULL, 
NULL); - return 0; -} -#endif - -/* - * Call back from posix_open for opening the backing posix file - * If it failed, close BD fd - */ -int -bd_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) -{ - bd_fd_t *bd_fd = NULL; - bd_attr_t *bdatt = NULL; - - if (!op_ret) - goto out; - - bd_inode_ctx_get (fd->inode, this, &bdatt); - if (!bdatt) /* posix file */ - goto out; - - /* posix open failed */ - if (bd_fd_ctx_get (this, fd, &bd_fd) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "bd_fd is NULL from fd=%p", fd); - goto out; - } - close (bd_fd->fd); - GF_FREE (bd_fd); - -out: - BD_STACK_UNWIND (open, frame, op_ret, op_errno, fd, NULL); - - return 0; -} - -/* - * bd_open: Opens BD file if given posix file is mapped to BD. Also opens - * posix file. - * fd contains both posix and BD fd - */ -int32_t -bd_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) -{ - int32_t ret = EINVAL; - bd_fd_t *bd_fd = NULL; - bd_attr_t *bdatt = NULL; - bd_gfid_t gfid = {0, }; - char *devpath = NULL; - bd_priv_t *priv = this->private; - int _fd = -1; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (fd, out); - - /* not bd file */ - if (fd->inode->ia_type != IA_IFREG || - bd_inode_ctx_get (fd->inode, this, &bdatt)) - goto posix; - - uuid_utoa_r (fd->inode->gfid, gfid); - gf_asprintf (&devpath, "/dev/%s/%s", priv->vg, gfid); - BD_VALIDATE_MEM_ALLOC (devpath, ret, out); - - _fd = open (devpath, flags | O_LARGEFILE, 0); - if (_fd < 0) { - ret = errno; - gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", devpath, - strerror (ret)); - goto out; - } - bd_fd = GF_CALLOC (1, sizeof(bd_fd_t), gf_bd_fd); - BD_VALIDATE_MEM_ALLOC (bd_fd, ret, out); - - bd_fd->fd = _fd; - bd_fd->flag = flags | O_LARGEFILE; - - if (fd_ctx_set (fd, this, (uint64_t)(long)bd_fd) < 0) { - gf_log (this->name, 
GF_LOG_WARNING, - "failed to set the fd context fd=%p", fd); - goto out; - } - - ret = 0; - -posix: - - /* open posix equivalant of this file, fd needed for fd related - operations like fsetxattr, ftruncate etc */ - STACK_WIND (frame, bd_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - - return 0; -out: - BD_STACK_UNWIND (open, frame, -1, ret, fd, NULL); - - GF_FREE (devpath); - if (ret) { - if (_fd >= 0) - close (_fd); - GF_FREE (bd_fd); - } - - return 0; -} - -/* - * call back from posix_setattr after updating iatt to posix file. - */ -int -bd_fsync_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) -{ - bd_local_t *local = frame->local; - bd_attr_t *bdatt = local->bdatt; - - BD_STACK_UNWIND (fsync, frame, op_ret, op_errno, &bdatt->iatt, - &bdatt->iatt, NULL); - return 0; -} - -int -bd_do_fsync (int fd, int datasync) -{ - int op_errno = 0; - - if (datasync) { - if (sys_fdatasync (fd)) { - op_errno = errno; - gf_log (THIS->name, GF_LOG_ERROR, - "fdatasync on fd=%d failed: %s", - fd, strerror (errno)); - } - - } else - - { - if (sys_fsync (fd)) { - op_errno = errno; - gf_log (THIS->name, GF_LOG_ERROR, - "fsync on fd=%d failed: %s", - fd, strerror (op_errno)); - } - } - - return op_errno; -} - -/* - * bd_fsync: Syncs if BD fd, forwards the request to posix - * fsync -> posix_setattr -> posix_fsync -*/ -int32_t -bd_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync, dict_t *xdata) -{ - int ret = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - bd_fd_t *bd_fd = NULL; - bd_priv_t *priv = NULL; - bd_attr_t *bdatt = NULL; - bd_local_t *local = NULL; - int valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; - struct iatt prebuf = {0, }; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ret = 
bd_inode_ctx_get (fd->inode, this, &bdatt); - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd || !bdatt) { - STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, fd, datasync, - xdata); - return 0; - } - - memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt)); - - op_errno = bd_do_fsync (bd_fd->fd, datasync); - if (op_errno) - goto out; - - /* For BD, Update the a|mtime during full fsync only */ - if (!datasync) { - local = bd_local_init (frame, this); - /* In case of mem failure, should posix flush called ? */ - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr); - BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out); - - local->bdatt->type = gf_strdup (bdatt->type); - memcpy (&local->bdatt->iatt, &bdatt->iatt, sizeof (struct iatt)); - bd_update_amtime (&local->bdatt->iatt, valid); - gf_uuid_copy (local->loc.gfid, fd->inode->gfid); - STACK_WIND (frame, bd_fsync_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, &local->loc, - &local->bdatt->iatt, - valid, NULL); - return 0; - } - -out: - BD_STACK_UNWIND (fsync, frame, op_ret, op_errno, &prebuf, - &bdatt->iatt, NULL); - return 0; -} - -int -bd_flush_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) -{ - BD_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata); - return 0; -} - -int -bd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - int ret = -1; - bd_fd_t *bd_fd = NULL; - bd_priv_t *priv = NULL; - bd_attr_t *bdatt = NULL; - int valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; - bd_local_t *local = NULL; - int op_errno = EINVAL; - loc_t loc = {0, }; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ret = bd_inode_ctx_get (fd->inode, this, &bdatt); - if 
(!bdatt) - goto out; - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd || !bdatt) { - gf_log (this->name, GF_LOG_WARNING, - "bdfd/bdatt is NULL from fd=%p", fd); - goto out; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->fd = fd_ref (fd); - gf_uuid_copy (loc.gfid, bdatt->iatt.ia_gfid); - - /* Update the a|mtime during flush */ - STACK_WIND (frame, bd_flush_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, &loc, &bdatt->iatt, - valid, NULL); - - return 0; - -out: - STACK_WIND (frame, default_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd, xdata); - - return 0; -} - -int32_t -bd_release (xlator_t *this, fd_t *fd) -{ - int ret = -1; - bd_fd_t *bd_fd = NULL; - uint64_t tmp_bfd = 0; - bd_attr_t *bdatt = NULL; - bd_priv_t *priv = this->private; - - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (priv, out); - - ret = bd_inode_ctx_get (fd->inode, this, &bdatt); - if (ret || !bdatt) /* posix file */ - goto out; - - /* FIXME: Update amtime during release */ - - ret = fd_ctx_del (fd, this, &tmp_bfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "bfd is NULL from fd=%p", fd); - goto out; - } - bd_fd = (bd_fd_t *)(long)tmp_bfd; - - close (bd_fd->fd); - GF_FREE (bd_fd); -out: - return 0; -} - -/* - * Call back for removexattr after removing BD_XATTR incase of - * bd create failure - */ -int -bd_setx_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - bd_local_t *local = frame->local; - - if (local->fd) - BD_STACK_UNWIND (setxattr, frame, -1, EIO, xdata); - else - BD_STACK_UNWIND (setxattr, frame, -1, EIO, xdata); - return 0; - -} - -/* - * Call back after setting BD_XATTR. Creates BD. 
If BD creation is a failure - * invokes posix_removexattr to remove created BD_XATTR - */ -int -bd_setx_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - bd_local_t *local = frame->local; - bd_attr_t *bdatt = NULL; - - if (op_ret < 0) - goto next; - - /* Create LV */ - op_errno = bd_create (local->inode->gfid, local->bdatt->iatt.ia_size, - local->bdatt->type, this->private); - if (!op_errno) - goto out; - - /* LV creation failed, remove BD_XATTR */ - if (local->fd) - STACK_WIND (frame, bd_setx_rm_xattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, - local->fd, BD_XATTR, NULL); - else - STACK_WIND (frame, bd_setx_rm_xattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - &local->loc, BD_XATTR, NULL); - - return 0; -out: - - bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr); - if (!bdatt) { - op_ret = -1; - op_errno = ENOMEM; - goto next; - } - - memcpy (&bdatt->iatt, &local->bdatt->iatt, sizeof (struct iatt)); - bdatt->type = gf_strdup (local->bdatt->type); - - bd_inode_ctx_set (local->inode, THIS, bdatt); - -next: - if (local->fd) - BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - else - BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - return 0; - -} - -/* - * Call back from posix_stat - */ -int -bd_setx_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *iatt, - dict_t *xdata) -{ - char *param = NULL; - char *type = NULL; - char *s_size = NULL; - char *p = NULL; - char *copy = NULL; - bd_local_t *local = frame->local; - bd_priv_t *priv = this->private; - char *bd = NULL; - uint64_t size = 0; - - if (op_ret < 0) - goto out; - - if (!IA_ISREG (iatt->ia_type)) { - op_errno = EOPNOTSUPP; - goto out; - } - - param = copy = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char); - BD_VALIDATE_MEM_ALLOC (param, op_errno, out); - - strncpy (param, local->data->data, local->data->len); - - type = 
strtok_r (param, ":", &p); - if (!type) { - op_errno = EINVAL; - goto out; - } - - if (strcmp (type, BD_LV) && strcmp (type, BD_THIN)) { - gf_log (this->name, GF_LOG_WARNING, "Invalid bd type %s given", - type); - op_errno = EINVAL; - goto out; - } - - if (!strcmp (type, BD_THIN) && !(priv->caps & BD_CAPS_THIN)) { - gf_log (this->name, GF_LOG_WARNING, "THIN lv not supported by " - "this volume"); - op_errno = EOPNOTSUPP; - goto out; - } - - s_size = strtok_r (NULL, ":", &p); - - /* If size not specified get default size */ - if (!s_size) - size = bd_get_default_extent (priv); - else - gf_string2bytesize (s_size, &size); - - gf_asprintf (&bd, "%s:%ld", type, size); - BD_VALIDATE_MEM_ALLOC (bd, op_errno, out); - - local->dict = dict_new (); - BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out); - - local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr); - BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out); - - if (dict_set_dynstr (local->dict, BD_XATTR, bd) < 0) { - op_errno = EINVAL; - goto out; - } - - local->bdatt->type = gf_strdup (type); - memcpy (&local->bdatt->iatt, iatt, sizeof (struct iatt)); - local->bdatt->iatt.ia_size = size; - - if (local->fd) - STACK_WIND (frame, bd_setx_setx_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - local->fd, local->dict, 0, NULL); - else - STACK_WIND (frame, bd_setx_setx_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->loc, local->dict, 0, NULL); - - return 0; - -out: - if (local->fd) - BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, xdata); - else - BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata); - - GF_FREE (bd); - GF_FREE (copy); - return 0; -} - -int -bd_offload_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - bd_local_t *local = frame->local; - - if (local->fd) - BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, NULL); - else - BD_STACK_UNWIND (setxattr, frame, -1, EIO, NULL); - - return 0; -} - -int 
-bd_offload_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - bd_local_t *local = frame->local; - - if (op_ret < 0) - goto out; - - if (local->offload == BD_OF_SNAPSHOT) - op_ret = bd_snapshot_create (frame->local, this->private); - else - op_ret = bd_clone (frame->local, this->private); - - if (op_ret) { - STACK_WIND (frame, bd_offload_rm_xattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - local->dloc, BD_XATTR, NULL); - return 0; - } - -out: - if (local->fd) - BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); - else - BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); - - return 0; -} - -int -bd_offload_getx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) -{ - char *bd = NULL; - bd_local_t *local = frame->local; - char *type = NULL; - char *p = NULL; - - if (op_ret < 0) - goto out; - - if (dict_get_str (xattr, BD_XATTR, &p)) { - op_errno = EINVAL; - goto out; - } - - type = gf_strdup (p); - BD_VALIDATE_MEM_ALLOC (type, op_errno, out); - - p = strrchr (type, ':'); - if (!p) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "source file xattr %s corrupted?", type); - goto out; - } - - *p='\0'; - - /* For clone size is taken from source LV */ - if (!local->size) { - p++; - gf_string2bytesize (p, &local->size); - } - gf_asprintf (&bd, "%s:%ld", type, local->size); - local->bdatt->type = gf_strdup (type); - dict_del (local->dict, BD_XATTR); - dict_del (local->dict, LINKTO); - if (dict_set_dynstr (local->dict, BD_XATTR, bd)) { - op_errno = EINVAL; - goto out; - } - - STACK_WIND (frame, bd_offload_setx_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - local->dloc, local->dict, 0, NULL); - - return 0; - -out: - if (local->fd) - BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - else - BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - - GF_FREE (type); - GF_FREE (bd); - - 
return 0; -} - -int -bd_offload_dest_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *iatt, - dict_t *xattr, struct iatt *postparent) -{ - bd_local_t *local = frame->local; - char *bd = NULL; - int ret = -1; - char *linkto = NULL; - - if (op_ret < 0 && op_errno != ENODATA) { - op_errno = EINVAL; - goto out; - } - - if (!IA_ISREG (iatt->ia_type)) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "destination gfid is not a " - "regular file"); - goto out; - } - - ret = dict_get_str (xattr, LINKTO, &linkto); - if (linkto) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "destination file not " - "present in same brick"); - goto out; - } - - ret = dict_get_str (xattr, BD_XATTR, &bd); - if (bd) { - op_errno = EEXIST; - goto out; - } - - local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr); - BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out); - - STACK_WIND (frame, bd_offload_getx_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - &local->loc, BD_XATTR, NULL); - - return 0; -out: - if (local->fd) - BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - else - BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - - return 0; -} - -int -bd_merge_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - /* FIXME: if delete failed, remove xattr */ - - BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); - return 0; -} - -int -bd_do_merge(call_frame_t *frame, xlator_t *this) -{ - bd_local_t *local = frame->local; - inode_t *parent = NULL; - char *p = NULL; - int op_errno = 0; - - op_errno = bd_merge (this->private, local->inode->gfid); - if (op_errno) - goto out; - - /* - * posix_unlink needs loc->pargfid to be valid, but setxattr FOP does - * not have loc->pargfid set. 
Get parent's gfid by getting parents inode - */ - parent = inode_parent (local->inode, NULL, NULL); - if (!parent) { - /* - * FIXME: Snapshot LV already deleted. - * remove xattr, instead of returning failure - */ - op_errno = EINVAL; - goto out; - } - gf_uuid_copy (local->loc.pargfid, parent->gfid); - - p = strrchr (local->loc.path, '/'); - if (p) - p++; - local->loc.name = p; - - STACK_WIND (frame, bd_merge_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - &local->loc, 0, NULL); - - return 0; -out: - BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - - return op_errno; -} - -int -bd_offload (call_frame_t *frame, xlator_t *this, loc_t *loc, - fd_t *fd, bd_offload_t offload) -{ - char *param = NULL; - char *param_copy = NULL; - char *p = NULL; - char *size = NULL; - char *gfid = NULL; - int op_errno = 0; - bd_local_t *local = frame->local; - - param = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char); - BD_VALIDATE_MEM_ALLOC (param, op_errno, out); - param_copy = param; - - local->dict = dict_new (); - BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out); - - local->dloc = GF_CALLOC (1, sizeof (loc_t), gf_bd_loc_t); - BD_VALIDATE_MEM_ALLOC (local->dloc, op_errno, out); - - strncpy (param, local->data->data, local->data->len); - - gfid = strtok_r (param, ":", &p); - size = strtok_r (NULL, ":", &p); - if (size) - gf_string2bytesize (size, &local->size); - else if (offload != BD_OF_CLONE) - local->size = bd_get_default_extent (this->private); - - if (dict_set_int8 (local->dict, BD_XATTR, 1) < 0) { - op_errno = EINVAL; - goto out; - } - if (dict_set_int8 (local->dict, LINKTO, 1) < 0) { - op_errno = EINVAL; - goto out; - } - - gf_uuid_parse (gfid, local->dloc->gfid); - local->offload = offload; - - STACK_WIND (frame, bd_offload_dest_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, local->dloc, - local->dict); - - return 0; - -out: - if (fd) - BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - else - BD_STACK_UNWIND 
(setxattr, frame, -1, op_errno, NULL); - - GF_FREE (param_copy); - return 0; -} - -/* - * bd_setxattr: Used to create & map an LV to a posix file using - * BD_XATTR xattr - * bd_setxattr -> posix_stat -> bd_setx_stat_cbk -> posix_setxattr -> - * bd_setx_setx_cbk -> create_lv - * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk - */ -int -bd_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int flags, dict_t *xdata) -{ - int op_errno = 0; - data_t *data = NULL; - bd_local_t *local = NULL; - bd_attr_t *bdatt = NULL; - bd_offload_t cl_type = BD_OF_NONE; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - - if ((data = dict_get (dict, BD_XATTR))) - cl_type = BD_OF_NONE; - else if ((data = dict_get (dict, BD_CLONE))) - cl_type = BD_OF_CLONE; - else if ((data = dict_get (dict, BD_SNAPSHOT))) - cl_type = BD_OF_SNAPSHOT; - else if ((data = dict_get (dict, BD_MERGE))) - cl_type = BD_OF_MERGE; - - bd_inode_ctx_get (loc->inode, this, &bdatt); - if (!cl_type && !data) { - STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setxattr, loc, dict, - flags, xdata); - return 0; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->data = data; - loc_copy (&local->loc, loc); - local->inode = inode_ref (loc->inode); - - if (cl_type) { - /* For cloning/snapshot, source file must be mapped to LV */ - if (!bdatt) { - gf_log (this->name, GF_LOG_WARNING, - "%s not mapped to BD", loc->path); - op_errno = EINVAL; - goto out; - } - if (cl_type == BD_OF_MERGE) - bd_do_merge (frame, this); - else - bd_offload (frame, this, loc, NULL, cl_type); - } else if (data) { - if (bdatt) { - gf_log (this->name, GF_LOG_WARNING, - "%s already mapped to BD", loc->path); - op_errno = EEXIST; - goto out; - } - STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc, xdata); - } - - return 0; -out: - if (op_errno) - 
STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata); - - return 0; -} - -/* - * bd_fsetxattr: Used to create/map an LV to a posix file using - * BD_XATTR xattr - * bd_fsetxattr -> posix_fstat -> bd_setx_stat_cbk -> posix_fsetxattr -> - * bd_setx_setx_cbk -> create_lv - * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk - * -> bd_fsetxattr_cbk - */ -int32_t -bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int flags, dict_t *xdata) -{ - int op_errno = 0; - data_t *data = NULL; - bd_attr_t *bdatt = NULL; - bd_local_t *local = NULL; - bd_offload_t cl_type = BD_OF_NONE; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (fd, out); - - bd_inode_ctx_get (fd->inode, this, &bdatt); - - if ((data = dict_get (dict, BD_XATTR))) - cl_type = BD_OF_NONE; - else if ((data = dict_get (dict, BD_CLONE))) - cl_type = BD_OF_CLONE; - else if ((data = dict_get (dict, BD_SNAPSHOT))) - cl_type = BD_OF_SNAPSHOT; - else if ((data = dict_get (dict, BD_MERGE))) { - /* - * bd_merge is not supported for fsetxattr, because snapshot LV - * is opened and it causes problem in snapshot merge - */ - op_errno = EOPNOTSUPP; - goto out; - } - - bd_inode_ctx_get (fd->inode, this, &bdatt); - - if (!cl_type && !data) { - /* non bd file object */ - STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - fd, dict, flags, xdata); - return 0; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->inode = inode_ref (fd->inode); - local->fd = fd_ref (fd); - local->data = data; - - if (cl_type) { - /* For cloning/snapshot, source file must be mapped to LV */ - if (!bdatt) { - gf_log (this->name, GF_LOG_WARNING, - "fd %p not mapped to BD", fd); - op_errno = EINVAL; - goto out; - - } - bd_offload (frame, this, NULL, fd, cl_type); - } else if (data) { - if (bdatt) { - gf_log (this->name, 
GF_LOG_WARNING, - "fd %p already mapped to BD", fd); - op_errno = EEXIST; - goto out; - } - STACK_WIND(frame, bd_setx_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - } - - return 0; -out: - - BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - - return 0; -} - -int32_t -bd_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - if (!strcmp (name, BD_XATTR)) - goto out; - - STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); - return 0; -out: - BD_STACK_UNWIND (removexattr, frame, -1, ENODATA, NULL); - return 0; -} - -int32_t -bd_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - if (!strcmp (name, BD_XATTR)) - goto out; - - STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); - - return 0; -out: - BD_STACK_UNWIND (fremovexattr, frame, -1, ENODATA, NULL); - return 0; -} - -int -bd_trunc_setxattr_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - bd_local_t *local = frame->local; - - if (local->fd) - BD_STACK_UNWIND (ftruncate, frame, -1, EIO, NULL, NULL, NULL); - else - BD_STACK_UNWIND (truncate, frame, -1, EIO, NULL, NULL, NULL); - - return 0; -} - -/* - * Call back for setxattr after setting BD_XATTR_SIZE. 
- */ -int -bd_trunc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - bd_local_t *local = frame->local; - bd_attr_t *bdatt = NULL; - struct iatt prebuf = {0, }; - char *bd = NULL; - - if (op_ret < 0) - goto out; - - bd_inode_ctx_get (local->inode, this, &bdatt); - if (!bdatt) - goto revert_xattr; - - op_errno = bd_resize (this->private, local->inode->gfid, - local->bdatt->iatt.ia_size); - if (op_errno) - goto revert_xattr; - - memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt)); - /* LV resized, update new size in the cache */ - bdatt->iatt.ia_size = local->bdatt->iatt.ia_size; - - if (local->fd) - BD_STACK_UNWIND (ftruncate, frame, 0, 0, &prebuf, &bdatt->iatt, - NULL); - else - BD_STACK_UNWIND (truncate, frame, 0, 0, &prebuf, &bdatt->iatt, - NULL); - - return 0; - -revert_xattr: - /* revert setxattr */ - op_ret = dict_get_str (local->dict, BD_XATTR, &bd); - GF_FREE (bd); - if (bdatt) - gf_asprintf (&bd, "%s:%ld", bdatt->type, bdatt->iatt.ia_size); - - if (local->fd) - STACK_WIND (frame, bd_trunc_setxattr_setx_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - local->fd, local->dict, 0, NULL); - else - STACK_WIND (frame, bd_trunc_setxattr_setx_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->loc, local->dict, 0, NULL); - - return 0; -out: - if (local->fd) - BD_STACK_UNWIND (ftruncate, frame, -1, EIO, NULL, NULL, NULL); - else - BD_STACK_UNWIND (truncate, frame, -1, EIO, NULL, NULL, NULL); - - return 0; -} - -/* - * call back from posix_[f]truncate_stat - * If offset > LV size, it resizes the LV and calls posix_setxattr - * to update new LV size in xattr else calls posix_setattr for updating - * the posix file so that truncate fop behaves properly - */ -int -bd_trunc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *buf, dict_t *xdata) -{ - char *bd = NULL; - bd_local_t *local = frame->local; - bd_attr_t *bdatt = 
NULL; - - if (op_ret < 0) - goto out; - - local->dict = dict_new (); - BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out); - - bd_inode_ctx_get (local->inode, this, &bdatt); - if (!bdatt) { - op_errno = EINVAL; - goto out; - } - - gf_asprintf (&bd, "%s:%ld", bdatt->type, local->bdatt->iatt.ia_size); - if (dict_set_dynstr (local->dict, BD_XATTR, bd)) { - op_errno = EINVAL; - goto out; - } - - if (local->fd) - STACK_WIND (frame, bd_trunc_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - local->fd, local->dict, 0, NULL); - else - STACK_WIND (frame, bd_trunc_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - &local->loc, local->dict, 0, NULL); - - return 0; -out: - if (local->fd) - BD_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, - NULL); - else - BD_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, - NULL); - GF_FREE (bd); - return 0; -} - -void -bd_do_trunc (call_frame_t *frame, xlator_t *this, fd_t *fd, loc_t *loc, - off_t offset, bd_attr_t *bdatt) -{ - bd_local_t *local = NULL; - struct iatt prebuf = {0, }; - int op_errno = 0; - int op_ret = -1; - - /* If requested size is less than LV size, return success */ - if (offset <= bdatt->iatt.ia_size) { - memcpy (&prebuf, &bdatt->iatt, sizeof (struct iatt)); - bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME); - op_ret = 0; - goto out; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->bdatt = GF_CALLOC (1, sizeof (bd_attr_t), gf_bd_attr); - BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out); - - if (fd) { - local->inode = inode_ref (fd->inode); - local->fd = fd_ref (fd); - } else { - local->inode = inode_ref (loc->inode); - loc_copy (&local->loc, loc); - } - - local->bdatt->iatt.ia_size = - bd_adjust_size (this->private, offset); - - STACK_WIND (frame, bd_trunc_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, NULL); - - return; - -out: - if (fd) - BD_STACK_UNWIND (ftruncate, frame, 
op_ret, op_errno, - &prebuf, &bdatt->iatt, NULL); - else - BD_STACK_UNWIND (truncate, frame, op_ret, op_errno, - &prebuf, &bdatt->iatt, NULL); - return; -} - -/* - * bd_ftruncate: Resizes a LV if fd belongs to BD. - */ -int32_t -bd_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) -{ - int op_errno = 0; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - if (bd_inode_ctx_get (fd->inode, this, &bdatt)) { - STACK_WIND (frame, default_ftruncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, - offset, xdata); - return 0; - } - - bd_do_trunc (frame, this, fd, NULL, offset, bdatt); - return 0; -out: - BD_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -/* - * bd_truncate: Resizes a LV if file maps to LV. - */ -int32_t -bd_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - int op_errno = 0; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - if (bd_inode_ctx_get (loc->inode, this, &bdatt)) { - STACK_WIND (frame, default_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, - offset, xdata); - return 0; - } - - bd_do_trunc (frame, this, NULL, loc, offset, bdatt); - return 0; - -out: - BD_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -__bd_pwritev (int fd, struct iovec *vector, int count, off_t offset, - uint64_t bd_size) -{ - int index = 0; - int retval = 0; - off_t internal_offset = 0; - - if (!vector) - return -EFAULT; - - retval = pwritev (fd, vector, count, offset); - if (retval == -1) { - int64_t off = offset; - gf_log (THIS->name, GF_LOG_WARNING, - "base %p, length %zd, offset %" PRId64 ", message %s", - vector[index].iov_base, vector[index].iov_len, - off, strerror (errno)); - retval = -errno; - goto err; - } -/* - 
- - internal_offset = offset; - for (index = 0; index < count; index++) { - if (internal_offset > bd_size) { - op_ret = -ENOSPC; - goto err; - } - if (internal_offset + vector[index].iov_len > bd_size) { - vector[index].iov_len = bd_size - internal_offset; - no_space = 1; - } - retval = pwritev (fd, vector[index].iov_base, - vector[index].iov_len, internal_offset); - if (retval == -1) { - gf_log (THIS->name, GF_LOG_WARNING, - "base %p, length %ld, offset %ld, message %s", - vector[index].iov_base, vector[index].iov_len, - internal_offset, strerror (errno)); - op_ret = -errno; - goto err; - } - op_ret += retval; - internal_offset += retval; - if (no_space) - break; - } -*/ -err: - return retval; -} - -/* - * bd_writev: Writes to LV if its BD file or forwards the request to posix_write - * bd_writev -> posix_writev -> bd_writev_cbk - */ -int -bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, - dict_t *xdict) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - bd_fd_t *bd_fd = NULL; - int ret = -1; - uint64_t size = 0; - struct iatt prebuf = {0, }; - bd_attr_t *bdatt = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (vector, out); - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd) { /* posix fd */ - STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - offset, flags, iobref, xdict); - return 0; - } - - _fd = bd_fd->fd; - - if (bd_inode_ctx_get (fd->inode, this, &bdatt)) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - size = bdatt->iatt.ia_size; - - op_ret = __bd_pwritev (_fd, vector, count, offset, size); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64 - ", %s", offset, strerror (op_errno)); - goto out; - } - - memcpy 
(&prebuf, &bdatt->iatt, sizeof (struct iatt)); - bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME); -out: - - BD_STACK_UNWIND (writev, frame, op_ret, op_errno, &prebuf, - &bdatt->iatt, NULL); - return 0; -} - -int -bd_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - bd_attr_t *bdatt = NULL; - int *valid = cookie; - bd_local_t *local = frame->local; - - if (op_ret < 0 || !valid || !local) - goto out; - - if (bd_inode_ctx_get (local->inode, this, &bdatt)) - goto out; - - if (*valid & GF_SET_ATTR_UID) - bdatt->iatt.ia_uid = postbuf->ia_uid; - else if (*valid & GF_SET_ATTR_GID) - bdatt->iatt.ia_gid = postbuf->ia_gid; - else if (*valid & GF_SET_ATTR_MODE) { - bdatt->iatt.ia_type = postbuf->ia_type; - bdatt->iatt.ia_prot = postbuf->ia_prot; - } else if (*valid & GF_SET_ATTR_ATIME) { - bdatt->iatt.ia_atime = postbuf->ia_atime; - bdatt->iatt.ia_atime_nsec = postbuf->ia_atime_nsec; - } else if (*valid & GF_SET_ATTR_MTIME) { - bdatt->iatt.ia_mtime = postbuf->ia_mtime; - bdatt->iatt.ia_mtime_nsec = postbuf->ia_mtime_nsec; - } - - bdatt->iatt.ia_ctime = postbuf->ia_ctime; - bdatt->iatt.ia_ctime_nsec = postbuf->ia_ctime_nsec; - - memcpy (postbuf, &bdatt->iatt, sizeof (struct iatt)); -out: - GF_FREE (valid); - BD_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; -} - -int -bd_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, - int32_t valid, dict_t *xdata) -{ - bd_local_t *local = NULL; - bd_attr_t *bdatt = NULL; - int *ck_valid = NULL; - int op_errno = 0; - - if (bd_inode_ctx_get (loc->inode, this, &bdatt)) { - STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, - loc, stbuf, valid, xdata); - return 0; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - ck_valid = GF_CALLOC (1, sizeof (valid), gf_bd_int32_t); - 
BD_VALIDATE_MEM_ALLOC (ck_valid, op_errno, out); - - local->inode = inode_ref (loc->inode); - *ck_valid = valid; - - STACK_WIND_COOKIE (frame, bd_setattr_cbk, ck_valid, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, - loc, stbuf, valid, xdata); - - return 0; -out: - BD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, xdata); - return 0; -} - -int -bd_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - bd_attr_t *bdatt = NULL; - - if (op_ret < 0) - goto out; - - if (bd_inode_ctx_get (inode, this, &bdatt)) - goto out; - - bdatt->iatt.ia_ctime = buf->ia_ctime; - bdatt->iatt.ia_ctime_nsec = buf->ia_ctime_nsec; - bdatt->iatt.ia_nlink = buf->ia_nlink; - memcpy (buf, &bdatt->iatt, sizeof (struct iatt)); - -out: - BD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent, NULL); - return 0; -} - -int -bd_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - STACK_WIND (frame, bd_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); - return 0; -} - -int -bd_handle_special_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc, - fd_t *fd, const char *name, dict_t *xdata) -{ - dict_t *xattr = NULL; - int op_ret = -1; - int op_errno = ENOMEM;; - bd_priv_t *priv = this->private; - - xattr = dict_new (); - if (!xattr) - goto out; - - if (!strcmp (name, VOL_TYPE)) - op_ret = dict_set_int64 (xattr, (char *)name, 1); - else if (!strcmp (name, VOL_CAPS)) - op_ret = dict_set_int64 (xattr, (char *)name, priv->caps); - else - op_ret = bd_get_origin (this->private, loc, fd, xattr); - -out: - if (loc) - BD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, - xdata); - else - BD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr, - xdata); - - op_ret = dict_reset (xattr); - dict_unref (xattr); - - return 0; -} - -int 
-bd_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS) - || !strcmp (name, BD_ORIGIN))) - bd_handle_special_xattrs (frame, this, NULL, fd, name, xdata); - else - STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, - fd, name, xdata); - return 0; -} - -int -bd_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS) - || !strcmp (name, BD_ORIGIN))) - bd_handle_special_xattrs (frame, this, loc, NULL, name, xdata); - else - STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, name, xdata); - - return 0; -} - -int -bd_unlink_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - bd_gfid_t gfid = {0, }; - bd_local_t *local = frame->local; - - if (buf->ia_nlink > 1) - goto posix; - - BD_VALIDATE_LOCAL_OR_GOTO (local, op_errno, out); - - uuid_utoa_r (inode->gfid, gfid); - if (bd_delete_lv (this->private, gfid, &op_errno) < 0) { - if (op_errno != ENOENT) - goto out; - } - -posix: - /* remove posix */ - STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - &local->loc, 0, NULL); - - return 0; -out: - BD_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int -bd_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflag, dict_t *xdata) -{ - int op_errno = 0; - bd_attr_t *bdatt = NULL; - bd_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - if (bd_inode_ctx_get (loc->inode, this, &bdatt)) { - STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - loc, 
xflag, xdata); - return 0; - } - - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - loc_copy (&local->loc, loc); - - STACK_WIND (frame, bd_unlink_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - return 0; -out: - BD_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -bd_priv (xlator_t *this) -{ - return 0; -} - -int32_t -bd_inode (xlator_t *this) -{ - return 0; -} - -int32_t -bd_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - int32_t len, dict_t *xdata) -{ - int op_ret = -1; - int op_errno = 0; - int ret = 0; - int _fd = -1; - char *alloc_buf = NULL; - char *buf = NULL; - int32_t weak_checksum = 0; - bd_fd_t *bd_fd = NULL; - unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = bd_fd_ctx_get (this, fd, &bd_fd); - if (ret < 0 || !bd_fd) { - STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rchecksum, fd, offset, - len, xdata); - return 0; - } - - memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - - alloc_buf = page_aligned_alloc (len, &buf); - if (!alloc_buf) { - op_errno = ENOMEM; - goto out; - } - - _fd = bd_fd->fd; - - LOCK (&fd->lock); - { - ret = pread (_fd, buf, len, offset); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pread of %d bytes returned %d (%s)", - len, ret, strerror (errno)); - op_errno = errno; - } - } - UNLOCK (&fd->lock); - - if (ret < 0) - goto out; - - weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, - (size_t) len); - gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, - (unsigned char *) strong_checksum); - - op_ret = 0; -out: - BD_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, - weak_checksum, strong_checksum, NULL); - - GF_FREE (alloc_buf); - - return 0; -} - -static int -bd_zerofill(call_frame_t *frame, xlator_t *this, 
fd_t *fd, off_t offset, - off_t len, dict_t *xdata) -{ - int32_t ret = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - bd_attr_t *bdatt = NULL; - - /* iatt already cached */ - if (bd_inode_ctx_get (fd->inode, this, &bdatt) < 0) { - STACK_WIND (frame, default_zerofill_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->zerofill, - fd, offset, len, xdata); - return 0; - } - - ret = bd_do_zerofill(frame, this, fd, offset, len, - &statpre, &statpost); - if (ret) - goto err; - - STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, ret, NULL, NULL, NULL); - return 0; -} - -/** - * notify - when parent sends PARENT_UP, send CHILD_UP event from here - */ -int32_t -notify (xlator_t *this, - int32_t event, - void *data, - ...) -{ - switch (event) - { - case GF_EVENT_PARENT_UP: - { - /* Tell the parent that bd xlator is up */ - default_notify (this, GF_EVENT_CHILD_UP, data); - } - break; - default: - break; - } - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_bd_mt_end + 1); - - if (ret != 0) - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - - return ret; -} - -int -reconfigure (xlator_t *this, dict_t *options) -{ - int ret = -1; - bd_priv_t *priv = this->private; - - GF_OPTION_RECONF ("bd-aio", priv->aio_configured, options, - bool, out); - - if (priv->aio_configured) - bd_aio_on (this); - else - bd_aio_off (this); - - ret = 0; -out: - return ret; -} - -/** - * bd xlator init - Validate configured VG - */ -int -init (xlator_t *this) -{ - int ret = 0; - char *vg_data = NULL; - char *device = NULL; - bd_priv_t *_private = NULL; - - if (!this->children) { - gf_log (this->name, GF_LOG_CRITICAL, - "FATAL: storage/bd needs posix as subvolume"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling. 
Please check the volume file."); - } - - GF_OPTION_INIT ("export", vg_data, str, error); - GF_OPTION_INIT ("device", device, str, error); - - /* Now we support only LV device */ - if (strcasecmp (device, BACKEND_VG)) { - gf_log (this->name, GF_LOG_CRITICAL, - "FATAL: unknown %s backend %s", BD_XLATOR, device); - return -1; - } - - this->local_pool = mem_pool_new (bd_local_t, 64); - if (!this->local_pool) { - gf_log (this->name, GF_LOG_CRITICAL, - "FATAL: Failed to create bd memory pool"); - return -1; - } - - ret = 0; - _private = GF_CALLOC (1, sizeof (*_private), gf_bd_private); - if (!_private) - goto error; - - this->private = _private; - _private->vg = gf_strdup (vg_data); - if (!_private->vg) - goto error; - - _private->handle = lvm_init (NULL); - if (!_private->handle) { - gf_log (this->name, GF_LOG_CRITICAL, "lvm_init failed"); - goto error; - } - _private->caps = BD_CAPS_BD; - if (bd_scan_vg (this, _private)) - goto error; - - _private->aio_init_done = _gf_false; - _private->aio_capable = _gf_false; - - GF_OPTION_INIT ("bd-aio", _private->aio_configured, bool, error); - if (_private->aio_configured) { - if (bd_aio_on (this)) { - gf_log (this->name, GF_LOG_ERROR, - "BD AIO init failed"); - ret = -1; - goto error; - } - } - - _private->caps |= BD_CAPS_OFFLOAD_COPY | BD_CAPS_OFFLOAD_SNAPSHOT | - BD_CAPS_OFFLOAD_ZERO; - - return 0; -error: - if (_private) { - GF_FREE (_private->vg); - if (_private->handle) - lvm_quit (_private->handle); - GF_FREE (_private); - } - - mem_pool_destroy (this->local_pool); - - return -1; -} - -void -fini (xlator_t *this) -{ - bd_priv_t *priv = this->private; - mem_pool_destroy (this->local_pool); - this->local_pool = NULL; - if (!priv) - return; - lvm_quit (priv->handle); - GF_FREE (priv->vg); - this->private = NULL; - GF_FREE (priv); - return; -} - -struct xlator_dumpops dumpops = { - .priv = bd_priv, - .inode = bd_inode, -}; - -struct xlator_fops fops = { - .readdirp = bd_readdirp, - .lookup = bd_lookup, - .stat = bd_stat, - 
.statfs = bd_statfs, - .open = bd_open, - .fstat = bd_fstat, - .rchecksum = bd_rchecksum, - .readv = bd_readv, - .fsync = bd_fsync, - .setxattr = bd_setxattr, - .fsetxattr = bd_fsetxattr, - .removexattr = bd_removexattr, - .fremovexattr=bd_fremovexattr, - .truncate = bd_truncate, - .ftruncate = bd_ftruncate, - .writev = bd_writev, - .getxattr = bd_getxattr, - .fgetxattr = bd_fgetxattr, - .unlink = bd_unlink, - .link = bd_link, - .flush = bd_flush, - .setattr = bd_setattr, - .discard = bd_discard, - .zerofill = bd_zerofill, -}; - -struct xlator_cbks cbks = { - .release = bd_release, - .forget = bd_forget, -}; - -struct volume_options options[] = { - { .key = {"export"}, - .type = GF_OPTION_TYPE_STR}, - { .key = {"device"}, - .type = GF_OPTION_TYPE_STR, - .default_value = BACKEND_VG}, - { - .key = {"bd-aio"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Support for native Linux AIO" - }, - - { .key = {NULL} } -}; diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h deleted file mode 100644 index 3491349f2e4..00000000000 --- a/xlators/storage/bd/src/bd.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - BD translator - Exports Block devices on server side as regular - files to client - - Copyright IBM, Corp. 2012 - - This file is part of GlusterFS. - - Author: - M. Mohan Kumar <mohan@in.ibm.com> - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef _BD_H -#define _BD_H - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_LIBAIO -#include <libaio.h> -#endif - -#include "xlator.h" -#include "mem-types.h" - -#define BD_XLATOR "block device mapper xlator" -#define BACKEND_VG "vg" -#define GF_XATTR "user.glusterfs" -#define BD_XATTR GF_XATTR ".bd" - -#define BD_LV "lv" -#define BD_THIN "thin" - -#define VOL_TYPE "volume.type" -#define VOL_CAPS "volume.caps" - -#define ALIGN_SIZE 4096 - -#define BD_CAPS_BD 0x01 -#define BD_CAPS_THIN 0x02 -#define BD_CAPS_OFFLOAD_COPY 0x04 -#define BD_CAPS_OFFLOAD_SNAPSHOT 0x08 -#define BD_CAPS_OFFLOAD_ZERO 0x20 - -#define BD_CLONE "clone" -#define BD_SNAPSHOT "snapshot" -#define BD_MERGE "merge" -#define BD_ORIGIN "list-origin" - -#define IOV_NR 4 -#define IOV_SIZE (64 * 1024) - -#define ALIGN_SIZE 4096 -#define LINKTO "trusted.glusterfs.dht.linkto" - -#define MAX_NO_VECT 1024 - - -#define BD_VALIDATE_MEM_ALLOC(buff, op_errno, label) \ - if (!buff) { \ - op_errno = ENOMEM; \ - gf_log (this->name, GF_LOG_ERROR, "out of memory"); \ - goto label; \ - } - -#define BD_VALIDATE_LOCAL_OR_GOTO(local, op_errno, label) \ - if (!local) { \ - op_errno = EINVAL; \ - goto label; \ - } - -#define BD_STACK_UNWIND(typ, frame, args ...) 
do { \ - bd_local_t *__local = frame->local; \ - xlator_t *__this = frame->this; \ - \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (typ, frame, args); \ - if (__local) \ - bd_local_free (__this, __local); \ - } while (0) - -typedef char bd_gfid_t[GF_UUID_BUF_SIZE]; - -/** - * bd_fd - internal structure - */ -typedef struct bd_fd { - int fd; - int32_t flag; - int odirect; -} bd_fd_t; - -typedef struct bd_priv { - lvm_t handle; - char *vg; - char *pool; - int caps; - gf_boolean_t aio_init_done; - gf_boolean_t aio_capable; - gf_boolean_t aio_configured; -#ifdef HAVE_LIBAIO - io_context_t ctxp; - pthread_t aiothread; -#endif -} bd_priv_t; - - -typedef enum bd_type { - BD_TYPE_NONE, - BD_TYPE_LV, -} bd_type_t; - -typedef struct { - struct iatt iatt; - char *type; -} bd_attr_t; - -typedef enum { - BD_OF_NONE, - BD_OF_CLONE, - BD_OF_SNAPSHOT, - BD_OF_MERGE, -} bd_offload_t; - -typedef struct { - dict_t *dict; - bd_attr_t *bdatt; - inode_t *inode; - loc_t loc; - fd_t *fd; - data_t *data; /* for setxattr */ - bd_offload_t offload; - uint64_t size; - loc_t *dloc; -} bd_local_t; - -/* Prototypes */ -int bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx); -int bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx); -int bd_scan_vg (xlator_t *this, bd_priv_t *priv); -bd_local_t *bd_local_init (call_frame_t *frame, xlator_t *this); -void bd_local_free (xlator_t *this, bd_local_t *local); -int bd_fd_ctx_get (xlator_t *this, fd_t *fd, bd_fd_t **bdfd); -char *page_aligned_alloc (size_t size, char **aligned_buf); -int bd_validate_bd_xattr (xlator_t *this, char *bd, char **type, - uint64_t *lv_size, uuid_t uuid); -uint64_t bd_get_default_extent (bd_priv_t *priv); -uint64_t bd_adjust_size (bd_priv_t *priv, size_t size); -int bd_create (uuid_t uuid, uint64_t size, char *type, bd_priv_t *priv); -int bd_resize (bd_priv_t *priv, uuid_t uuid, size_t size); -int bd_delete_lv (bd_priv_t *priv, const char *lv_name, int *op_errno); -int bd_snapshot_create 
(bd_local_t *local, bd_priv_t *priv); -int bd_clone (bd_local_t *local, bd_priv_t *priv); - -int bd_merge (bd_priv_t *priv, uuid_t gfid); -int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); -void bd_update_amtime(struct iatt *iatt, int flag); -int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv); -int bd_clone (bd_local_t *local, bd_priv_t *priv); -int bd_merge (bd_priv_t *priv, uuid_t gfid); -int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); -int bd_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, size_t len, struct iatt *prebuf, - struct iatt *postbuf); - -#endif diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am index 509b0524921..c080a229ff3 100644 --- a/xlators/storage/posix/src/Makefile.am +++ b/xlators/storage/posix/src/Makefile.am @@ -1,20 +1,25 @@ - +if WITH_SERVER xlator_LTLIBRARIES = posix.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage -posix_la_LDFLAGS = -module -avoid-version +posix_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c +posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c \ + posix-gfid-path.c posix-entry-ops.c posix-inode-fd-ops.c \ + posix-common.c posix-metadata.c posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO) \ - $(ACL_LIBS) + $(ACL_LIBS) -noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h +noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \ + posix-messages.h posix-gfid-path.h posix-inode-handle.h \ + posix-metadata.h posix-metadata-disk.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src \ - -I$(top_srcdir)/rpc/rpc-lib/src + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/timer-wheel -AM_CFLAGS = -fno-strict-aliasing -Wall 
$(GF_CFLAGS) +AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) -I$(top_srcdir)/glusterfsd/src CLEANFILES = diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c index c3bbddd6737..d0cb0002bbf 100644 --- a/xlators/storage/posix/src/posix-aio.c +++ b/xlators/storage/posix/src/posix-aio.c @@ -7,563 +7,550 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "xlator.h" -#include "glusterfs.h" #include "posix.h" #include <sys/uio.h> +#include "posix-messages.h" #ifdef HAVE_LIBAIO #include <libaio.h> - void -__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, - off_t offset, size_t size) +__posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags, + off_t offset, size_t size) { - int odirect = 0; - int flags = 0; - int ret = 0; - - odirect = pfd->odirect; - - if ((fd->flags|opflags) & O_DIRECT) { - /* if instructed, use O_DIRECT always */ - odirect = 1; - } else { - /* else use O_DIRECT when feasible */ - if ((offset|size) & 0xfff) - odirect = 0; - else - odirect = 1; - } - - if (!odirect && pfd->odirect) { - flags = fcntl (pfd->fd, F_GETFL); - ret = fcntl (pfd->fd, F_SETFL, (flags & (~O_DIRECT))); - pfd->odirect = 0; - } - - if (odirect && !pfd->odirect) { - flags = fcntl (pfd->fd, F_GETFL); - ret = fcntl (pfd->fd, F_SETFL, (flags | O_DIRECT)); - pfd->odirect = 1; - } - - if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, - "fcntl() failed (%s). 
fd=%d flags=%d pfd->odirect=%d", - strerror (errno), pfd->fd, flags, pfd->odirect); - } + int odirect = 0; + int flags = 0; + int ret = 0; + + odirect = pfd->odirect; + + if ((fd->flags | opflags) & O_DIRECT) { + /* if instructed, use O_DIRECT always */ + odirect = 1; + } else { + /* else use O_DIRECT when feasible */ + if ((offset | size) & 0xfff) + odirect = 0; + else + odirect = 1; + } + + if (!odirect && pfd->odirect) { + flags = fcntl(pfd->fd, F_GETFL); + ret = fcntl(pfd->fd, F_SETFL, (flags & (~O_DIRECT))); + pfd->odirect = 0; + } + + if (odirect && !pfd->odirect) { + flags = fcntl(pfd->fd, F_GETFL); + ret = fcntl(pfd->fd, F_SETFL, (flags | O_DIRECT)); + pfd->odirect = 1; + } + + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_FCNTL_FAILED, + "fcntl() failed. fd=%d flags=%d pfd->odirect=%d", pfd->fd, flags, + pfd->odirect); + } } - struct posix_aio_cb { - struct iocb iocb; - call_frame_t *frame; - struct iobuf *iobuf; - struct iobref *iobref; - struct iatt prebuf; - int fd; - int op; - off_t offset; + struct iocb iocb; + call_frame_t *frame; + struct iobuf *iobuf; + struct iobref *iobref; + struct iatt prebuf; + int _fd; + fd_t *fd; + int op; + off_t offset; }; - int -posix_aio_readv_complete (struct posix_aio_cb *paiocb, int res, int res2) +posix_aio_readv_complete(struct posix_aio_cb *paiocb, int res, int res2) { - call_frame_t *frame = NULL; - xlator_t *this = NULL; - struct iobuf *iobuf = NULL; - struct iatt postbuf = {0,}; - int _fd = -1; - int op_ret = -1; - int op_errno = 0; - struct iovec iov; - struct iobref *iobref = NULL; - int ret = 0; - off_t offset = 0; - struct posix_private * priv = NULL; - - - frame = paiocb->frame; - this = frame->this; - priv = this->private; - iobuf = paiocb->iobuf; - _fd = paiocb->fd; - offset = paiocb->offset; - - if (res < 0) { - op_ret = -1; - op_errno = -res; - gf_log (this->name, GF_LOG_ERROR, - "readv(async) failed fd=%d,size=%lu,offset=%llu (%d/%s)", - _fd, paiocb->iocb.u.c.nbytes, - (unsigned long 
long) paiocb->offset, - res, strerror (op_errno)); - goto out; - } - - ret = posix_fdstat (this, _fd, &postbuf); - if (ret != 0) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fstat failed on fd=%d: %s", _fd, - strerror (op_errno)); - goto out; - } - - op_ret = res; - op_errno = 0; - - iobref = iobref_new (); - if (!iobref) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - iobref_add (iobref, iobuf); - - iov.iov_base = iobuf_ptr (iobuf); - iov.iov_len = op_ret; - - - /* Hack to notify higher layers of EOF. */ - if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size) - op_errno = ENOENT; - - LOCK (&priv->lock); - { - priv->read_value += op_ret; - } - UNLOCK (&priv->lock); + call_frame_t *frame = NULL; + xlator_t *this = NULL; + struct iobuf *iobuf = NULL; + struct iatt postbuf = { + 0, + }; + int _fd = -1; + int op_ret = -1; + int op_errno = 0; + struct iovec iov; + struct iobref *iobref = NULL; + int ret = 0; + off_t offset = 0; + struct posix_private *priv = NULL; + fd_t *fd = NULL; + + frame = paiocb->frame; + this = frame->this; + priv = this->private; + iobuf = paiocb->iobuf; + fd = paiocb->fd; + _fd = paiocb->_fd; + offset = paiocb->offset; + + if (res < 0) { + op_ret = -1; + op_errno = -res; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_READV_FAILED, + "readv(async) failed fd=%d,size=%lu,offset=%llu (%d)", _fd, + paiocb->iocb.u.c.nbytes, (unsigned long long)paiocb->offset, + res); + goto out; + } + + ret = posix_fdstat(this, fd->inode, _fd, &postbuf); + if (ret != 0) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%d", _fd); + goto out; + } + + op_ret = res; + op_errno = 0; + + iobref = iobref_new(); + if (!iobref) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + iobref_add(iobref, iobuf); + + iov.iov_base = iobuf_ptr(iobuf); + iov.iov_len = op_ret; + + /* Hack to notify higher layers of EOF. 
*/ + if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size) + op_errno = ENOENT; + + GF_ATOMIC_ADD(priv->read_value, op_ret); out: - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1, - &postbuf, iobref, NULL); - if (iobuf) - iobuf_unref (iobuf); - if (iobref) - iobref_unref (iobref); + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf, + iobref, NULL); + if (iobuf) + iobuf_unref(iobuf); + if (iobref) + iobref_unref(iobref); - GF_FREE (paiocb); + if (paiocb->fd) + fd_unref(paiocb->fd); - return 0; -} + GF_FREE(paiocb); + return 0; +} int -posix_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, uint32_t flags, dict_t *xdata) +posix_aio_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - int32_t op_errno = EINVAL; - int _fd = -1; - struct iobuf *iobuf = NULL; - struct posix_fd * pfd = NULL; - int ret = -1; - struct posix_aio_cb *paiocb = NULL; - struct posix_private *priv = NULL; - struct iocb *iocb = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - priv = this->private; - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto err; - } - _fd = pfd->fd; - - if (!size) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); - goto err; - } - - iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); - if (!iobuf) { - op_errno = ENOMEM; - goto err; - } - - paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb); - if (!paiocb) { - op_errno = ENOMEM; - goto err; - } - - - paiocb->frame = frame; - paiocb->iobuf = iobuf; - paiocb->offset = offset; - paiocb->fd = _fd; - paiocb->op = GF_FOP_READ; - - paiocb->iocb.data = paiocb; - paiocb->iocb.aio_fildes = _fd; - paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD; - paiocb->iocb.aio_reqprio = 0; - 
paiocb->iocb.u.c.buf = iobuf_ptr (iobuf); - paiocb->iocb.u.c.nbytes = size; - paiocb->iocb.u.c.offset = offset; - - iocb = &paiocb->iocb; - - LOCK (&fd->lock); - { - __posix_fd_set_odirect (fd, pfd, flags, offset, size); - - ret = io_submit (priv->ctxp, 1, &iocb); - } - UNLOCK (&fd->lock); - - if (ret != 1) { - gf_log (this->name, GF_LOG_ERROR, - "io_submit() returned %d", ret); - op_errno = -ret; - goto err; - } - - return 0; + int32_t op_errno = EINVAL; + int _fd = -1; + struct iobuf *iobuf = NULL; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_aio_cb *paiocb = NULL; + struct posix_private *priv = NULL; + struct iocb *iocb = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + priv = this->private; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto err; + } + _fd = pfd->fd; + + if (!size) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_INVALID_ARGUMENT, + "size=%" GF_PRI_SIZET, size); + goto err; + } + + iobuf = iobuf_get2(this->ctx->iobuf_pool, size); + if (!iobuf) { + op_errno = ENOMEM; + goto err; + } + + paiocb = GF_CALLOC(1, sizeof(*paiocb), gf_posix_mt_paiocb); + if (!paiocb) { + op_errno = ENOMEM; + goto err; + } + + paiocb->frame = frame; + paiocb->iobuf = iobuf; + paiocb->offset = offset; + paiocb->fd = fd_ref(fd); + paiocb->_fd = _fd; + paiocb->op = GF_FOP_READ; + + paiocb->iocb.data = paiocb; + paiocb->iocb.aio_fildes = _fd; + paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD; + paiocb->iocb.aio_reqprio = 0; + paiocb->iocb.u.c.buf = iobuf_ptr(iobuf); + paiocb->iocb.u.c.nbytes = size; + paiocb->iocb.u.c.offset = offset; + + iocb = &paiocb->iocb; + + LOCK(&fd->lock); + { + __posix_fd_set_odirect(fd, pfd, flags, offset, size); + + ret = io_submit(priv->ctxp, 1, &iocb); + } + UNLOCK(&fd->lock); + + if (ret != 1) { + op_errno = -ret; + 
gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_IO_SUBMIT_FAILED, + "io_submit() returned %d", ret); + goto err; + } + + return 0; err: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0); - if (iobuf) - iobuf_unref (iobuf); + STACK_UNWIND_STRICT(readv, frame, -1, op_errno, 0, 0, 0, 0, 0); + if (iobuf) + iobuf_unref(iobuf); - if (paiocb) - GF_FREE (paiocb); + if (paiocb) { + if (paiocb->fd) + fd_unref(paiocb->fd); + GF_FREE(paiocb); + } - return 0; + return 0; } - int -posix_aio_writev_complete (struct posix_aio_cb *paiocb, int res, int res2) +posix_aio_writev_complete(struct posix_aio_cb *paiocb, int res, int res2) { - call_frame_t *frame = NULL; - xlator_t *this = NULL; - struct iatt prebuf = {0,}; - struct iatt postbuf = {0,}; - int _fd = -1; - int op_ret = -1; - int op_errno = 0; - int ret = 0; - struct posix_private * priv = NULL; - - - frame = paiocb->frame; - this = frame->this; - priv = this->private; - prebuf = paiocb->prebuf; - _fd = paiocb->fd; - - if (res < 0) { - op_ret = -1; - op_errno = -res; - gf_log (this->name, GF_LOG_ERROR, - "writev(async) failed fd=%d,offset=%llu (%d/%s)", - _fd, (unsigned long long) paiocb->offset, res, - strerror (op_errno)); - - goto out; - } - - ret = posix_fdstat (this, _fd, &postbuf); - if (ret != 0) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fstat failed on fd=%d: %s", _fd, - strerror (op_errno)); - goto out; - } - - - op_ret = res; - op_errno = 0; - - LOCK (&priv->lock); - { - priv->write_value += op_ret; - } - UNLOCK (&priv->lock); + call_frame_t *frame = NULL; + xlator_t *this = NULL; + struct iatt prebuf = { + 0, + }; + struct iatt postbuf = { + 0, + }; + int _fd = -1; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + struct posix_private *priv = NULL; + fd_t *fd = NULL; + + if (!paiocb) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + frame = paiocb->frame; + this = frame->this; + priv = this->private; + prebuf = paiocb->prebuf; + fd = paiocb->fd; + _fd 
= paiocb->_fd; + + if (res < 0) { + op_ret = -1; + op_errno = -res; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITEV_FAILED, + "writev(async) failed fd=%d,offset=%llu (%d)", _fd, + (unsigned long long)paiocb->offset, res); + + goto out; + } + + ret = posix_fdstat(this, fd->inode, _fd, &postbuf); + if (ret != 0) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%d", _fd); + goto out; + } + + op_ret = res; + op_errno = 0; + + GF_ATOMIC_ADD(priv->write_value, op_ret); out: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf, - NULL); - - if (paiocb) { - if (paiocb->iobref) - iobref_unref (paiocb->iobref); - GF_FREE (paiocb); - } - - return 0; + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &prebuf, &postbuf, + NULL); + + if (paiocb) { + if (paiocb->iobref) + iobref_unref(paiocb->iobref); + if (paiocb->fd) + fd_unref(paiocb->fd); + GF_FREE(paiocb); + } + + return 0; } - int -posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *iov, int count, off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +posix_aio_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *iov, int count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - int32_t op_errno = EINVAL; - int _fd = -1; - struct posix_fd * pfd = NULL; - int ret = -1; - struct posix_aio_cb *paiocb = NULL; - struct posix_private *priv = NULL; - struct iocb *iocb = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - priv = this->private; - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto err; - } - _fd = pfd->fd; - - paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb); - if (!paiocb) { - op_errno = ENOMEM; - goto err; - } - - - paiocb->frame = frame; - 
paiocb->offset = offset; - paiocb->fd = _fd; - paiocb->op = GF_FOP_WRITE; - - paiocb->iocb.data = paiocb; - paiocb->iocb.aio_fildes = _fd; - paiocb->iobref = iobref_ref (iobref); - paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV; - paiocb->iocb.aio_reqprio = 0; - paiocb->iocb.u.v.vec = iov; - paiocb->iocb.u.v.nr = count; - paiocb->iocb.u.v.offset = offset; - - iocb = &paiocb->iocb; - - ret = posix_fdstat (this, _fd, &paiocb->prebuf); - if (ret != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto err; - } - - - LOCK (&fd->lock); - { - __posix_fd_set_odirect (fd, pfd, flags, offset, - iov_length (iov, count)); - - ret = io_submit (priv->ctxp, 1, &iocb); - } - UNLOCK (&fd->lock); - - if (ret != 1) { - gf_log (this->name, GF_LOG_ERROR, - "io_submit() returned %d", ret); - op_errno = -ret; - goto err; - } - - return 0; + int32_t op_errno = EINVAL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_aio_cb *paiocb = NULL; + struct posix_private *priv = NULL; + struct iocb *iocb = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_errno, op_errno, err); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto err; + } + _fd = pfd->fd; + + paiocb = GF_CALLOC(1, sizeof(*paiocb), gf_posix_mt_paiocb); + if (!paiocb) { + op_errno = ENOMEM; + goto err; + } + + paiocb->frame = frame; + paiocb->offset = offset; + paiocb->fd = fd_ref(fd); + paiocb->_fd = _fd; + paiocb->op = GF_FOP_WRITE; + + paiocb->iocb.data = paiocb; + paiocb->iocb.aio_fildes = _fd; + paiocb->iobref = iobref_ref(iobref); + paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV; + paiocb->iocb.aio_reqprio = 0; + paiocb->iocb.u.v.vec = iov; + paiocb->iocb.u.v.nr = count; + 
paiocb->iocb.u.v.offset = offset; + + iocb = &paiocb->iocb; + + ret = posix_fdstat(this, fd->inode, _fd, &paiocb->prebuf); + if (ret != 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%p", fd); + goto err; + } + + LOCK(&fd->lock); + { + __posix_fd_set_odirect(fd, pfd, flags, offset, iov_length(iov, count)); + + ret = io_submit(priv->ctxp, 1, &iocb); + } + UNLOCK(&fd->lock); + + if (ret != 1) { + op_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_IO_SUBMIT_FAILED, + "io_submit() returned %d,gfid=%s", ret, + uuid_utoa(fd->inode->gfid)); + goto err; + } + + return 0; err: - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0); + STACK_UNWIND_STRICT(writev, frame, -1, op_errno, 0, 0, 0); - if (paiocb) { - if (paiocb->iobref) - iobref_unref (paiocb->iobref); - GF_FREE (paiocb); - } + if (paiocb) { + if (paiocb->iobref) + iobref_unref(paiocb->iobref); + if (paiocb->fd) + fd_unref(paiocb->fd); + GF_FREE(paiocb); + } - return 0; + return 0; } - void * -posix_aio_thread (void *data) +posix_aio_thread(void *data) { - xlator_t *this = NULL; - struct posix_private *priv = NULL; - int ret = 0; - int i = 0; - struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS]; - struct io_event *event = NULL; - struct posix_aio_cb *paiocb = NULL; - - this = data; - THIS = this; - priv = this->private; - - for (;;) { - memset (&events[0], 0, sizeof (events)); - ret = io_getevents (priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS, - &events[0], NULL); - if (ret <= 0) { - gf_log (this->name, GF_LOG_ERROR, - "io_getevents() returned %d", ret); - if (ret == -EINTR) - continue; - break; - } - - for (i = 0; i < ret; i++) { - event = &events[i]; - - paiocb = event->data; - - switch (paiocb->op) { - case GF_FOP_READ: - posix_aio_readv_complete (paiocb, event->res, - event->res2); - break; - case GF_FOP_WRITE: - posix_aio_writev_complete (paiocb, event->res, - event->res2); - break; - default: - gf_log (this->name, 
GF_LOG_ERROR, - "unknown op %d found in piocb", - paiocb->op); - break; - } - } + xlator_t *this = NULL; + struct posix_private *priv = NULL; + int ret = 0; + int i = 0; + struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS]; + struct io_event *event = NULL; + struct posix_aio_cb *paiocb = NULL; + + this = data; + THIS = this; + priv = this->private; + + for (;;) { + memset(&events[0], 0, sizeof(events)); + ret = io_getevents(priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS, + &events[0], NULL); + if (ret <= 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_IO_GETEVENTS_FAILED, + "io_getevents() returned %d", ret); + if (ret == -EINTR) + continue; + break; } - return NULL; -} + for (i = 0; i < ret; i++) { + event = &events[i]; + + paiocb = event->data; + + switch (paiocb->op) { + case GF_FOP_READ: + posix_aio_readv_complete(paiocb, event->res, event->res2); + break; + case GF_FOP_WRITE: + posix_aio_writev_complete(paiocb, event->res, event->res2); + break; + default: + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_OP, + "unknown op %d found in piocb", paiocb->op); + break; + } + } + } + return NULL; +} int -posix_aio_init (xlator_t *this) +posix_aio_init(xlator_t *this) { - struct posix_private *priv = NULL; - int ret = 0; - - priv = this->private; - - ret = io_setup (POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp); - if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) { - gf_log (this->name, GF_LOG_WARNING, - "Linux AIO not available at run-time." - " Continuing with synchronous IO"); - ret = 0; - goto out; - } - - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "io_setup() failed. 
ret=%d, errno=%d", - ret, errno); - goto out; - } - - ret = gf_thread_create (&priv->aiothread, NULL, - posix_aio_thread, this); - if (ret != 0) { - io_destroy (priv->ctxp); - goto out; - } - - this->fops->readv = posix_aio_readv; - this->fops->writev = posix_aio_writev; + struct posix_private *priv = NULL; + int ret = 0; + + priv = this->private; + + ret = io_setup(POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp); + if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_AIO_UNAVAILABLE, + "Linux AIO not available at run-time." + " Continuing with synchronous IO"); + ret = 0; + goto out; + } + + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_IO_SETUP_FAILED, + "io_setup() failed. ret=%d", ret); + goto out; + } + + ret = gf_thread_create(&priv->aiothread, NULL, posix_aio_thread, this, + "posixaio"); + if (ret != 0) { + io_destroy(priv->ctxp); + goto out; + } + + this->fops->readv = posix_aio_readv; + this->fops->writev = posix_aio_writev; out: - return ret; + return ret; } - int -posix_aio_on (xlator_t *this) +posix_aio_on(xlator_t *this) { - struct posix_private *priv = NULL; - int ret = 0; - - priv = this->private; - - if (!priv->aio_init_done) { - ret = posix_aio_init (this); - if (ret == 0) - priv->aio_capable = _gf_true; - else - priv->aio_capable = _gf_false; - priv->aio_init_done = _gf_true; - } - - if (priv->aio_capable) { - this->fops->readv = posix_aio_readv; - this->fops->writev = posix_aio_writev; - } - - return ret; + struct posix_private *priv = NULL; + int ret = 0; + + priv = this->private; + + if (!priv->aio_init_done) { + ret = posix_aio_init(this); + if (ret == 0) + priv->aio_capable = _gf_true; + else + priv->aio_capable = _gf_false; + priv->aio_init_done = _gf_true; + } + + if (priv->aio_capable) { + this->fops->readv = posix_aio_readv; + this->fops->writev = posix_aio_writev; + } + + return ret; } int -posix_aio_off (xlator_t *this) +posix_aio_off(xlator_t *this) { - this->fops->readv = 
posix_readv; - this->fops->writev = posix_writev; + this->fops->readv = posix_readv; + this->fops->writev = posix_writev; - return 0; + return 0; } - #else - int -posix_aio_on (xlator_t *this) +posix_aio_on(xlator_t *this) { - gf_log (this->name, GF_LOG_INFO, - "Linux AIO not available at build-time." - " Continuing with synchronous IO"); - return 0; + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE, + "Linux AIO not available at build-time." + " Continuing with synchronous IO"); + return 0; } int -posix_aio_off (xlator_t *this) +posix_aio_off(xlator_t *this) { - gf_log (this->name, GF_LOG_INFO, - "Linux AIO not available at build-time." - " Continuing with synchronous IO"); - return 0; + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE, + "Linux AIO not available at build-time." + " Continuing with synchronous IO"); + return 0; } void -__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, - off_t offset, size_t size) +__posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags, + off_t offset, size_t size) { - xlator_t *this = THIS; - gf_log (this->name, GF_LOG_INFO, - "Linux AIO not available at build-time." - " Continuing with synchronous IO"); - return; + xlator_t *this = THIS; + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_AIO_UNAVAILABLE, + "Linux AIO not available at build-time." + " Continuing with synchronous IO"); + return; } + #endif diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h index 5bde716019a..b316deb3229 100644 --- a/xlators/storage/posix/src/posix-aio.h +++ b/xlators/storage/posix/src/posix-aio.h @@ -10,14 +10,6 @@ #ifndef _POSIX_AIO_H #define _POSIX_AIO_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "xlator.h" -#include "glusterfs.h" - // Maximum number of concurrently submitted IO events. 
The heaviest load // GlusterFS has been able to handle had 60-80 concurrent calls #define POSIX_AIO_MAX_NR_EVENTS 256 @@ -25,15 +17,18 @@ // Maximum number of completed IO operations to reap per getevents syscall #define POSIX_AIO_MAX_NR_GETEVENTS 16 +int +posix_aio_on(xlator_t *this); +int +posix_aio_off(xlator_t *this); -int posix_aio_on (xlator_t *this); -int posix_aio_off (xlator_t *this); - -int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata); +int +posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); -int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata); +int +posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata); #endif /* !_POSIX_AIO_H */ diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c new file mode 100644 index 00000000000..f10722ec3fb --- /dev/null +++ b/xlators/storage/posix/src/posix-common.c @@ -0,0 +1,1524 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include "posix-inode-handle.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> +#include "glusterfs3-xdr.h" +#include "posix-aio.h" +#include <glusterfs/glusterfs-acl.h> +#include "posix-messages.h" +#include <glusterfs/events.h> +#include "posix-gfid-path.h" +#include <glusterfs/compat-uuid.h> +#include "timer-wheel.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR \ + uid_t old_fsuid; \ + gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) \ + do { \ + old_fsuid = setfsuid(uid); \ + old_fsgid = setfsgid(gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() \ + do { \ + setfsuid(old_fsuid); \ + setfsgid(old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. 
*/ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#endif + +int32_t +posix_priv(xlator_t *this) +{ + struct posix_private *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + if (!this) + return 0; + + (void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); + gf_proc_dump_add_section("%s", key_prefix); + + priv = this->private; + + if (!priv) + return 0; + + gf_proc_dump_write("base_path", "%s", priv->base_path); + gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); + gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); + gf_proc_dump_write("max_write", "%" PRId64, + GF_ATOMIC_GET(priv->write_value)); + + return 0; +} + +int32_t +posix_inode(xlator_t *this) +{ + return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +posix_notify(xlator_t *this, int32_t event, void *data, ...) 
+{ + xlator_t *victim = data; + struct posix_private *priv = this->private; + int ret = 0; + struct timespec sleep_till = { + 0, + }; + glusterfs_ctx_t *ctx = this->ctx; + + switch (event) { + case GF_EVENT_PARENT_UP: { + /* Notify the parent that posix xlator is up */ + default_notify(this, GF_EVENT_CHILD_UP, data); + } break; + + case GF_EVENT_PARENT_DOWN: { + if (!victim->cleanup_starting) + break; + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); + { + priv->janitor_task_stop = _gf_true; + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, + priv->janitor); + if (!ret) { + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + /* Wait to set janitor_task flag to _gf_false by + * janitor_task_done */ + while (priv->janitor_task_stop) { + (void)pthread_cond_timedwait(&priv->janitor_cond, + &priv->janitor_mutex, + &sleep_till); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + } + } + } + pthread_mutex_unlock(&priv->janitor_mutex); + GF_FREE(priv->janitor); + } + priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; + default: + /* */ + break; + } + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_posix_mt_end + 1); + + if (ret != 0) { + return ret; + } + + return ret; +} + +static int +posix_set_owner(xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + struct stat st = { + 0, + }; + + priv = this->private; + + ret = sys_lstat(priv->base_path, &st); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED, + "Failed to stat " + "brick path %s", + 
priv->base_path); + return ret; + } + + if ((uid == -1 || st.st_uid == uid) && (gid == -1 || st.st_gid == gid)) + return 0; + + ret = sys_chown(priv->base_path, uid, gid); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED, + "Failed to set uid/gid for" + " brick path %s", + priv->base_path); + + return ret; +} +static int +set_gfid2path_separator(struct posix_private *priv, const char *str) +{ + int str_len = 0; + + str_len = strlen(str); + if (str_len > 0 && str_len < 8) { + strcpy(priv->gfid2path_sep, str); + return 0; + } + + return -1; +} + +static int +set_batch_fsync_mode(struct posix_private *priv, const char *str) +{ + if (strcmp(str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp(str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp(str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp(str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp(str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + +#ifdef GF_DARWIN_HOST_OS +static int +set_xattr_user_namespace_mode(struct posix_private *priv, const char *str) +{ + if (strcmp(str, "none") == 0) + priv->xattr_user_namespace = XATTR_NONE; + else if (strcmp(str, "strip") == 0) + priv->xattr_user_namespace = XATTR_STRIP; + else if (strcmp(str, "append") == 0) + priv->xattr_user_namespace = XATTR_APPEND; + else if (strcmp(str, "both") == 0) + priv->xattr_user_namespace = XATTR_BOTH; + else + return -1; + return 0; +} +#endif + +int +posix_reconfigure(xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str = NULL; + char *gfid2path_sep = NULL; + int32_t force_create_mode = -1; + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = 
-1; + + priv = this->private; + + GF_OPTION_RECONF("brick-uid", uid, options, int32, out); + GF_OPTION_RECONF("brick-gid", gid, options, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner(this, uid, gid); + + GF_OPTION_RECONF("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF("batch-fsync-mode", batch_fsync_mode_str, options, str, + out); + + if (set_batch_fsync_mode(priv, batch_fsync_mode_str) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF("gfid2path-separator", gfid2path_sep, options, str, out); + if (set_gfid2path_separator(priv, gfid2path_sep) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_RECONF("xattr-user-namespace-mode", xattr_user_namespace_mode_str, + options, str, out); + + if (set_xattr_user_namespace_mode(priv, xattr_user_namespace_mode_str) != + 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } + +#endif + + GF_OPTION_RECONF("linux-aio", priv->aio_configured, options, bool, out); + + if (priv->aio_configured) + posix_aio_on(this); + else + posix_aio_off(this); + + GF_OPTION_RECONF("update-link-count-parent", priv->update_pgfid_nlinks, + options, bool, out); + + GF_OPTION_RECONF("gfid2path", priv->gfid2path, options, bool, out); + + GF_OPTION_RECONF("node-uuid-pathinfo", priv->node_uuid_pathinfo, options, + bool, out); + + if (priv->node_uuid_pathinfo && (gf_uuid_is_null(priv->glusterd_uuid))) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF("reserve", 
priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ + priv->disk_unit = 0; + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + + if (priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed"); + goto out; + } + } + + GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, + options, uint32, out); + GF_OPTION_RECONF("health-check-timeout", priv->health_check_timeout, + options, uint32, out); + if (priv->health_check_interval) { + ret = posix_spawn_health_check_thread(this); + if (ret) + goto out; + } + + GF_OPTION_RECONF("shared-brick-count", priv->shared_brick_count, options, + int32, out); + + GF_OPTION_RECONF("disable-landfill-purge", priv->disable_landfill_purge, + options, bool, out); + if (priv->disable_landfill_purge) { + gf_log(this->name, GF_LOG_WARNING, + "Janitor WILL NOT purge the landfill directory. 
" + "Your landfill directory" + " may fill up this brick."); + } else { + gf_msg_debug(this->name, 0, + "Janitor will purge the landfill " + "directory, which is default behavior"); + } + + GF_OPTION_RECONF("force-create-mode", force_create_mode, options, int32, + out); + priv->force_create_mode = force_create_mode; + + GF_OPTION_RECONF("force-directory-mode", force_directory_mode, options, + int32, out); + priv->force_directory_mode = force_directory_mode; + + GF_OPTION_RECONF("create-mask", create_mask, options, int32, out); + priv->create_mask = create_mask; + + GF_OPTION_RECONF("create-directory-mask", create_directory_mask, options, + int32, out); + priv->create_directory_mask = create_directory_mask; + + GF_OPTION_RECONF("max-hardlinks", priv->max_hardlinks, options, uint32, + out); + + GF_OPTION_RECONF("fips-mode-rchecksum", priv->fips_mode_rchecksum, options, + bool, out); + + GF_OPTION_RECONF("ctime", priv->ctime, options, bool, out); + + ret = 0; +out: + return ret; +} + +int32_t +posix_delete_unlink_entry(const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) +{ + int ret = 0; + + if (!fpath) + goto out; + + switch (typeflag) { + case FTW_SL: + case FTW_NS: + case FTW_F: + case FTW_SLN: + ret = sys_unlink(fpath); + break; + case FTW_D: + case FTW_DP: + case FTW_DNR: + if (ftwbuf->level != 0) { + ret = sys_rmdir(fpath); + } + break; + default: + break; + } + if (ret) { + gf_msg("posix_delete_unlink_entry", GF_LOG_WARNING, errno, + P_MSG_HANDLE_CREATE, + "Deletion of entries %s failed" + "Please delete it manually", + fpath); + } +out: + return 0; +} + +int32_t +posix_delete_unlink(const char *unlink_path) +{ + int ret = -1; + int flags = 0; + + flags |= (FTW_DEPTH | FTW_PHYS); + + ret = nftw(unlink_path, posix_delete_unlink_entry, 2, flags); + if (ret) { + gf_msg("posix_delete_unlink", GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Deleting files from %s failed", unlink_path); + } + return ret; +} + +int32_t 
+posix_create_unlink_dir(xlator_t *this) +{ + struct posix_private *priv = NULL; + struct stat stbuf; + int ret = -1; + uuid_t gfid = {0}; + char gfid_str[64] = {0}; + char unlink_path[PATH_MAX] = { + 0, + }; + char landfill_path[PATH_MAX] = { + 0, + }; + + priv = this->private; + + (void)snprintf(unlink_path, sizeof(unlink_path), "%s/%s", priv->base_path, + GF_UNLINK_PATH); + + gf_uuid_generate(gfid); + uuid_utoa_r(gfid, gfid_str); + + (void)snprintf(landfill_path, sizeof(landfill_path), "%s/%s/%s", + priv->base_path, GF_LANDFILL_PATH, gfid_str); + + ret = sys_stat(unlink_path, &stbuf); + switch (ret) { + case -1: + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Checking for %s failed", unlink_path); + return -1; + } + break; + case 0: + if (!S_ISDIR(stbuf.st_mode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Not a directory: %s", unlink_path); + return -1; + } + ret = posix_delete_unlink(unlink_path); + return 0; + default: + break; + } + ret = sys_mkdir(unlink_path, 0600); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", unlink_path); + return -1; + } + + return 0; +} + +int +posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name) +{ + int ret = -1; + + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno == ENOENT) { + ret = sys_mkdirat(pdirfd, dir_name, 0700); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", dir_name); + goto out; + } + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-1 %s ", dir_name); + goto out; + } + } +out: + return ret; +} + +/** + * init - + */ +int +posix_init(xlator_t *this) +{ + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t 
*tmp_data = NULL; + struct stat buf = { + 0, + }; + gf_boolean_t tmp_bool = 0; + int ret = 0; + int op_ret = -1; + int op_errno = 0; + ssize_t size = -1; + uuid_t old_uuid = { + 0, + }; + uuid_t dict_uuid = { + 0, + }; + uuid_t gfid = { + 0, + }; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char *guuid = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str; + char *gfid2path_sep = NULL; + int force_create = -1; + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; + char dir_handle[PATH_MAX] = { + 0, + }; + int i; + char fhash[4] = { + 0, + }; + int hdirfd = -1; + char value; + + dir_data = dict_get(this->options, "directory"); + + if (this->children) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR, + "FATAL: storage/posix cannot have subvolumes"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING, + "Volume is dangling. Please check the volume file."); + } + + if (!dir_data) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_EXPORT_DIR_MISSING, + "Export directory not specified in volume file."); + ret = -1; + goto out; + } + + umask(000); // umask `masking' is done at the client side + + /* Check whether the specified directory exists, if not log it. 
*/ + op_ret = sys_stat(dir_data->data, &buf); + if ((op_ret != 0) || !S_ISDIR(buf.st_mode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, + "Directory '%s' doesn't exist, exiting.", dir_data->data); + ret = -1; + goto out; + } + + _private = GF_CALLOC(1, sizeof(*_private), gf_posix_mt_posix_private); + if (!_private) { + ret = -1; + goto out; + } + + _private->base_path = gf_strdup(dir_data->data); + _private->base_path_length = dir_data->len - 1; + + _private->dirfd = -1; + _private->mount_lock = -1; + for (i = 0; i < 256; i++) + _private->arrdfd[i] = -1; + + ret = dict_get_str(this->options, "hostname", &_private->hostname); + if (ret) { + _private->hostname = GF_CALLOC(256, sizeof(char), gf_common_mt_char); + if (!_private->hostname) { + goto out; + } + ret = gethostname(_private->hostname, 256); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HOSTNAME_MISSING, + "could not find hostname "); + } + } + + /* Check for Extended attribute support, if not present, log it */ + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); + tmp_data = dict_get(this->options, "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION, + "wrong option provided for key " + "\"mandate-attribute\""); + ret = -1; + goto out; + } + if (!tmp_bool) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "starting as per option"); + } else { + gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "exiting."); + ret = -1; + goto out; + } + } else { + gf_msg(this->name, GF_LOG_CRITICAL, 0, P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, exiting."); + ret = -1; + goto out; + } + } + + tmp_data 
= dict_get(this->options, "volume-id"); + if (tmp_data) { + op_ret = gf_uuid_parse(tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_VOLUME_ID, + "wrong volume-id (%s) set" + " in volume file", + tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr(dir_data->data, "trusted.glusterfs.volume-id", + old_uuid, 16); + if (size == 16) { + if (gf_uuid_compare(old_uuid, dict_uuid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_VOLUME_ID, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa(old_uuid)); + gf_event(EVENT_POSIX_ALREADY_PART_OF_VOLUME, + "volume-id=%s;brick=%s:%s", uuid_utoa(old_uuid), + _private->hostname, _private->base_path); + ret = -1; + goto out; + } + } else if ((size == -1) && (errno == ENODATA || errno == ENOATTR)) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_VOLUME_ID_ABSENT, + "Extended attribute trusted.glusterfs." + "volume-id is absent"); + gf_event(EVENT_POSIX_BRICK_NOT_IN_VOLUME, "brick=%s:%s", + _private->hostname, _private->base_path); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA) && (errno != ENOATTR)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_event(EVENT_POSIX_BRICK_VERIFICATION_FAILED, "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg(this->name, GF_LOG_WARNING, errno, + P_MSG_VOLUME_ID_FETCH_FAILED, + "%s: failed to fetch volume-id", dir_data->data); + ret = -1; + goto out; + } else { + ret = -1; + gf_event(EVENT_POSIX_BRICK_VERIFICATION_FAILED, "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_VOLUME_ID_FETCH_FAILED, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr(dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid(gfid)) 
{ + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_SET_FAILED, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa(gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_SET_FAILED, + "%s: wrong value set as gfid", dir_data->data); + ret = -1; + goto out; + } else if ((size == -1) && (errno != ENODATA) && (errno != ENOATTR)) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_SET_FAILED, + "%s: failed to fetch gfid", dir_data->data); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr(dir_data->data, "trusted.gfid", rootgfid, 16, + XATTR_CREATE); + if (size == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_GFID_SET_FAILED, + "%s: failed to set gfid", dir_data->data); + ret = -1; + goto out; + } + } + + ret = 0; + + size = sys_lgetxattr(dir_data->data, POSIX_ACL_ACCESS_XATTR, NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_NOTSUP, + "Posix access control list is not supported."); + gf_event(EVENT_POSIX_ACL_NOT_SUPPORTED, "brick=%s:%s", + _private->hostname, _private->base_path); + } + + /* + * _XOPEN_PATH_MAX is the longest file path len we MUST + * support according to POSIX standard. When prepended + * by the brick base path it may exceed backed filesystem + * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If + * this is the case, chdir() to the brick base path and + * use relative paths when they are too long. 
See also + * MAKE_REAL_PATH in posix-handle.h + */ + _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); + if (_private->path_max != -1 && + _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { + ret = chdir(_private->base_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_BASEPATH_CHDIR_FAILED, + "chdir() to \"%s\" failed", _private->base_path); + goto out; + } +#ifdef __NetBSD__ + /* + * At least on NetBSD, the chdir() above uncovers a + * race condition which cause file lookup to fail + * with ENODATA for a few seconds. The volume quickly + * reaches a sane state, but regression tests are fast + * enough to choke on it. The reason is obscure (as + * often with race conditions), but sleeping here for + * a second seems to workaround the problem. + */ + sleep(1); +#endif + } + + LOCK_INIT(&_private->lock); + GF_ATOMIC_INIT(_private->read_value, 0); + GF_ATOMIC_INIT(_private->write_value, 0); + + _private->export_statfs = 1; + tmp_data = dict_get(this->options, "export-statfs-size"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &_private->export_statfs) == -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL, + "'export-statfs-size' takes only boolean " + "options"); + goto out; + } + if (!_private->export_statfs) + gf_msg_debug(this->name, 0, "'statfs()' returns dummy size"); + } + + _private->background_unlink = 0; + tmp_data = dict_get(this->options, "background-unlink"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &_private->background_unlink) == + -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL, + "'background-unlink'" + " takes only boolean options"); + goto out; + } + + if (_private->background_unlink) + gf_msg_debug(this->name, 0, + "unlinks will be performed in background"); + } + + tmp_data = dict_get(this->options, "o-direct"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &_private->o_direct) == -1) { + ret = -1; 
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL, + "wrong option provided for 'o-direct'"); + goto out; + } + if (_private->o_direct) + gf_msg_debug(this->name, 0, + "o-direct mode is enabled" + " (O_DIRECT for every open)"); + } + + tmp_data = dict_get(this->options, "update-link-count-parent"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &_private->update_pgfid_nlinks) == + -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION, + "wrong value provided " + "for 'update-link-count-parent'"); + goto out; + } + if (_private->update_pgfid_nlinks) + gf_msg_debug(this->name, 0, + "update-link-count-parent" + " is enabled. Thus for each file an " + "extended attribute representing the " + "number of hardlinks for that file " + "within the same parent directory is" + " set."); + } + + ret = dict_get_str(this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (gf_uuid_parse(guuid, _private->glusterd_uuid)) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_INVALID_NODE_UUID, + "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_msg_debug(this->name, 0, + "No glusterd (node) UUID passed -" + " node-uuid xattr request will return \"No such" + " attribute\""); + } + ret = 0; + + GF_OPTION_INIT("janitor-sleep-duration", _private->janitor_sleep_duration, + int32, out); + + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY), + 0); + if (_private->mount_lock < 0) { + ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, + "Could not lock brick directory (%s)", strerror(op_errno)); + goto out; + } +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit(RLIMIT_NOFILE, &lim) == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, 
P_MSG_SET_ULIMIT_FAILED, + "Failed to set 'ulimit -n " + " 1048576'"); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit(RLIMIT_NOFILE, &lim) == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, + P_MSG_SET_FILE_MAX_FAILED, + "Failed to set maximum allowed open " + "file descriptors to 64k"); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_MAX_FILE_OPEN, + "Maximum allowed " + "open file descriptors set to 65536"); + } + } + } +#endif + _private->shared_brick_count = 1; + ret = dict_get_int32(this->options, "shared-brick-count", + &_private->shared_brick_count); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_OPTION_VAL, + "'shared-brick-count' takes only integer " + "values"); + goto out; + } + + this->private = (void *)_private; + snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path, + GF_HIDDEN_PATH); + hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock, + dir_handle); + if (hdirfd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error open directory failed for dir %s", dir_handle); + ret = -1; + goto out; + } + _private->dirfd = hdirfd; + for (i = 0; i < 256; i++) { + snprintf(fhash, sizeof(fhash), "%02x", i); + _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd, + fhash); + if (_private->arrdfd[i] < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error openat failed for file %s", fhash); + ret = -1; + goto out; + } + } + + op_ret = posix_handle_init(this); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init(this); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_create_unlink_dir(this); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + 
"Creation of unlink directory failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT("brick-uid", uid, int32, out); + GF_OPTION_INIT("brick-gid", gid, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner(this, uid, gid); + + GF_OPTION_INIT("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on(this); + + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT("node-uuid-pathinfo", _private->node_uuid_pathinfo, bool, + out); + if (_private->node_uuid_pathinfo && + (gf_uuid_is_null(_private->glusterd_uuid))) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + + if (_private->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed "); + goto out; + } + } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT("health-check-interval", _private->health_check_interval, + uint32, out); + GF_OPTION_INIT("health-check-timeout", _private->health_check_timeout, + uint32, out); + if (_private->health_check_interval) { + ret = posix_spawn_health_check_thread(this); + if (ret) + goto out; + } + posix_janitor_timer_start(this); + + pthread_mutex_init(&_private->fsync_mutex, NULL); + pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + 
pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) + goto out; + + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_FSYNCER_THREAD_CREATE_FAILED, + "fsyncer thread creation failed"); + goto out; + } + + GF_OPTION_INIT("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode(_private, batch_fsync_mode_str) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT("gfid2path", _private->gfid2path, bool, out); + + GF_OPTION_INIT("gfid2path-separator", gfid2path_sep, str, out); + if (set_gfid2path_separator(_private, gfid2path_sep) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_INIT("xattr-user-namespace-mode", xattr_user_namespace_mode_str, + str, out); + + if (set_xattr_user_namespace_mode(_private, + xattr_user_namespace_mode_str) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } +#endif + + GF_OPTION_INIT("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); + + GF_OPTION_INIT("disable-landfill-purge", _private->disable_landfill_purge, + bool, out); + if (_private->disable_landfill_purge) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "Janitor WILL NOT purge the landfill directory. 
" + "Your landfill directory" + " may fill up this brick."); + } + + GF_OPTION_INIT("force-create-mode", force_create, int32, out); + _private->force_create_mode = force_create; + + GF_OPTION_INIT("force-directory-mode", force_directory, int32, out); + _private->force_directory_mode = force_directory; + + GF_OPTION_INIT("create-mask", create_mask, int32, out); + _private->create_mask = create_mask; + + GF_OPTION_INIT("create-directory-mask", create_directory_mask, int32, out); + _private->create_directory_mask = create_directory_mask; + + GF_OPTION_INIT("max-hardlinks", _private->max_hardlinks, uint32, out); + + GF_OPTION_INIT("fips-mode-rchecksum", _private->fips_mode_rchecksum, bool, + out); + + GF_OPTION_INIT("ctime", _private->ctime, bool, out); + +out: + if (ret) { + if (_private) { + if (_private->dirfd >= 0) { + sys_close(_private->dirfd); + _private->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (_private->arrdfd[i] >= 0) { + sys_close(_private->arrdfd[i]); + _private->arrdfd[i] = -1; + } + } + /*unlock brick dir*/ + if (_private->mount_lock >= 0) { + (void)sys_close(_private->mount_lock); + _private->mount_lock = -1; + } + + GF_FREE(_private->base_path); + + GF_FREE(_private->hostname); + + GF_FREE(_private->trash_path); + + GF_FREE(_private); + } + + this->private = NULL; + } + return ret; +} + +void +posix_fini(xlator_t *this) +{ + struct posix_private *priv = this->private; + gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; + int ret = 0; + int i = 0; + + if (!priv) + return; + LOCK(&priv->lock); + { + health_check = priv->health_check_active; + priv->health_check_active = _gf_false; + } + UNLOCK(&priv->lock); + + if (priv->dirfd >= 0) { + sys_close(priv->dirfd); + priv->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (priv->arrdfd[i] >= 0) { + sys_close(priv->arrdfd[i]); + priv->arrdfd[i] = -1; + } + } + + if (health_check) { + (void)gf_thread_cleanup_xint(priv->health_check); + priv->health_check 
= 0; + } + + if (priv->disk_space_check) { + priv->disk_space_check_active = _gf_false; + (void)gf_thread_cleanup_xint(priv->disk_space_check); + priv->disk_space_check = 0; + } + + if (priv->janitor) { + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, + "Failed to delete janitor timer"); + } + GF_FREE(priv->janitor); + priv->janitor = NULL; + } + + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; + } + /*unlock brick dir*/ + if (priv->mount_lock >= 0) { + (void)sys_close(priv->mount_lock); + priv->mount_lock = -1; + } + + GF_FREE(priv->base_path); + LOCK_DESTROY(&priv->lock); + pthread_mutex_destroy(&priv->fsync_mutex); + pthread_cond_destroy(&priv->fsync_cond); + pthread_mutex_destroy(&priv->janitor_mutex); + pthread_cond_destroy(&priv->janitor_cond); + GF_FREE(priv->hostname); + GF_FREE(priv->trash_path); + GF_FREE(priv); + this->private = NULL; + + return; +} + +struct volume_options posix_options[] = { + {.key = {"o-direct"}, .type = GF_OPTION_TYPE_BOOL}, + {.key = {"directory"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = "{{brick.path}}"}, + {.key = {"hostname"}, .type = GF_OPTION_TYPE_ANY}, + {.key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on"}, + {.key = {"mandate-attribute"}, .type = GF_OPTION_TYPE_BOOL}, + {.key = {"background-unlink"}, .type = GF_OPTION_TYPE_BOOL}, + {.key = {"janitor-sleep-duration"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "10", + .description = "Interval (in seconds) between times the internal " + "'landfill' 
directory is emptied."}, + {.key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY, + .default_value = "{{brick.volumeid}}"}, + {.key = {"glusterd-uuid"}, .type = GF_OPTION_TYPE_STR}, + {.key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting uid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting gid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"health-check-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "20", + .validate = GF_OPT_VALIDATE_MIN, + .description = + "Interval in seconds to wait aio_write finish for health check, " + "set to 0 to disable", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"reserve"}, + .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, + .min = 0, + .default_value = "1", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Percentage/Size of disk space to be reserved." 
+ " Set to 0 to disable", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = + "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order.", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"update-link-count-parent"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable placeholders for gfid to path conversion", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"gfid2path"}, + .type = GF_OPTION_TYPE_BOOL, +#ifdef __NetBSD__ + /* + * NetBSD storage of extended attributes for UFS1 badly + * scales when the list of extended attributes names rises. + * This option can add as many extended attributes names + * as we have files, hence we keep it disabled for performance + * sake. 
+ */ + .default_value = "off", +#else + .default_value = "on", +#endif + .description = "Enable logging metadata for gfid to path conversion", + .op_version = {GD_OP_VERSION_3_12_0}, + .flags = OPT_FLAG_SETTABLE}, + {.key = {"gfid2path-separator"}, + .type = GF_OPTION_TYPE_STR, + .default_value = ":", + .description = "Path separator for glusterfs.gfidtopath virt xattr", + .op_version = {GD_OP_VERSION_3_12_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, +#if GF_DARWIN_HOST_OS + {.key = {"xattr-user-namespace-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "none", + .description = + "Option to control XATTR user namespace on the raw filesystem: " + "\t- None: Will use the user namespace, so files will be exchangeable " + "with Linux.\n" + " The raw filesystem will not be compatible with OS X Finder.\n" + "\t- Strip: Will strip the user namespace before setting. The raw " + "filesystem will work in OS X.\n", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, +#endif + { + .key = {"shared-brick-count"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = + "Number of bricks sharing the same backend export." + " Useful for displaying the proper usable size through statvfs() " + "call (df command)", + }, + { + .key = {"disable-landfill-purge"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Disable glusterfs/landfill purges. 
" + "WARNING: This can fill up a brick.", + .op_version = {GD_OP_VERSION_4_0_0}, + .tags = {"diagnosis"}, + }, + {.key = {"force-create-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0000", + .validate = GF_OPT_VALIDATE_BOTH, + .description = "Mode bit permission that will always be set on a file."}, + {.key = {"force-directory-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0000", + .validate = GF_OPT_VALIDATE_BOTH, + .description = "Mode bit permission that will be always set on directory"}, + {.key = {"create-mask"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0777", + .validate = GF_OPT_VALIDATE_BOTH, + .description = "Any bit not set here will be removed from the" + "modes set on a file when it is created"}, + {.key = {"create-directory-mask"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0777", + .validate = GF_OPT_VALIDATE_BOTH, + .description = "Any bit not set here will be removed from the" + "modes set on a directory when it is created"}, + {.key = {"max-hardlinks"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "100", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"posix"}, + .validate = GF_OPT_VALIDATE_MIN, + .description = "max number of hardlinks allowed on any one inode.\n" + "0 is unlimited, 1 prevents any hardlinking at all."}, + {.key = {"fips-mode-rchecksum"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE, + .tags = {"posix"}, + .description = "If enabled, posix_rchecksum uses the FIPS compliant" + "SHA256 checksum. 
MD5 otherwise."}, + {.key = {"ctime"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .op_version = {GD_OP_VERSION_4_1_0}, + .tags = {"ctime"}, + .description = + "When this option is enabled, time attributes (ctime,mtime,atime) " + "are stored in xattr to keep it consistent across replica and " + "distribute set. The time attributes stored at the backend are " + "not considered "}, + {.key = {NULL}}, +}; diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c new file mode 100644 index 00000000000..8cc3ccf8c00 --- /dev/null +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -0,0 +1,2496 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include "posix.h" +#include "posix-handle.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> +#include "glusterfs3-xdr.h" +#include "posix-aio.h" +#include <glusterfs/glusterfs-acl.h> +#include "posix-messages.h" +#include "posix-metadata.h" +#include <glusterfs/events.h> +#include "posix-gfid-path.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/syncop.h> + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR \ + uid_t old_fsuid; \ + gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) \ + do { \ + old_fsuid = setfsuid(uid); \ + old_fsgid = setfsgid(gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() \ + do { \ + setfsuid(old_fsuid); \ + setfsgid(old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +static gf_boolean_t +posix_symlinks_match(xlator_t *this, loc_t *loc, uuid_t gfid) +{ + struct posix_private *priv = NULL; + char linkname_actual[PATH_MAX] = { + 0, + }; + char linkname_expected[PATH_MAX] = {0}; + char *dir_handle = NULL; + ssize_t len = 0; + size_t handle_size = 0; + gf_boolean_t ret = _gf_false; + + priv = 
this->private; + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + dir_handle = alloca0(handle_size); + + snprintf(linkname_expected, PATH_MAX, "../../%02x/%02x/%s/%s", + loc->pargfid[0], loc->pargfid[1], uuid_utoa(loc->pargfid), + loc->name); + + MAKE_HANDLE_GFID_PATH(dir_handle, this, gfid); + len = sys_readlink(dir_handle, linkname_actual, PATH_MAX); + if (len < 0 || len == PATH_MAX) { + if (len == PATH_MAX) { + errno = EINVAL; + } + + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "readlink[%s] failed", dir_handle); + } + goto out; + } + linkname_actual[len] = '\0'; + + if (!strcmp(linkname_actual, linkname_expected)) + ret = _gf_true; + +out: + return ret; +} + +static dict_t * +posix_dict_set_nlink(dict_t *req, dict_t *res, int32_t nlink) +{ + int ret = -1; + + if (req == NULL || !dict_get_sizen(req, GF_REQUEST_LINK_COUNT_XDATA)) + goto out; + + if (res == NULL) + res = dict_new(); + if (res == NULL) + goto out; + + ret = dict_set_uint32(res, GF_RESPONSE_LINK_COUNT_XDATA, nlink); + if (ret == -1) + gf_msg("posix", GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "Failed to set GF_RESPONSE_LINK_COUNT_XDATA"); +out: + return res; +} + +/* Regular fops */ + +int32_t +posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + struct iatt buf = { + 0, + }; + int32_t op_ret = -1; + int32_t entry_ret = 0; + int32_t op_errno = 0; + dict_t *xattr = NULL; + char *real_path = NULL; + char *par_path = NULL; + char *gfid_path = NULL; + uuid_t gfid = {0}; + struct iatt postparent = { + 0, + }; + struct stat statbuf = {0}; + int32_t gfidless = 0; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + struct posix_private *priv = NULL; + posix_inode_ctx_t *ctx = NULL; + int ret = 0; + int dfd = -1; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + + /* The Hidden directory should be 
for housekeeping purpose and it + should not get any gfid on it */ + if (__is_root_gfid(loc->pargfid) && loc->name && + (strcmp(loc->name, GF_HIDDEN_PATH) == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED, + "Lookup issued on %s," + " which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && loc->name && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED, + "Lookup issued on .attribute," + " which is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif /* __NetBSD__ */ + + op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless); + op_ret = -1; + if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) { + /* nameless lookup */ + MAKE_INODE_HANDLE(real_path, this, loc, &buf); + } else { + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + if (gf_uuid_is_null(loc->inode->gfid)) { + op_ret = posix_gfid_heal(this, real_path, loc, xdata); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; + } + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf); + } + } + + op_errno = errno; + + if (op_ret == -1) { + if (op_errno != ENOENT) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_path ? 
real_path : "null"); + } + entry_ret = -1; + if (loc_is_nameless(loc)) { + if (!op_errno) + op_errno = ESTALE; + loc_gfid(loc, gfid); + MAKE_HANDLE_ABSPATH_FD(gfid_path, this, gfid, dfd); + ret = sys_fstatat(dfd, gfid_path, &statbuf, 0); + if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR)) + /*Don't unset if it was a symlink to a dir.*/ + goto parent; + ret = sys_fstatat(dfd, gfid_path, &statbuf, AT_SYMLINK_NOFOLLOW); + if (ret == 0 && statbuf.st_nlink == 1) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_HANDLE_DELETE, + "Found stale gfid " + "handle %s, removing it.", + gfid_path); + posix_handle_unset(this, gfid, NULL); + } + } + goto parent; + } + + if (xdata && (op_ret == 0)) { + xattr = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &buf); + + posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, + &xattr, _gf_true); + + if (dict_get_sizen(xdata, GF_CLEAN_WRITE_PROTECTION)) { + ret = sys_lremovexattr(real_path, GF_PROTECT_FROM_EXTERNAL_WRITES); + if (ret == -1 && (errno != ENODATA && errno != ENOATTR)) + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, + "removexattr failed. 
key %s path %s", + GF_PROTECT_FROM_EXTERNAL_WRITES, loc->path); + } + } + + posix_update_iatt_buf(&buf, -1, real_path, xdata); + if (priv->update_pgfid_nlinks) { + if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + + op_ret = posix_inode_ctx_get_all(loc->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock(&ctx->pgfid_lock); + { + SET_PGFID_XATTR_IF_ABSENT(real_path, pgfid_xattr_key, + nlink_samepgfid, XATTR_CREATE, op_ret, + this, unlock); + } + unlock: + pthread_mutex_unlock(&ctx->pgfid_lock); + } + } + +parent: + if (par_path) { + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, + &postparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on" + " parent %s failed", + par_path); + if (op_errno == ENOENT) + /* If parent directory is missing in a lookup, + errno should be ESTALE (bad handle) and not + ENOENT (missing entry) + */ + op_errno = ESTALE; + goto out; + } + } + + op_ret = entry_ret; +out: + if (!op_ret && !gfidless && gf_uuid_is_null(buf.ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, ENODATA, P_MSG_NULL_GFID, + "buf->ia_gfid is null for " + "%s", + (real_path) ? real_path : ""); + op_ret = -1; + op_errno = ENODATA; + } + + if (op_ret == 0) + op_errno = 0; + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, + (loc) ? 
loc->inode : NULL, &buf, xattr, &postparent); + + if (xattr) + dict_unref(xattr); + + return 0; +} + +static int32_t +posix_set_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, + const char *bname) +{ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; + char pgfid_bname[1024] = { + 0, + }; + char *key = NULL; + const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + + GF_XXH64_DIGEST_LENGTH * 2 + 1; + int ret = 0; + int len; + + len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), + bname); + gf_xxh64_wrapper((unsigned char *)pgfid_bname, len, + GF_XXHSUM64_DEFAULT_SEED, xxh64); + key = alloca(key_size); + snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); + + ret = sys_lsetxattr(path, key, pgfid_bname, len, XATTR_CREATE); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, + "setting gfid2path xattr failed on %s: key = %s ", path, key); + } + + return ret; +} + +int +posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev, mode_t umask, dict_t *xdata) +{ + int tmp_fd = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + char *par_path = 0; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + uuid_t uuid_req = { + 0, + }; + int32_t nlink_samepgfid = 0; + char *pgfid_xattr_key = NULL; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + gf_boolean_t linked = _gf_false; + gf_loglevel_t level = GF_LOG_NONE; + mode_t mode_bit = 0; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL); + + mode_bit = 
(priv->create_mask & mode) | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + + gid = frame->root->gid; + + SET_FS_ID(frame->root->uid, gid); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent of %s failed", real_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + /* Check if the 'gfid' already exists, because this mknod may be an + internal call from distribute for creating 'linkfile', and that + linkfile may be for a hardlinked file */ + if (dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY); + /* trash xlator did not bring the uuid_via the call + * to GFID_NULL_CHECK_AND_GOTO() above. + * Fetch it explicitly here. 
+ */ + if (frame->root->pid == GF_SERVER_PID_TRASH) { + op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_msg_debug(this->name, 0, + "failed to get the gfid from dict for %s", + loc->path); + goto real_op; + } + } + + op_ret = posix_create_link_if_gfid_exists(this, uuid_req, real_path, + loc->inode->table); + if (!op_ret) { + linked = _gf_true; + goto post_op; + } + } + +real_op: +#ifdef __NetBSD__ + if (S_ISFIFO(mode)) + op_ret = mkfifo(real_path, mode); + else +#endif /* __NetBSD__ */ + op_ret = sys_mknod(real_path, mode, dev); + + if (op_ret == -1) { + op_errno = errno; + if ((op_errno == EINVAL) && S_ISREG(mode)) { + /* Over Darwin, mknod with (S_IFREG|mode) + doesn't work */ + tmp_fd = sys_creat(real_path, mode); + if (tmp_fd == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CREATE_FAILED, + "create failed on" + "%s", + real_path); + goto out; + } + sys_close(tmp_fd); + } else { + if (op_errno == EEXIST) + level = GF_LOG_DEBUG; + else + level = GF_LOG_ERROR; + gf_msg(this->name, level, errno, P_MSG_MKNOD_FAILED, + "mknod on %s failed", real_path); + goto out; + } + } + + entry_created = _gf_true; + +#ifndef HAVE_SET_FSID + op_ret = sys_lchown(real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, + "lchown on %s failed", real_path); + goto out; + } +#endif + +post_op: + op_ret = posix_acl_xattr_set(this, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_ACL_FAILED, + "setting ACLs on %s failed", real_path); + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + op_ret = posix_inode_ctx_get_all(loc->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock(&ctx->pgfid_lock); + { + LINK_MODIFY_PGFID_XATTR(real_path, pgfid_xattr_key, nlink_samepgfid, + 0, op_ret, this, unlock); + } + unlock: + 
pthread_mutex_unlock(&ctx->pgfid_lock); + } + + if (priv->gfid2path) { + posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); + } + + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + if (errno != EEXIST) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed", real_path); + else + gf_msg_debug(this->name, 0, "setting xattrs on %s failed", + real_path); + } + + if (!linked) { + op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid, + &op_errno); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + goto out; + } else { + gfid_set = _gf_true; + } + } + + op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKNOD_FAILED, + "mknod on %s failed", real_path); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_path); + goto out; + } + + posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + if (op_ret < 0) { + if (entry_created) { + if (S_ISREG(mode)) + sys_unlink(real_path); + else + sys_rmdir(real_path); + } + + if (gfid_set) + posix_gfid_unset(this, xdata); + } + + STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, + (loc) ? 
loc->inode : NULL, &stbuf, &preparent, + &postparent, NULL); + + return 0; +} + +int +posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL, *gfid_path = NULL; + char *par_path = NULL, *xattr_name = NULL; + int xattr_name_len; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + uuid_t uuid_req = { + 0, + }; + ssize_t size = 0; + dict_t *xdata_rsp = NULL; + char *disk_xattr = NULL; + data_t *arg_data = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + mode_t mode_bit = 0; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + /* The Hidden directory should be for housekeeping purpose and it + should not get created from a user request */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, GF_HIDDEN_PATH) == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED, + "mkdir issued on %s, which" + "is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED, + "mkdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + 
+ gid = frame->root->gid; + + op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false); + + SET_FS_ID(frame->root->uid, gid); + + mode_bit = (priv->create_directory_mask & mode) | + priv->force_directory_mode; + mode = posix_override_umask(mode, mode_bit); + + if (xdata) { + if (!gf_uuid_compare(stbuf.ia_gfid, uuid_req)) { + op_ret = -1; + op_errno = EEXIST; + goto out; + } + } + + if (!gf_uuid_is_null(uuid_req)) { + op_ret = posix_istat(this, loc->inode, uuid_req, NULL, &stbuf); + if ((op_ret == 0) && IA_ISDIR(stbuf.ia_type)) { + gfid_path = alloca(PATH_MAX); + size = posix_handle_path(this, uuid_req, NULL, gfid_path, PATH_MAX); + if (size <= 0) { + op_errno = ESTALE; + op_ret = -1; + goto out; + } + + if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_OF_SAME_ID, + "mkdir (%s): " + "gfid (%s) is already associated with " + "directory (%s). Hence, both " + "directories will share same gfid and " + "this can lead to inconsistencies.", + loc->path, uuid_utoa(uuid_req), + gfid_path ? gfid_path : "<NULL>"); + + gf_event(EVENT_POSIX_SAME_GFID, + "gfid=%s;" + "path=%s;newpath=%s;brick=%s:%s", + uuid_utoa(uuid_req), gfid_path ? 
gfid_path : "<NULL>", + loc->path, priv->hostname, priv->base_path); + } + if (!posix_symlinks_match(this, loc, uuid_req)) + /* For afr selfheal of dir renames, we need to + * remove the old symlink in order for + * posix_gfid_set to set the symlink to the + * new dir.*/ + posix_handle_unset(this, stbuf.ia_gfid, NULL); + } + } else if (frame->root->pid != GF_SERVER_PID_TRASH) { + op_ret = -1; + op_errno = EPERM; + gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno, P_MSG_NULL_GFID, + "mkdir (%s): is issued without " + "gfid-req %p", + loc->path, xdata); + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + mode |= S_ISGID; + } + + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name != NULL) { + xattr_name_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_name_len); + if (arg_data) { + if (loc->parent) + gf_uuid_unparse(loc->parent->gfid, pgfid); + else + gf_uuid_unparse(loc->pargfid, pgfid); + + size = 256; + disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!disk_xattr) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): GF_MALLOC failed during" + " preop of mkdir (%s)", + pgfid, loc->name, real_path); + goto out; + } + disk_xattr[size] = '\0'; + + size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size); + if (size == -1) { + if (disk_xattr) { + GF_FREE(disk_xattr); + disk_xattr = NULL; + } + if (errno != ERANGE) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr failed during" + " preop of mkdir (%s).", + pgfid, loc->name, real_path); + goto out; + 
} + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on key " + "(%s) path (%s) failed due to " + " buffer overflow", + pgfid, loc->name, xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on key (%s)" + " path (%s) failed ", + pgfid, loc->name, xattr_name, par_path); + goto out; + } + disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!disk_xattr) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): GF_MALLOC failed during" + " preop of mkdir (%s)", + pgfid, loc->name, real_path); + goto out; + } + disk_xattr[size] = '\0'; + size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size); + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on " + " key (%s) path (%s) failed " + "(%s)", + pgfid, loc->name, xattr_name, par_path, + strerror(errno)); + goto out; + } + } + if ((arg_data->len != size) || + (memcmp(arg_data->data, disk_xattr, size))) { + gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): failing preop of " + "mkdir (%s) as on-disk" + " xattr value differs from argument " + "value for key %s", + pgfid, loc->name, real_path, xattr_name); + op_ret = -1; + op_errno = EIO; + + xdata_rsp = dict_new(); + if (xdata_rsp == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): " + "dict allocation failed", + pgfid, loc->name); + op_errno = ENOMEM; + goto out; + } + + op_errno = dict_set_int8(xdata_rsp, GF_PREOP_CHECK_FAILED, 1); + if (op_errno < 0) + op_errno = errno; + goto out; + } + + dict_deln(xdata, xattr_name, xattr_name_len); + } + + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + } + + op_ret = 
sys_mkdir(real_path, mode); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED, + "mkdir of %s failed", real_path); + goto out; + } + + entry_created = _gf_true; + +#ifndef HAVE_SET_FSID + op_ret = sys_chown(real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, + "chown on %s failed", real_path); + goto out; + } +#endif + op_ret = posix_acl_xattr_set(this, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, + "setting ACLs on %s failed ", real_path); + } + + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed", real_path); + } + + op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid, + &op_errno); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + goto out; + } else { + gfid_set = _gf_true; + } + + op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_path); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent of %s failed", real_path); + goto out; + } + + posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + if (disk_xattr) + GF_FREE(disk_xattr); + + if (op_ret < 0) { + if (entry_created) + sys_rmdir(real_path); + + if (gfid_set) + posix_gfid_unset(this, xdata); + } + + 
STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, + (loc) ? loc->inode : NULL, &stbuf, &preparent, + &postparent, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + + return 0; +} + +static int +posix_add_unlink_to_ctx(inode_t *inode, xlator_t *this, char *unlink_path) +{ + uint64_t ctx = GF_UNLINK_FALSE; + int ret = 0; + + if (!unlink_path) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, + "Creation of unlink entry failed"); + ret = -1; + goto out; + } + + ctx = GF_UNLINK_TRUE; + ret = posix_inode_ctx_set_unlink_flag(inode, this, ctx); + if (ret < 0) { + goto out; + } + +out: + return ret; +} + +static int32_t +posix_move_gfid_to_unlink(xlator_t *this, uuid_t gfid, loc_t *loc) +{ + char *unlink_path = NULL; + char *gfid_path = NULL; + int ret = -1; + struct posix_private *priv_posix = NULL; + + priv_posix = (struct posix_private *)this->private; + + MAKE_HANDLE_GFID_PATH(gfid_path, this, gfid); + + POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, loc->inode->gfid, + unlink_path); + if (!unlink_path) { + ret = -1; + goto out; + } + gf_msg_debug(this->name, 0, "Moving gfid: %s to unlink_path : %s", + gfid_path, unlink_path); + ret = sys_rename(gfid_path, unlink_path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED, + "Creation of unlink entry failed for gfid: %s", unlink_path); + goto out; + } + ret = posix_add_unlink_to_ctx(loc->inode, this, unlink_path); + if (ret < 0) + goto out; + +out: + return ret; +} + +static int32_t +posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, + const char *real_path, struct iatt *stbuf, + int32_t *op_errno, loc_t *loc, + gf_boolean_t get_link_count, + dict_t *rsp_dict) +{ + int32_t ret = 0; + struct iatt prebuf = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t update_ctime = _gf_false; + + /* Unlink the gfid_handle_first */ + if (stbuf && stbuf->ia_nlink == 1) { + LOCK(&loc->inode->lock); + + if (loc->inode->fd_count == 0) { + 
UNLOCK(&loc->inode->lock); + ret = posix_handle_unset(this, stbuf->ia_gfid, NULL); + } else { + UNLOCK(&loc->inode->lock); + ret = posix_move_gfid_to_unlink(this, stbuf->ia_gfid, loc); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED, + "unlink of gfid handle " + "failed for path:%s with gfid %s", + real_path, uuid_utoa(stbuf->ia_gfid)); + } + } else { + update_ctime = _gf_true; + } + + if (get_link_count) { + LOCK(&loc->inode->lock); + locked = _gf_true; + /* Since this stat is to get link count and not for time + * attributes, intentionally passing inode as NULL + */ + ret = posix_pstat(this, NULL, loc->gfid, real_path, &prebuf, _gf_true); + if (ret) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_path); + goto err; + } + } + + /* Unlink the actual file */ + ret = sys_unlink(real_path); + + if (locked) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; + } + + if (ret == -1) { + if (op_errno) + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED, + "unlink of %s failed", real_path); + goto err; + } + + if (update_ctime) { + posix_set_ctime(frame, this, NULL, -1, loc->inode, stbuf); + } + + ret = dict_set_uint32(rsp_dict, GET_LINK_COUNT, prebuf.ia_nlink); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set " GET_LINK_COUNT " for %s", real_path); + + return 0; + +err: + if (locked) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; + } + return -1; +} + +static gf_boolean_t +posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, + const int keylen, const char *linkto_xattr, + struct iatt *stbuf, const char *real_path) +{ + gf_boolean_t skip_unlink = _gf_false; + gf_boolean_t is_dht_linkto_file = _gf_false; + int unlink_if_linkto = 0; + ssize_t xattr_size = -1; + int op_ret = -1; + + op_ret = dict_get_int32n(xdata, key, keylen, &unlink_if_linkto); + + if 
(!op_ret && unlink_if_linkto) { + is_dht_linkto_file = IS_DHT_LINKFILE_MODE(stbuf); + if (!is_dht_linkto_file) + return _gf_true; + + LOCK(&loc->inode->lock); + + xattr_size = sys_lgetxattr(real_path, linkto_xattr, NULL, 0); + + UNLOCK(&loc->inode->lock); + + if (xattr_size <= 0) + skip_unlink = _gf_true; + + gf_msg("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS, + "linkto_xattr status: %" PRIu32 " for %s", skip_unlink, + real_path); + } + return skip_unlink; +} + +static int32_t +posix_remove_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, + const char *bname) +{ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; + char pgfid_bname[1024] = { + 0, + }; + int ret = 0; + char *key = NULL; + const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + + GF_XXH64_DIGEST_LENGTH * 2 + 1; + int len; + + len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), + bname); + gf_xxh64_wrapper((unsigned char *)pgfid_bname, len, + GF_XXHSUM64_DEFAULT_SEED, xxh64); + key = alloca(key_size); + snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); + + ret = sys_lremovexattr(path, key); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, + "removing gfid2path xattr failed on %s: key = %s", path, key); + } + + return ret; +} + +int32_t +posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + int ret = -1; + struct iatt stbuf = { + 0, + }; + struct iatt postbuf = { + 0, + }; + struct posix_private *priv = NULL; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + int32_t check_open_fd = 0; + int32_t skip_unlink = 0; + int32_t fdstat_requested = 0; + dict_t *unwind_dict = NULL; + gf_boolean_t get_link_count = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + 
DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(loc, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_path); + goto out; + } + + priv = this->private; + + op_ret = dict_get_int32_sizen(xdata, DHT_SKIP_OPEN_FD_UNLINK, + &check_open_fd); + + if (!op_ret && check_open_fd) { + LOCK(&loc->inode->lock); + + if (loc->inode->fd_count) { + skip_unlink = 1; + } + + UNLOCK(&loc->inode->lock); + + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_KEY_STATUS_INFO, + "open-fd-key-status: %" PRIu32 " for %s", skip_unlink, + real_path); + + if (skip_unlink) { + op_ret = -1; + op_errno = EBUSY; + goto out; + } + } + /* + * If either of the function return true, skip_unlink. + * If first first function itself return true, + * we don't need to call second function, skip unlink. 
+ */ + skip_unlink = posix_skip_non_linkto_unlink( + xdata, loc, DHT_SKIP_NON_LINKTO_UNLINK, + SLEN(DHT_SKIP_NON_LINKTO_UNLINK), DHT_LINKTO, &stbuf, real_path); + if (skip_unlink) { + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (IA_ISREG(loc->inode->ia_type) && xdata && + dict_get_sizen(xdata, DHT_IATT_IN_XDATA_KEY)) { + fdstat_requested = 1; + } + + if (fdstat_requested || + (priv->background_unlink && IA_ISREG(loc->inode->ia_type))) { + fd = sys_open(real_path, O_RDONLY, 0); + if (fd == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED, + "open of %s failed", real_path); + goto out; + } + } + + if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + op_ret = posix_inode_ctx_get_all(loc->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + pthread_mutex_lock(&ctx->pgfid_lock); + { + UNLINK_MODIFY_PGFID_XATTR(real_path, pgfid_xattr_key, + nlink_samepgfid, 0, op_ret, this, unlock); + } + unlock: + pthread_mutex_unlock(&ctx->pgfid_lock); + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, + "modification of " + "parent gfid xattr failed (path:%s gfid:%s)", + real_path, uuid_utoa(loc->inode->gfid)); + if (op_errno != ENOATTR) + /* Allow unlink if pgfid xattr is not set. */ + goto out; + } + } + + if (priv->gfid2path && (stbuf.ia_nlink > 1)) { + op_ret = posix_remove_gfid2path_xattr(this, real_path, loc->pargfid, + loc->name); + if (op_ret < 0) { + /* Allow unlink if pgfid xattr is not set. 
*/ + if (errno != ENOATTR) + goto out; + } + } + + unwind_dict = dict_new(); + if (!unwind_dict) { + op_errno = ENOMEM; + op_ret = -1; + goto out; + } + + if (xdata && dict_get_sizen(xdata, GF_GET_FILE_BLOCK_COUNT)) { + ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT, + stbuf.ia_blocks); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT); + } + + if (xdata && dict_get_sizen(xdata, GET_LINK_COUNT)) + get_link_count = _gf_true; + op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf, + &op_errno, loc, get_link_count, + unwind_dict); + if (op_ret == -1) { + goto out; + } + + if (fdstat_requested) { + op_ret = posix_fdstat(this, loc->inode, fd, &postbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post operation " + "fstat failed on fd=%d", + fd); + goto out; + } + op_ret = posix_set_iatt_in_dict(unwind_dict, NULL, &postbuf); + if (op_ret == -1) { + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_DICT_SET_FAILED, + "failed to set fdstat in dict"); + } + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_path); + goto out; + } + + posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent); + + unwind_dict = posix_dict_set_nlink(xdata, unwind_dict, stbuf.ia_nlink); + op_ret = 0; +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, &preparent, + &postparent, unwind_dict); + + if (fd != -1) { + sys_close(fd); + } + + /* unref unwind_dict*/ + if (unwind_dict) { + dict_unref(unwind_dict); + } + + return 0; +} + +int +posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + int32_t op_ret = -1; + 
int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + char *gfid_str = NULL; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + char tmp_path[PATH_MAX] = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + /* The Hidden directory should be for housekeeping purpose and it + should not get deleted from inside process */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, GF_HIDDEN_PATH) == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED, + "rmdir issued on %s, which" + "is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED, + "rmdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + + priv = this->private; + + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_path); + goto out; + } + + if (flags) { + op_ret = sys_mkdir(priv->trash_path, 0755); + if (errno != EEXIST && op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED, + "mkdir of %s failed", priv->trash_path); + } else { + gfid_str = uuid_utoa(stbuf.ia_gfid); + (void)snprintf(tmp_path, sizeof(tmp_path), "%s/%s", + priv->trash_path, gfid_str); + 
gf_msg_debug(this->name, 0, "Moving %s to %s", real_path, tmp_path); + op_ret = sys_rename(real_path, tmp_path); + } + } else { + op_ret = sys_rmdir(real_path); + } + op_errno = errno; + + if (op_ret == 0) { + if (posix_symlinks_match(this, loc, stbuf.ia_gfid)) + posix_handle_unset(this, stbuf.ia_gfid, NULL); + } + + if (op_errno == EEXIST) + /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ + op_errno = ENOTEMPTY; + + /* No need to log a common error as ENOTEMPTY */ + if (op_ret == -1 && op_errno != ENOTEMPTY) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_RMDIR_FAILED, + "rmdir of %s failed", real_path); + } + + if (op_ret == -1) { + if (op_errno == ENOTEMPTY) { + gf_msg_debug(this->name, 0, "%s on %s failed", + (flags) ? "rename" : "rmdir", real_path); + } else { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + P_MSG_DIR_OPERATION_FAILED, "%s on %s failed", + (flags) ? "rename" : "rmdir", real_path); + } + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent of %s failed", par_path); + goto out; + } + + posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent); + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, &preparent, &postparent, + NULL); + + return 0; +} + +int +posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + char *par_path = 0; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + uuid_t uuid_req = { + 0, + }; + + 
DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(linkname, out); + VALIDATE_OR_GOTO(loc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); + + gid = frame->root->gid; + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + SET_FS_ID(frame->root->uid, gid); + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + op_ret = sys_symlink(linkname, real_path); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_SYMLINK_FAILED, + "symlink of %s --> %s failed", real_path, linkname); + goto out; + } + + entry_created = _gf_true; + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); + +#ifndef HAVE_SET_FSID + op_ret = sys_lchown(real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, + "lchown failed on %s", real_path); + goto out; + } +#endif + op_ret = posix_acl_xattr_set(this, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, + "setting ACLs on %s failed", real_path); + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + nlink_samepgfid = 1; + SET_PGFID_XATTR(real_path, pgfid_xattr_key, nlink_samepgfid, + XATTR_CREATE, op_ret, this, ignore); + } + + if (priv->gfid2path) { + posix_set_gfid2path_xattr(this, real_path, 
loc->pargfid, loc->name); + } + +ignore: + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed ", real_path); + } + + op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid, + &op_errno); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + goto out; + } else { + gfid_set = _gf_true; + } + + op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", real_path); + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_path); + goto out; + } + + posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + if (op_ret < 0) { + if (entry_created) + sys_unlink(real_path); + + if (gfid_set) + posix_gfid_unset(this, xdata); + } + + STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, + (loc) ? 
loc->inode : NULL, &stbuf, &preparent, + &postparent, NULL); + + return 0; +} + +int +posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_oldpath = NULL; + char *real_newpath = NULL; + char *par_oldpath = NULL; + char *par_newpath = NULL; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + char was_present = 1; + struct iatt preoldparent = { + 0, + }; + struct iatt postoldparent = { + 0, + }; + struct iatt prenewparent = { + 0, + }; + struct iatt postnewparent = { + 0, + }; + char olddirid[64]; + char newdirid[64]; + uuid_t victim = {0}; + int was_dir = 0; + int nlink = 0; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + char *gfid_path = NULL; + dict_t *unwind_dict = NULL; + gf_boolean_t locked = _gf_false; + gf_boolean_t get_link_count = _gf_false; + posix_inode_ctx_t *ctx_old = NULL; + posix_inode_ctx_t *ctx_new = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(oldloc, out); + VALIDATE_OR_GOTO(newloc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL); + if (!real_oldpath || !par_oldpath) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + MAKE_ENTRY_HANDLE(real_newpath, par_newpath, this, newloc, &stbuf); + if (!real_newpath || !par_newpath) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + unwind_dict = dict_new(); + if (!unwind_dict) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + op_ret = posix_pstat(this, oldloc->parent, oldloc->pargfid, par_oldpath, + &preoldparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_oldpath); + goto out; + } + + op_ret = posix_pstat(this, 
newloc->parent, newloc->pargfid, par_newpath, + &prenewparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent of %s failed", par_newpath); + goto out; + } + + op_ret = posix_pstat(this, newloc->inode, NULL, real_newpath, &stbuf, + _gf_false); + if ((op_ret == -1) && (errno == ENOENT)) { + was_present = 0; + } else { + gf_uuid_copy(victim, stbuf.ia_gfid); + if (IA_ISDIR(stbuf.ia_type)) + was_dir = 1; + nlink = stbuf.ia_nlink; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { + gf_msg(this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND, + "found directory at %s while expecting ENOENT", real_newpath); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && + gf_uuid_compare(newloc->inode->gfid, stbuf.ia_gfid)) { + gf_msg(this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND, + "found directory %s at %s while renaming %s", + uuid_utoa_r(newloc->inode->gfid, olddirid), real_newpath, + uuid_utoa_r(stbuf.ia_gfid, newdirid)); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + op_ret = posix_inode_ctx_get_all(oldloc->inode, this, &ctx_old); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + if (newloc->inode) { + op_ret = posix_inode_ctx_get_all(newloc->inode, this, &ctx_new); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + } + + if (IA_ISDIR(oldloc->inode->ia_type)) + posix_handle_unset(this, oldloc->inode->gfid, NULL); + + pthread_mutex_lock(&ctx_old->pgfid_lock); + { + if (!IA_ISDIR(oldloc->inode->ia_type) && priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + oldloc->pargfid); + UNLINK_MODIFY_PGFID_XATTR(real_oldpath, pgfid_xattr_key, + nlink_samepgfid, 0, op_ret, this, unlock); + } + + if ((xdata) && (dict_get(xdata, GET_LINK_COUNT)) && (real_newpath) && + (was_present) && ctx_new) { + 
pthread_mutex_lock(&ctx_new->pgfid_lock); + locked = _gf_true; + get_link_count = _gf_true; + op_ret = posix_pstat(this, newloc->inode, newloc->gfid, + real_newpath, &stbuf, _gf_false); + if ((op_ret == -1) && (errno != ENOENT)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_newpath); + goto unlock; + } + } + + op_ret = sys_rename(real_oldpath, real_newpath); + if (op_ret == -1) { + op_errno = errno; + if (op_errno == ENOTEMPTY) { + gf_msg_debug(this->name, 0, + "rename of %s to" + " %s failed: %s", + real_oldpath, real_newpath, strerror(op_errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_RENAME_FAILED, + "rename of %s to %s failed", real_oldpath, real_newpath); + } + + if (priv->update_pgfid_nlinks && + !IA_ISDIR(oldloc->inode->ia_type)) { + LINK_MODIFY_PGFID_XATTR(real_oldpath, pgfid_xattr_key, + nlink_samepgfid, 0, op_ret, this, + unlock); + } + + goto unlock; + } + + if (locked) { + pthread_mutex_unlock(&ctx_new->pgfid_lock); + locked = _gf_false; + } + + if ((get_link_count) && + (dict_set_uint32(unwind_dict, GET_LINK_COUNT, stbuf.ia_nlink))) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set " GET_LINK_COUNT " for %s", real_newpath); + + if (!IA_ISDIR(oldloc->inode->ia_type) && priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + newloc->pargfid); + LINK_MODIFY_PGFID_XATTR(real_newpath, pgfid_xattr_key, + nlink_samepgfid, 0, op_ret, this, unlock); + } + + if (!IA_ISDIR(oldloc->inode->ia_type) && priv->gfid2path) { + MAKE_HANDLE_ABSPATH(gfid_path, this, oldloc->inode->gfid); + + posix_remove_gfid2path_xattr(this, gfid_path, oldloc->pargfid, + oldloc->name); + posix_set_gfid2path_xattr(this, gfid_path, newloc->pargfid, + newloc->name); + } + } + +unlock: + if (locked) { + pthread_mutex_unlock(&ctx_new->pgfid_lock); + locked = _gf_false; + } + pthread_mutex_unlock(&ctx_old->pgfid_lock); + + if (op_ret < 0) { + 
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, + "modification of " + "parent gfid xattr failed (gfid:%s)", + uuid_utoa(oldloc->inode->gfid)); + goto out; + } + + if (was_dir) + posix_handle_unset(this, victim, NULL); + + if (was_present && !was_dir && nlink == 1) + posix_handle_unset(this, victim, NULL); + + if (IA_ISDIR(oldloc->inode->ia_type)) { + posix_handle_soft(this, real_newpath, newloc, oldloc->inode->gfid, + NULL); + } + + op_ret = posix_pstat(this, newloc->inode, NULL, real_newpath, &stbuf, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_newpath); + goto out; + } + + /* Since the same inode is later used and dst inode is not present, + * update ctime on source inode. It can't use old path because it + * doesn't exist and xattr has to be stored on disk */ + posix_set_ctime(frame, this, real_newpath, -1, oldloc->inode, &stbuf); + + op_ret = posix_pstat(this, oldloc->parent, oldloc->pargfid, par_oldpath, + &postoldparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_oldpath); + goto out; + } + + posix_set_parent_ctime(frame, this, par_oldpath, -1, oldloc->parent, + &postoldparent); + + op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath, + &postnewparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_newpath); + goto out; + } + + posix_set_parent_ctime(frame, this, par_newpath, -1, newloc->parent, + &postnewparent); + + if (was_present) + unwind_dict = posix_dict_set_nlink(xdata, unwind_dict, nlink); + op_ret = 0; +out: + + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, &stbuf, &preoldparent, + &postoldparent, &prenewparent, &postnewparent, + unwind_dict); + + if 
(unwind_dict) + dict_unref(unwind_dict); + + return 0; +} + +int +posix_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_oldpath = 0; + char *real_newpath = 0; + char *par_newpath = 0; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + int32_t nlink_samepgfid = 0; + char *pgfid_xattr_key = NULL; + gf_boolean_t entry_created = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(oldloc, out); + VALIDATE_OR_GOTO(newloc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_oldpath, this, oldloc, &stbuf); + if (!real_oldpath) { + op_errno = errno; + goto out; + } + + if (priv->max_hardlinks && stbuf.ia_nlink >= priv->max_hardlinks) { + op_ret = -1; + op_errno = EMLINK; + gf_log(this->name, GF_LOG_ERROR, + "hardlink failed: %s exceeds max link count (%u/%u).", + real_oldpath, stbuf.ia_nlink, priv->max_hardlinks); + goto out; + } + + MAKE_ENTRY_HANDLE(real_newpath, par_newpath, this, newloc, &stbuf); + if (!real_newpath || !par_newpath) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath, + &preparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat failed: %s", par_newpath); + goto out; + } + + op_ret = sys_link(real_oldpath, real_newpath); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LINK_FAILED, + "link %s to %s failed", real_oldpath, real_newpath); + goto out; + } + + entry_created = _gf_true; + + op_ret = 
posix_pstat(this, newloc->inode, NULL, real_newpath, &stbuf, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_newpath); + goto out; + } + + posix_set_ctime(frame, this, real_newpath, -1, newloc->inode, &stbuf); + + op_ret = posix_pstat(this, newloc->parent, newloc->pargfid, par_newpath, + &postparent, _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat failed: %s", par_newpath); + goto out; + } + + posix_set_parent_ctime(frame, this, par_newpath, -1, newloc->parent, + &postparent); + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + newloc->pargfid); + + op_ret = posix_inode_ctx_get_all(newloc->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock(&ctx->pgfid_lock); + { + LINK_MODIFY_PGFID_XATTR(real_newpath, pgfid_xattr_key, + nlink_samepgfid, 0, op_ret, this, unlock); + } + unlock: + pthread_mutex_unlock(&ctx->pgfid_lock); + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, + "modification of " + "parent gfid xattr failed (path:%s gfid:%s)", + real_newpath, uuid_utoa(newloc->inode->gfid)); + goto out; + } + } + + if (priv->gfid2path) { + if (stbuf.ia_nlink <= MAX_GFID2PATH_LINK_SUP) { + op_ret = posix_set_gfid2path_xattr(this, real_newpath, + newloc->pargfid, newloc->name); + if (op_ret) { + op_errno = errno; + goto out; + } + } else { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_XATTR_NOTSUP, + "Link count exceeded. " + "gfid2path xattr not set (path:%s gfid:%s)", + real_newpath, uuid_utoa(newloc->inode->gfid)); + } + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, + (oldloc) ? 
oldloc->inode : NULL, &stbuf, &preparent, + &postparent, NULL); + + if (op_ret < 0) { + if (entry_created) + sys_unlink(real_newpath); + } + + return 0; +} + +int +posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t _fd = -1; + int _flags = 0; + char *real_path = NULL; + char *par_path = NULL; + struct iatt stbuf = { + 0, + }; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; + char was_present = 1; + + gid_t gid = 0; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + + int nlink_samepgfid = 0; + char *pgfid_xattr_key = NULL; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + mode_t mode_bit = 0; + uuid_t uuid_req = { + 0, + }; + + dict_t *xdata_rsp = dict_ref(xdata); + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); + + gid = frame->root->gid; + + SET_FS_ID(frame->root->uid, gid); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + if (!flags) { + _flags = O_CREAT | O_RDWR | O_EXCL; + } else { + _flags = flags | O_CREAT; + } + + op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, 
_gf_false); + if ((op_ret == -1) && (errno == ENOENT)) { + was_present = 0; + } + + if (!was_present) { + if (posix_is_layout_stale(xdata, par_path, this)) { + op_ret = -1; + op_errno = EIO; + if (!xdata_rsp) { + xdata_rsp = dict_new(); + if (!xdata_rsp) { + op_errno = ENOMEM; + goto out; + } + } + + if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == + -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, + "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); + } + + goto out; + } + } + + if (priv->o_direct) + _flags |= O_DIRECT; + + mode_bit = (priv->create_mask & mode) | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + _fd = sys_open(real_path, _flags, mode); + + if (_fd == -1) { + op_errno = errno; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED, + "open on %s failed", real_path); + goto out; + } + + if ((_flags & O_CREAT) && (_flags & O_EXCL)) { + entry_created = _gf_true; + } + + if (was_present) + goto fill_stat; + +#ifndef HAVE_SET_FSID + op_ret = sys_chown(real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, + "chown on %s failed", real_path); + } +#endif + op_ret = posix_acl_xattr_set(this, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, + "setting ACLs on %s failed", real_path); + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + nlink_samepgfid = 1; + SET_PGFID_XATTR(real_path, pgfid_xattr_key, nlink_samepgfid, + XATTR_CREATE, op_ret, this, ignore); + } + + if (priv->gfid2path) { + posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); + } +ignore: + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed ", real_path); + } + 
+fill_stat: + op_ret = posix_gfid_set(this, real_path, loc, xdata, frame->root->pid, + &op_errno); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + goto out; + } else { + gfid_set = _gf_true; + } + + op_ret = posix_fdstat(this, loc->inode, _fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat on %d failed", _fd); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_path); + goto out; + } + + posix_set_parent_ctime(frame, this, par_path, -1, loc->parent, &postparent); + + op_ret = -1; + pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->flags = flags; + pfd->fd = _fd; + + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + if (op_ret < 0) { + if (_fd != -1) + sys_close(_fd); + + if (entry_created) + sys_unlink(real_path); + + if (gfid_set) + posix_gfid_unset(this, xdata); + } + + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, + (loc) ? 
loc->inode : NULL, &stbuf, &preparent, + &postparent, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + + return 0; +} + +/* TODO: Ensure atomocity of put, and rollback in case of failure + * One of the ways, is to perform put in the hidden directory + * and rename it to the specified location, if the put was successful + */ +int32_t +posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, uint32_t flags, struct iovec *vector, int32_t count, + off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + fd_t *fd = NULL; + char *real_path = NULL; + char *par_path = NULL; + struct iatt stbuf = { + 0, + }; + struct iatt preparent = { + 0, + }; + struct iatt postparent = { + 0, + }; + + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); + + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, + _gf_false); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", par_path); + goto out; + } + fd = fd_create(loc->inode, getpid()); + if (!fd) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + fd->flags = flags; + + /* No xlators are expected below posix, but we cannot still call + * sys_create() directly here, as posix_create does many other things like + * chmod, setxattr etc. along with sys_create(). But we cannot also directly + * call posix_create() as it calls STACK_UNWIND. 
Hence using syncop() + */ + op_ret = syncop_create(this, loc, flags, mode, fd, &stbuf, xdata, NULL); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CREATE_FAILED, + "create of %s failed", loc->path); + goto out; + } + + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, + _gf_false); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", par_path); + goto out; + } + + op_ret = syncop_writev(this, fd, vector, count, offset, iobref, flags, NULL, + NULL, xdata, NULL); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITE_FAILED, + "write on file %s failed", loc->path); + goto out; + } + + op_ret = syncop_fsetxattr(this, fd, xattr, flags, xdata, NULL); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setxattr on file %s failed", loc->path); + goto out; + } + + op_ret = syncop_flush(this, fd, xdata, NULL); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CLOSE_FAILED, + "setxattr on file %s failed", loc->path); + goto out; + } + + op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &stbuf, + _gf_false); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on %s failed", real_path); + goto out; + } +out: + STACK_UNWIND_STRICT(put, frame, op_ret, op_errno, loc->inode, &stbuf, + &preparent, &postparent, NULL); + + return 0; +} diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c new file mode 100644 index 00000000000..1b38e9b0479 --- /dev/null +++ b/xlators/storage/posix/src/posix-gfid-path.c @@ -0,0 +1,243 @@ +/* + Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdint.h> + +#include <glusterfs/compat-errno.h> +#include <glusterfs/syscall.h> +#include <glusterfs/logging.h> +#include "posix-messages.h" +#include "posix-mem-types.h" +#include "posix-gfid-path.h" +#include "posix.h" + +gf_boolean_t +posix_is_gfid2path_xattr(const char *name) +{ + if (name && strncmp(GFID2PATH_XATTR_KEY_PREFIX, name, + GFID2PATH_XATTR_KEY_PREFIX_LENGTH) == 0) + return _gf_true; + + return _gf_false; +} + +static int gf_posix_xattr_enotsup_log; + +int32_t +posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, + int *op_errno, dict_t *dict) +{ + int ret = 0; + char *path = NULL; + ssize_t size = 0; + char *list = NULL; + int32_t list_offset = 0; + int32_t i = 0; + int32_t j = 0; + char *paths[MAX_GFID2PATH_LINK_SUP] = { + NULL, + }; + char *value = NULL; + size_t remaining_size = 0; + size_t bytes = 0; + char keybuffer[4096] = { + 0, + }; + + uuid_t pargfid = { + 0, + }; + gf_boolean_t have_val = _gf_false; + struct posix_private *priv = NULL; + char pargfid_str[UUID_CANONICAL_FORM_LEN + 1] = { + 0, + }; + gf_boolean_t found = _gf_false; + int len; + + priv = this->private; + + if (IA_ISDIR(inode->ia_type)) { + ret = posix_resolve_dirgfid_to_path(inode->gfid, priv->base_path, NULL, + &path); + if (ret < 0) { + ret = -1; + goto err; + } + ret = dict_set_dynstr(dict, GFID2PATH_VIRT_XATTR_KEY, path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "could not set " + "value for key (%s)", + GFID2PATH_VIRT_XATTR_KEY); + goto err; + } + found = _gf_true; + } else { + char value_buf[8192] = { + 0, + }; + char xattr_value[8192] = { + 0, + }; + have_val = _gf_false; + size = sys_llistxattr(real_path, value_buf, 
sizeof(value_buf) - 1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of" + " buffer on %s ", + real_path); + size = sys_llistxattr(real_path, NULL, 0); + } + if (size == -1) { + *op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "listxattr failed on %s", real_path); + } + goto err; + } + if (size == 0) + goto done; + } + list = alloca(size); + if (!list) { + *op_errno = errno; + goto err; + } + if (have_val) { + memcpy(list, value_buf, size); + } else { + size = sys_llistxattr(real_path, list, size); + if (size < 0) { + ret = -1; + *op_errno = errno; + goto err; + } + } + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + len = snprintf(keybuffer, sizeof(keybuffer), "%s", + list + list_offset); + + if (!posix_is_gfid2path_xattr(keybuffer)) { + goto ignore; + } + + found = _gf_true; + size = sys_lgetxattr(real_path, keybuffer, xattr_value, + sizeof(xattr_value) - 1); + if (size == -1) { + ret = -1; + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on" + " %s: key = %s ", + real_path, keybuffer); + break; + } + + /* Parse pargfid from xattr value*/ + strncpy(pargfid_str, xattr_value, 36); + pargfid_str[36] = '\0'; + gf_uuid_parse(pargfid_str, pargfid); + + /* Convert pargfid to path */ + ret = posix_resolve_dirgfid_to_path(pargfid, priv->base_path, + &xattr_value[37], &paths[i]); + i++; + + ignore: + remaining_size -= (len + 1); + list_offset += (len + 1); + } /* while (remaining_size > 0) */ + + /* gfid2path xattr is absent in the list of xattrs */ + if (!found) { + ret = -1; + /* + * ENODATA 
because xattr is not present in the + * list of xattrs. Thus the consumer should + * face error instead of a success and a empty + * string in the dict for the key. + */ + *op_errno = ENODATA; + goto err; + } + + /* + * gfid2path xattr is found in list of xattrs, but getxattr + * on the 1st gfid2path xattr itself failed and the while + * loop above broke. So there is nothing in the value. So + * it would be better not to send "" as the value for any + * key, as it is not true. + */ + if (found && !i) + goto err; /* both errno and ret are set before beak */ + + /* Calculate memory to be allocated */ + for (j = 0; j < i; j++) { + bytes += strlen(paths[j]); + if (j < i - 1) + bytes += strlen(priv->gfid2path_sep); + } + value = GF_CALLOC(bytes + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + ret = -1; + *op_errno = errno; + goto err; + } + + for (j = 0; j < i; j++) { + strcat(value, paths[j]); + if (j != i - 1) + strcat(value, priv->gfid2path_sep); + } + value[bytes] = '\0'; + + ret = dict_set_dynptr(dict, GFID2PATH_VIRT_XATTR_KEY, value, bytes); + if (ret < 0) { + *op_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on %s for the key %s failed.", + real_path, GFID2PATH_VIRT_XATTR_KEY); + GF_FREE(value); + goto err; + } + } + +done: + for (j = 0; j < i; j++) { + if (paths[j]) + GF_FREE(paths[j]); + } + ret = 0; + return ret; +err: + if (path) + GF_FREE(path); + for (j = 0; j < i; j++) { + if (paths[j]) + GF_FREE(paths[j]); + } + return ret; +} diff --git a/xlators/storage/posix/src/posix-gfid-path.h b/xlators/storage/posix/src/posix-gfid-path.h new file mode 100644 index 00000000000..79096e5893f --- /dev/null +++ b/xlators/storage/posix/src/posix-gfid-path.h @@ -0,0 +1,28 @@ +/* + Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _POSIX_GFID_PATH_H +#define _POSIX_GFID_PATH_H + +#include <glusterfs/compat-errno.h> + +#include <stdint.h> // for int32_t +#include "glusterfs/dict.h" // for dict_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t +#include "glusterfs/inode.h" // for inode_t +#include "uuid.h" // for uuid_t +#define MAX_GFID2PATH_LINK_SUP 500 + +gf_boolean_t +posix_is_gfid2path_xattr(const char *name); +int32_t +posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, + int *op_errno, dict_t *dict); +#endif /* _POSIX_GFID_PATH_H */ diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c index 6182fd8a630..410b38da8cb 100644 --- a/xlators/storage/posix/src/posix-handle.c +++ b/xlators/storage/posix/src/posix-handle.c @@ -7,11 +7,6 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <errno.h> #include <sys/types.h> #include <sys/stat.h> @@ -23,303 +18,377 @@ #include "posix-handle.h" #include "posix.h" -#include "xlator.h" -#include "syscall.h" +#include <glusterfs/syscall.h> +#include "posix-messages.h" +#include "posix-metadata.h" -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> + +int +posix_handle_mkdir_hashes(xlator_t *this, int dfd, uuid_t gfid); inode_t * -posix_resolve (xlator_t *this, inode_table_t *itable, inode_t *parent, - char *bname, struct iatt *iabuf) +posix_resolve(xlator_t *this, inode_table_t *itable, inode_t *parent, + char *bname, struct iatt *iabuf) { - inode_t *inode = NULL, *linked_inode = NULL; - int ret = -1; - - ret = posix_istat (this, parent->gfid, bname, iabuf); - if (ret < 0) - goto out; - - inode = inode_find (itable, iabuf->ia_gfid); + inode_t *inode = NULL; + int ret = -1; + + ret = posix_istat(this, NULL, parent->gfid, bname, iabuf); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "gfid: %s, bname: %s " + "failed", + uuid_utoa(parent->gfid), bname); + goto out; + } + + if (__is_root_gfid(iabuf->ia_gfid) && !strcmp(bname, "/")) { + inode = itable->root; + } else { + inode = inode_find(itable, iabuf->ia_gfid); if (inode == NULL) { - inode = inode_new (itable); - } - - linked_inode = inode_link (inode, parent, bname, iabuf); - - inode_unref (inode); + inode = inode_new(itable); + gf_uuid_copy(inode->gfid, iabuf->ia_gfid); + } + } + + /* posix_istat wouldn't have fetched posix_mdata_t i.e., + * time attributes as inode is passed as NULL, hence get + * here once you got the inode + */ + ret = posix_get_mdata_xattr(this, NULL, -1, inode, iabuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on gfid:%s", uuid_utoa(inode->gfid)); + goto out; + } + + /* Linking an inode here, can cause a race in posix_acl. 
+ Parent inode gets linked here, but before + it reaches posix_acl_readdirp_cbk, create/lookup can + come on a leaf-inode, as parent-inode-ctx not yet updated + in posix_acl_readdirp_cbk, create and lookup can fail + with EACCESS. So do the inode linking in the quota xlator + + if (__is_root_gfid (iabuf->ia_gfid) && !strcmp (bname, "/")) + linked_inode = itable->root; + else + linked_inode = inode_link (inode, parent, bname, iabuf); + + inode_unref (inode);*/ out: - return linked_inode; + return inode; } int -posix_make_ancestral_node (const char *priv_base_path, char *path, int pathsize, - gf_dirent_t *head, - char *dir_name, struct iatt *iabuf, inode_t *inode, - int type, dict_t *xdata) +posix_make_ancestral_node(const char *priv_base_path, char *path, int pathsize, + gf_dirent_t *head, char *dir_name, struct iatt *iabuf, + inode_t *inode, int type, dict_t *xdata) { - gf_dirent_t *entry = NULL; - char real_path[PATH_MAX + 1] = {0, }, len = 0; - loc_t loc = {0, }; - int ret = -1; - - len = strlen (path) + strlen (dir_name) + 1; - if (len > pathsize) { - goto out; - } - - strcat (path, dir_name); - - if (type & POSIX_ANCESTRY_DENTRY) { - entry = gf_dirent_for_name (dir_name); - if (!entry) { - gf_log (THIS->name, GF_LOG_ERROR, - "could not create gf_dirent for entry %s: (%s)", - dir_name, strerror (errno)); - goto out; - } - - entry->d_stat = *iabuf; - entry->inode = inode_ref (inode); - - list_add_tail (&entry->list, &head->list); - strcpy (real_path, priv_base_path); - strcat (real_path, "/"); - strcat (real_path, path); - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - entry->dict = posix_xattr_fill (THIS, real_path, &loc, NULL, -1, - xdata, iabuf); - loc_wipe (&loc); - } - - ret = 0; + gf_dirent_t *entry = NULL; + char real_path[PATH_MAX + 1] = + { + 0, + }, + len = 0; + loc_t loc = { + 0, + }; + int ret = -1; + + len = strlen(path) + strlen(dir_name) + 1; + if (len > pathsize) { + goto out; + } + + strcat(path, dir_name); + if 
(*dir_name != '/') + strcat(path, "/"); + + if (type & POSIX_ANCESTRY_DENTRY) { + entry = gf_dirent_for_name(dir_name); + if (!entry) + goto out; + + entry->d_stat = *iabuf; + entry->inode = inode_ref(inode); + + list_add_tail(&entry->list, &head->list); + snprintf(real_path, sizeof(real_path), "%s/%s", priv_base_path, path); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + entry->dict = posix_xattr_fill(THIS, real_path, &loc, NULL, -1, xdata, + iabuf); + loc_wipe(&loc); + } + + ret = 0; out: - return ret; + return ret; } int -posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize, - gf_dirent_t *head, int type, uuid_t gfid, - const size_t handle_size, - const char *priv_base_path, inode_table_t *itable, - inode_t **parent, dict_t *xdata) +posix_make_ancestryfromgfid(xlator_t *this, char *path, int pathsize, + gf_dirent_t *head, int type, uuid_t gfid, + const size_t handle_size, + const char *priv_base_path, inode_table_t *itable, + inode_t **parent, dict_t *xdata, int32_t *op_errno) { - char *linkname = NULL; /* "../../<gfid[0]>/<gfid[1]/" - "<gfidstr>/<NAME_MAX>" */ - char *dir_handle = NULL; - char *dir_name = NULL; - char *pgfidstr = NULL; - char *saveptr = NULL; - ssize_t len = 0; - inode_t *inode = NULL; - struct iatt iabuf = {0, }; - int ret = -1; - uuid_t tmp_gfid = {0, }; - - if (!path || !parent || !priv_base_path || gf_uuid_is_null (gfid)) { - goto out; - } - - if (__is_root_gfid (gfid)) { - if (parent) { - if (*parent) { - inode_unref (*parent); - } - - *parent = inode_ref (itable->root); - } - - inode = itable->root; - - memset (&iabuf, 0, sizeof (iabuf)); - gf_uuid_copy (iabuf.ia_gfid, inode->gfid); - iabuf.ia_type = inode->ia_type; - - ret = posix_make_ancestral_node (priv_base_path, path, pathsize, - head, "/", &iabuf, inode, type, - xdata); - return ret; - } - - dir_handle = alloca (handle_size); - linkname = alloca (PATH_MAX); - snprintf (dir_handle, handle_size, "%s/%s/%02x/%02x/%s", - priv_base_path, 
GF_HIDDEN_PATH, gfid[0], gfid[1], - uuid_utoa (gfid)); - - len = readlink (dir_handle, linkname, PATH_MAX); - if (len < 0) { - gf_log (this->name, GF_LOG_ERROR, "could not read the link " - "from the gfid handle %s (%s)", dir_handle, - strerror (errno)); - goto out; - } - - linkname[len] = '\0'; - - pgfidstr = strtok_r (linkname + SLEN("../../00/00/"), "/", &saveptr); - dir_name = strtok_r (NULL, "/", &saveptr); - - gf_uuid_parse (pgfidstr, tmp_gfid); - - ret = posix_make_ancestryfromgfid (this, path, pathsize, head, type, - tmp_gfid, handle_size, - priv_base_path, itable, parent, - xdata); - if (ret < 0) { - goto out; - } - - memset (&iabuf, 0, sizeof (iabuf)); - - inode = posix_resolve (this, itable, *parent, dir_name, &iabuf); - if (inode == NULL) { + char *linkname = NULL; /* "../../<gfid[0]>/<gfid[1]/" + "<gfidstr>/<NAME_MAX>" */ + char *dir_handle = NULL; + char *pgfidstr = NULL; + char *saveptr = NULL; + ssize_t len = 0; + inode_t *inode = NULL; + struct iatt iabuf = { + 0, + }; + int ret = -1; + uuid_t tmp_gfid = { + 0, + }; + char *dir_stack[PATH_MAX / 2 + 1]; /* Since PATH_MAX/2 also gives + an upper bound on depth of + directories tree */ + uuid_t gfid_stack[PATH_MAX / 2 + 1]; + + char *dir_name = NULL; + char *saved_dir = NULL; + int top = -1; + + if (!path || !parent || !priv_base_path || gf_uuid_is_null(gfid)) { + *op_errno = EINVAL; + goto out; + } + + dir_handle = alloca(handle_size); + linkname = alloca(PATH_MAX); + gf_uuid_copy(tmp_gfid, gfid); + + while (top < PATH_MAX / 2) { + gf_uuid_copy(gfid_stack[++top], tmp_gfid); + if (__is_root_gfid(tmp_gfid)) { + *parent = inode_ref(itable->root); + + saved_dir = alloca(sizeof("/")); + strcpy(saved_dir, "/"); + dir_stack[top] = saved_dir; + break; + } else { + snprintf(dir_handle, handle_size, "%s/%s/%02x/%02x/%s", + priv_base_path, GF_HIDDEN_PATH, tmp_gfid[0], tmp_gfid[1], + uuid_utoa(tmp_gfid)); + + len = sys_readlink(dir_handle, linkname, PATH_MAX); + if (len < 0) { + *op_errno = errno; + 
gf_msg(this->name, + (errno == ENOENT || errno == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + errno, P_MSG_READLINK_FAILED, + "could not read" + " the link from the gfid handle %s ", + dir_handle); ret = -1; goto out; + } + + linkname[len] = '\0'; + + pgfidstr = strtok_r(linkname + SLEN("../../00/00/"), "/", &saveptr); + dir_name = strtok_r(NULL, "/", &saveptr); + saved_dir = alloca(strlen(dir_name) + 1); + gf_uuid_parse(pgfidstr, tmp_gfid); + strcpy(saved_dir, dir_name); + dir_stack[top] = saved_dir; + } + } + if (top == PATH_MAX / 2) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_ANCESTORY_FAILED, 0, + "build ancestry failed due to " + "deep directory hierarchy, depth: %d.", + top); + *op_errno = EINVAL; + ret = -1; + goto out; + } + + while (top >= 0) { + if (!*parent) { + /* There's no real "root" cause for how we end up here, + * so for now let's log this and bail out to prevent + * crashes. + */ + gf_msg(this->name, GF_LOG_WARNING, P_MSG_INODE_RESOLVE_FAILED, 0, + "OOPS: *parent is null (path: %s), bailing!", path); + goto out; + } + + memset(&iabuf, 0, sizeof(iabuf)); + inode = posix_resolve(this, itable, *parent, dir_stack[top], &iabuf); + if (inode == NULL) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_INODE_RESOLVE_FAILED, 0, + "posix resolve on the inode %s failed", + uuid_utoa(gfid_stack[top])); + *op_errno = ESTALE; + ret = -1; + goto out; } - strcat (dir_name, "/"); - ret = posix_make_ancestral_node (priv_base_path, path, pathsize, head, - dir_name, &iabuf, inode, type, xdata); - if (*parent != NULL) { - inode_unref (*parent); + ret = posix_make_ancestral_node(priv_base_path, path, pathsize, head, + dir_stack[top], &iabuf, inode, type, + xdata); + if (ret < 0) { + *op_errno = ENOMEM; + goto out; } + inode_unref(*parent); *parent = inode; - + top--; + } out: - return ret; + return ret; } int -posix_handle_relpath (xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t buflen) +posix_handle_relpath(xlator_t *this, uuid_t gfid, const char 
*basename, + char *buf, size_t buflen) { - char *uuid_str = NULL; - int len = 0; - - len = SLEN("../") - + SLEN("../") - + SLEN("00/") - + SLEN("00/") - + SLEN(UUID0_STR) - + 1 /* '\0' */ - ; - - if (basename) { - len += (strlen (basename) + 1); - } + char *uuid_str = NULL; + int len = 0; - if (buflen < len || !buf) - return len; + len = POSIX_GFID_HANDLE_RELSIZE; - uuid_str = uuid_utoa (gfid); - - if (basename) { - len = snprintf (buf, buflen, "../../%02x/%02x/%s/%s", - gfid[0], gfid[1], uuid_str, basename); - } else { - len = snprintf (buf, buflen, "../../%02x/%02x/%s", - gfid[0], gfid[1], uuid_str); - } + if (basename) { + len += (strlen(basename) + 1); + } + if (buflen < len || !buf) return len; -} + uuid_str = uuid_utoa(gfid); + + if (basename) { + len = snprintf(buf, buflen, "../../%02x/%02x/%s/%s", gfid[0], gfid[1], + uuid_str, basename); + } else { + len = snprintf(buf, buflen, "../../%02x/%02x/%s", gfid[0], gfid[1], + uuid_str); + } + + return len; +} /* TODO: explain how this pump fixes ELOOP */ -int -posix_handle_pump (xlator_t *this, char *buf, int len, int maxlen, - char *base_str, int base_len, int pfx_len) +gf_boolean_t +posix_is_malformed_link(xlator_t *this, char *base_str, char *linkname, + size_t len) { - char linkname[512] = {0,}; /* "../../<gfid>/<NAME_MAX>" */ - int ret = 0; - int blen = 0; - int link_len = 0; - - /* is a directory's symlink-handle */ - ret = readlink (base_str, linkname, 512); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "internal readlink failed on %s (%s)", - base_str, strerror (errno)); - goto err; - } + if ((len == 8) && strcmp(linkname, "../../..")) /*for root*/ + goto err; - if (ret < 512) - linkname[ret] = 0; + if (len < 50 || len >= 512) + goto err; - link_len = ret; + if (memcmp(linkname, "../../", 6) != 0) + goto err; - if ((ret == 8) && memcmp (linkname, "../../..", 8) == 0) { - if (strcmp (base_str, buf) == 0) { - strcpy (buf + pfx_len, ".."); - } - goto out; - } + if ((linkname[2] != '/') || 
(linkname[5] != '/') || (linkname[8] != '/') || + (linkname[11] != '/') || (linkname[48] != '/')) { + goto err; + } - if (ret < 50 || ret >= 512) { - gf_log (this->name, GF_LOG_ERROR, - "malformed internal link %s for %s", - linkname, base_str); - goto err; - } + if ((linkname[20] != '-') || (linkname[25] != '-') || + (linkname[30] != '-') || (linkname[35] != '-')) { + goto err; + } - if (memcmp (linkname, "../../", 6) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "malformed internal link %s for %s", - linkname, base_str); - goto err; - } - - if ((linkname[2] != '/') || - (linkname[5] != '/') || - (linkname[8] != '/') || - (linkname[11] != '/') || - (linkname[48] != '/')) { - gf_log (this->name, GF_LOG_ERROR, - "malformed internal link %s for %s", - linkname, base_str); - goto err; - } + return _gf_false; - if ((linkname[20] != '-') || - (linkname[25] != '-') || - (linkname[30] != '-') || - (linkname[35] != '-')) { - gf_log (this->name, GF_LOG_ERROR, - "malformed internal link %s for %s", - linkname, base_str); - goto err; - } - - blen = link_len - 48; - - if (len + blen >= maxlen) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to form handle path for %s (maxlen = %d)", - buf, maxlen); - goto err; - } - - memmove (buf + base_len + blen, buf + base_len, - (strlen (buf) - base_len) + 1); - - strncpy (base_str + pfx_len, linkname + 6, 42); +err: + gf_log_callingfn(this->name, GF_LOG_ERROR, + "malformed internal link " + "%s for %s", + linkname, base_str); + return _gf_true; +} - strncpy (buf + pfx_len, linkname + 6, link_len - 6); +int +posix_handle_pump(xlator_t *this, char *buf, int len, int maxlen, + char *base_str, int base_len, int pfx_len) +{ + char linkname[512] = { + 0, + }; /* "../../<gfid>/<NAME_MAX>" */ + int ret = 0; + int blen = 0; + int link_len = 0; + char tmpstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + char d2[3] = { + 0, + }; + int index = 0; + int dirfd = 0; + struct posix_private *priv = this->private; + + strncpy(tmpstr, (base_str + pfx_len + 3), 
40); + strncpy(d2, (base_str + pfx_len), 2); + index = strtoul(d2, NULL, 16); + dirfd = priv->arrdfd[index]; + + /* is a directory's symlink-handle */ + ret = readlinkat(dirfd, tmpstr, linkname, 512); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED, + "internal readlink failed on %s ", base_str); + goto err; + } + + if (ret < 512) + linkname[ret] = 0; + + link_len = ret; + + if ((ret == 8) && memcmp(linkname, "../../..", 8) == 0) { + if (strcmp(base_str, buf) == 0) { + strcpy(buf + pfx_len, ".."); + } + goto out; + } + + if (posix_is_malformed_link(this, base_str, linkname, ret)) + goto err; + + blen = link_len - 48; + + if (len + blen >= maxlen) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLEPATH_FAILED, + "Unable to form handle path for %s (maxlen = %d)", buf, maxlen); + goto err; + } + + memmove(buf + base_len + blen, buf + base_len, + (strlen(buf) - base_len) + 1); + + strncpy(base_str + pfx_len, linkname + 6, 42); + + strncpy(buf + pfx_len, linkname + 6, link_len - 6); out: - return len + blen; + return len + blen; err: - return -1; + return -1; } - /* posix_handle_path differs from posix_handle_gfid_path in the way that the path filled in @buf by posix_handle_path will return type IA_IFDIR when @@ -330,571 +399,622 @@ err: */ int -posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, - char *ubuf, size_t size) +posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, + size_t size) { - struct posix_private *priv = NULL; - char *uuid_str = NULL; - int len = 0; - int ret = -1; - struct stat stat; - char *base_str = NULL; - int base_len = 0; - int pfx_len; - int maxlen; - char *buf; - - priv = this->private; - - uuid_str = uuid_utoa (gfid); - - if (ubuf) { - buf = ubuf; - maxlen = size; - } else { - maxlen = PATH_MAX; - buf = alloca (maxlen); - } - - base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45); - base_str = alloca (base_len + 1); - base_len = snprintf (base_str, 
base_len + 1, "%s/%s/%02x/%02x/%s", - priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], - uuid_str); - - pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1; - - if (basename) { - len = snprintf (buf, maxlen, "%s/%s", base_str, basename); - } else { - len = snprintf (buf, maxlen, "%s", base_str); - } - - ret = lstat (base_str, &stat); - - if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1)) - goto out; - - do { - errno = 0; - ret = posix_handle_pump (this, buf, len, maxlen, - base_str, base_len, pfx_len); - len = ret; - - if (ret == -1) - break; - - ret = lstat (buf, &stat); - } while ((ret == -1) && errno == ELOOP); + struct posix_private *priv = NULL; + char *uuid_str = NULL; + int len = 0; + int ret = -1; + struct stat stat; + char *base_str = NULL; + int base_len = 0; + int pfx_len; + int maxlen; + char *buf; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + + priv = this->private; + + uuid_str = uuid_utoa(gfid); + + if (ubuf) { + buf = ubuf; + maxlen = size; + } else { + maxlen = PATH_MAX; + buf = alloca(maxlen); + } + + index = gfid[0]; + dfd = priv->arrdfd[index]; + + base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45); + base_str = alloca(base_len + 1); + base_len = snprintf(base_str, base_len + 1, "%s/%s/%02x/%02x/%s", + priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], + uuid_str); + pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1; + + if (basename) { + len = snprintf(buf, maxlen, "%s/%s", base_str, basename); + } else { + len = snprintf(buf, maxlen, "%s", base_str); + } + + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_str); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); + + if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1)) + goto out; + + do { + errno = 0; + ret = posix_handle_pump(this, buf, len, maxlen, base_str, base_len, + pfx_len); + len = ret; + + if (ret == -1) + break; + ret = sys_lstat(buf, &stat); + } while ((ret == 
-1) && errno == ELOOP); out: - return len + 1; + return len + 1; } - int -posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t buflen) +posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t buflen) { - struct posix_private *priv = NULL; - char *uuid_str = NULL; - int len = 0; - - priv = this->private; - - len = priv->base_path_length /* option directory "/export" */ - + SLEN("/") - + SLEN(GF_HIDDEN_PATH) - + SLEN("/") - + SLEN("00/") - + SLEN("00/") - + SLEN(UUID0_STR) - + 1 /* '\0' */ - ; - - if (basename) { - len += (strlen (basename) + 1); - } else { - len += 256; /* worst-case for directory's symlink-handle expansion */ - } + struct posix_private *priv = NULL; + char *uuid_str = NULL; + int len = 0; - if ((buflen < len) || !buf) - return len; + priv = this->private; - uuid_str = uuid_utoa (gfid); + len = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - if (__is_root_gfid (gfid)) { - if (basename) { - len = snprintf (buf, buflen, "%s/%s", priv->base_path, - basename); - } else { - strncpy (buf, priv->base_path, buflen); - } - goto out; - } + len += 256; /* worst-case for directory's symlink-handle expansion */ - if (basename) { - len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path, - GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str, basename); - } else { - len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path, - GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); - } -out: + if ((buflen < len) || !buf) return len; -} + uuid_str = uuid_utoa(gfid); + + if (__is_root_gfid(gfid)) { + len = snprintf(buf, buflen, "%s", priv->base_path); + } else { + len = snprintf(buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path, + GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); + } + + return len; +} int -posix_handle_init (xlator_t *this) +posix_handle_init(xlator_t *this) { - struct posix_private *priv = NULL; - char *handle_pfx = NULL; - int ret = 0; - struct stat stbuf; - struct stat rootbuf; - struct 
stat exportbuf; - char *rootstr = NULL; - uuid_t gfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; - - priv = this->private; - - ret = stat (priv->base_path, &exportbuf); - if (ret || !S_ISDIR (exportbuf.st_mode)) { - gf_log (this->name, GF_LOG_ERROR, - "Not a directory: %s", priv->base_path); - return -1; - } + struct posix_private *priv = NULL; + char *handle_pfx = NULL; + int ret = 0; + struct stat stbuf; + struct stat rootbuf; + struct stat exportbuf; + char *rootstr = NULL; + static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int dfd = 0; + + priv = this->private; + + ret = sys_stat(priv->base_path, &exportbuf); + if (ret || !S_ISDIR(exportbuf.st_mode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Not a directory: %s", priv->base_path); + return -1; + } - handle_pfx = alloca (priv->base_path_length + 1 + strlen (GF_HIDDEN_PATH) - + 1); + handle_pfx = alloca(priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1); - sprintf (handle_pfx, "%s/%s", priv->base_path, GF_HIDDEN_PATH); + sprintf(handle_pfx, "%s/%s", priv->base_path, GF_HIDDEN_PATH); - ret = stat (handle_pfx, &stbuf); - switch (ret) { + ret = sys_stat(handle_pfx, &stbuf); + switch (ret) { case -1: - if (errno == ENOENT) { - ret = mkdir (handle_pfx, 0600); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "Creating directory %s failed: %s", - handle_pfx, strerror (errno)); - return -1; - } - } else { - gf_log (this->name, GF_LOG_ERROR, - "Checking for %s failed: %s", - handle_pfx, strerror (errno)); - return -1; + if (errno == ENOENT) { + ret = sys_mkdir(handle_pfx, 0600); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", handle_pfx); + return -1; } - break; + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Checking for %s failed", handle_pfx); + return -1; + } + break; case 0: - if (!S_ISDIR (stbuf.st_mode)) { - gf_log (this->name, GF_LOG_ERROR, - "Not a directory: %s", - 
handle_pfx); - return -1; - } - break; + if (!S_ISDIR(stbuf.st_mode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Not a directory: %s", handle_pfx); + return -1; + } + break; default: - break; - } + break; + } - stat (handle_pfx, &priv->handledir); + ret = sys_stat(handle_pfx, &priv->handledir); - MAKE_HANDLE_ABSPATH(rootstr, this, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "stat for %s failed", handle_pfx); + return -1; + } - ret = stat (rootstr, &rootbuf); - switch (ret) { + MAKE_HANDLE_ABSPATH_FD(rootstr, this, gfid, dfd); + ret = sys_fstatat(dfd, rootstr, &rootbuf, 0); + switch (ret) { case -1: - if (errno != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "%s: %s", priv->base_path, - strerror (errno)); - return -1; - } - - ret = posix_handle_mkdir_hashes (this, rootstr); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "mkdir %s failed (%s)", - rootstr, strerror (errno)); - return -1; - } + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "%s", priv->base_path); + return -1; + } + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed", rootstr); + return -1; + } - ret = symlink ("../../..", rootstr); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "symlink %s creation failed (%s)", - rootstr, strerror (errno)); - return -1; - } - break; - case 0: - if ((exportbuf.st_ino == rootbuf.st_ino) && - (exportbuf.st_dev == rootbuf.st_dev)) - return 0; - - gf_log (this->name, GF_LOG_ERROR, - "Different dirs %s (%lld/%lld) != %s (%lld/%lld)", - priv->base_path, (long long) exportbuf.st_ino, - (long long) exportbuf.st_dev, rootstr, - (long long) rootbuf.st_ino, (long long) rootbuf.st_dev); + ret = sys_symlinkat("../../..", dfd, rootstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "symlink %s creation failed", rootstr); return -1; + } + 
break; + case 0: + if ((exportbuf.st_ino == rootbuf.st_ino) && + (exportbuf.st_dev == rootbuf.st_dev)) + return 0; - break; - } + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Different dirs %s (%lld/%lld) != %s (%lld/%lld)", + priv->base_path, (long long)exportbuf.st_ino, + (long long)exportbuf.st_dev, rootstr, + (long long)rootbuf.st_ino, (long long)rootbuf.st_dev); + return -1; - return 0; + break; + } + + return 0; } gf_boolean_t -posix_does_old_trash_exists (char *old_trash) +posix_does_old_trash_exists(char *old_trash) { - uuid_t gfid = {0}; - gf_boolean_t exists = _gf_false; - struct stat stbuf = {0}; - int ret = 0; - - ret = lstat (old_trash, &stbuf); - if ((ret == 0) && S_ISDIR (stbuf.st_mode)) { - ret = sys_lgetxattr (old_trash, "trusted.gfid", gfid, 16); - if ((ret < 0) && (errno == ENODATA || errno == ENOATTR) ) - exists = _gf_true; - } - return exists; + uuid_t gfid = {0}; + gf_boolean_t exists = _gf_false; + struct stat stbuf = {0}; + int ret = 0; + + ret = sys_lstat(old_trash, &stbuf); + if ((ret == 0) && S_ISDIR(stbuf.st_mode)) { + ret = sys_lgetxattr(old_trash, "trusted.gfid", gfid, 16); + if ((ret < 0) && (errno == ENODATA || errno == ENOATTR)) + exists = _gf_true; + } + return exists; } int -posix_handle_new_trash_init (xlator_t *this, char *trash) +posix_handle_new_trash_init(xlator_t *this, char *trash) { - int ret = 0; - struct stat stbuf = {0}; + int ret = 0; + struct stat stbuf = {0}; - ret = lstat (trash, &stbuf); - switch (ret) { + ret = sys_lstat(trash, &stbuf); + switch (ret) { case -1: - if (errno == ENOENT) { - ret = mkdir (trash, 0755); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "Creating directory %s failed: %s", - trash, strerror (errno)); - } - } else { - gf_log (this->name, GF_LOG_ERROR, "Checking for %s " - "failed: %s", trash, strerror (errno)); + if (errno == ENOENT) { + ret = sys_mkdir(trash, 0755); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_TRASH_CREATE, + "Creating 
directory %s failed", trash); } - break; + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_TRASH_CREATE, "Checking for %s failed", + trash); + } + break; case 0: - if (!S_ISDIR (stbuf.st_mode)) { - gf_log (this->name, GF_LOG_ERROR, - "Not a directory: %s", trash); - ret = -1; - } - break; + if (!S_ISDIR(stbuf.st_mode)) { + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_TRASH_CREATE, "Not a directory: %s", trash); + ret = -1; + } + break; default: - break; - } - return ret; + break; + } + return ret; } int -posix_mv_old_trash_into_new_trash (xlator_t *this, char *old, char *new) +posix_mv_old_trash_into_new_trash(xlator_t *this, char *old, char *new) { - char dest_old[PATH_MAX] = {0}; - int ret = 0; - uuid_t dest_name = {0}; - - if (!posix_does_old_trash_exists (old)) - goto out; - gf_uuid_generate (dest_name); - snprintf (dest_old, sizeof (dest_old), "%s/%s", new, - uuid_utoa (dest_name)); - ret = rename (old, dest_old); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Not able to move " - "%s -> %s (%s)", old, dest_old, strerror (errno)); - } + char dest_old[PATH_MAX] = {0}; + int ret = 0; + uuid_t dest_name = {0}; + + if (!posix_does_old_trash_exists(old)) + goto out; + gf_uuid_generate(dest_name); + snprintf(dest_old, sizeof(dest_old), "%s/%s", new, uuid_utoa(dest_name)); + ret = sys_rename(old, dest_old); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_TRASH_CREATE, + "Not able to move %s -> %s ", old, dest_old); + } out: - return ret; + return ret; } int -posix_handle_trash_init (xlator_t *this) +posix_handle_trash_init(xlator_t *this) { - int ret = -1; - struct posix_private *priv = NULL; - char old_trash[PATH_MAX] = {0}; - - priv = this->private; - - priv->trash_path = GF_CALLOC (1, priv->base_path_length + strlen ("/") - + strlen (GF_HIDDEN_PATH) + strlen ("/") - + strlen (TRASH_DIR) + 1, - gf_posix_mt_trash_path); - - if (!priv->trash_path) - goto out; - - strncpy (priv->trash_path, priv->base_path, 
priv->base_path_length); - strcat (priv->trash_path, "/" GF_HIDDEN_PATH "/" TRASH_DIR); - ret = posix_handle_new_trash_init (this, priv->trash_path); - if (ret) - goto out; - snprintf (old_trash, sizeof (old_trash), "%s/.landfill", - priv->base_path); - ret = posix_mv_old_trash_into_new_trash (this, old_trash, - priv->trash_path); + int ret = -1; + struct posix_private *priv = NULL; + char old_trash[PATH_MAX] = {0}; + + priv = this->private; + + priv->trash_path = GF_MALLOC(priv->base_path_length + SLEN("/") + + SLEN(GF_HIDDEN_PATH) + SLEN("/") + + SLEN(TRASH_DIR) + 1, + gf_posix_mt_trash_path); + + if (!priv->trash_path) + goto out; + + snprintf( + priv->trash_path, + priv->base_path_length + SLEN(GF_HIDDEN_PATH) + SLEN(TRASH_DIR) + 3, + "%s/%s/%s", priv->base_path, GF_HIDDEN_PATH, TRASH_DIR); + + ret = posix_handle_new_trash_init(this, priv->trash_path); + if (ret) + goto out; + snprintf(old_trash, sizeof(old_trash), "%s/.landfill", priv->base_path); + ret = posix_mv_old_trash_into_new_trash(this, old_trash, priv->trash_path); out: - return ret; + return ret; } int -posix_handle_mkdir_hashes (xlator_t *this, const char *newpath) +posix_handle_mkdir_hashes(xlator_t *this, int dirfd, uuid_t gfid) { - char *duppath = NULL; - char *parpath = NULL; - int ret = 0; - - duppath = strdupa (newpath); - parpath = dirname (duppath); - parpath = dirname (duppath); - - ret = mkdir (parpath, 0700); - if (ret == -1 && errno != EEXIST) { - gf_log (this->name, GF_LOG_ERROR, - "error mkdir hash-1 %s (%s)", - parpath, strerror (errno)); - return -1; - } - - strcpy (duppath, newpath); - parpath = dirname (duppath); - - ret = mkdir (parpath, 0700); - if (ret == -1 && errno != EEXIST) { - gf_log (this->name, GF_LOG_ERROR, - "error mkdir hash-2 %s (%s)", - parpath, strerror (errno)); - return -1; - } + int ret = -1; + char d2[3] = { + 0, + }; + + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_mkdirat(dirfd, d2, 0700); + if (ret == -1 && errno != EEXIST) { + gf_msg(this->name, 
GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-2 %s ", uuid_utoa(gfid)); + return -1; + } - return 0; + return 0; } - int -posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf) +posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, + struct stat *oldbuf) { - char *newpath = NULL; - struct stat newbuf; - int ret = -1; - - - MAKE_HANDLE_ABSPATH (newpath, this, gfid); + struct stat newbuf; + struct stat hashbuf; + int ret = -1; + gf_boolean_t link_exists = _gf_false; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; + + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + + if (ret == -1 && errno != ENOENT) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", + uuid_utoa(gfid)); + return -1; + } - ret = lstat (newpath, &newbuf); - if (ret == -1 && errno != ENOENT) { - gf_log (this->name, GF_LOG_WARNING, - "%s: %s", newpath, strerror (errno)); + if (ret == -1 && errno == ENOENT) { + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); + if (ret) { + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", uuid_utoa(gfid)); return -1; + } } + ret = sys_linkat(AT_FDCWD, oldpath, dfd, newstr); - if (ret == -1 && errno == ENOENT) { - ret = posix_handle_mkdir_hashes (this, newpath); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "mkdir %s failed (%s)", - newpath, strerror (errno)); - return -1; - } - - ret = sys_link (oldpath, newpath); - - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "link %s -> %s failed (%s)", - oldpath, newpath, strerror (errno)); - return -1; - } - - ret = lstat (newpath, &newbuf); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s failed (%s)", - newpath, strerror (errno)); - return -1; - } - } - - if (newbuf.st_ino != oldbuf->st_ino || - 
newbuf.st_dev != oldbuf->st_dev) { - gf_log (this->name, GF_LOG_WARNING, - "mismatching ino/dev between file %s (%lld/%lld) " - "and handle %s (%lld/%lld)", - oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev, - newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev); - ret = -1; + if (ret) { + if (errno != EEXIST) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "link %s -> %s" + "failed ", + oldpath, newstr); + return -1; + } else { + link_exists = _gf_true; + } } + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - return ret; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "lstat on %s failed", uuid_utoa(gfid)); + return -1; + } + if ((link_exists) && (!S_ISREG(newbuf.st_mode))) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_HANDLE_CREATE, + "%s - Expected regular file", uuid_utoa(gfid)); + return -1; + } + } + + if (newbuf.st_ino != oldbuf->st_ino || newbuf.st_dev != oldbuf->st_dev) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_CREATE, + "mismatching ino/dev between file %s (%lld/%lld) " + "and handle %s (%lld/%lld)", + oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev, + uuid_utoa(gfid), (long long)newbuf.st_ino, + (long long)newbuf.st_dev); + ret = -1; + } + + return ret; } - int -posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc, - uuid_t gfid, struct stat *oldbuf) +posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, + uuid_t gfid, struct stat *oldbuf) { - char *oldpath = NULL; - char *newpath = NULL; - struct stat newbuf; - int ret = -1; - - MAKE_HANDLE_ABSPATH (newpath, this, gfid); - MAKE_HANDLE_RELPATH (oldpath, this, loc->pargfid, loc->name); - - ret = lstat (newpath, &newbuf); - if (ret == -1 && errno != ENOENT) { - gf_log (this->name, GF_LOG_WARNING, - "%s: %s", newpath, strerror (errno)); - return -1; - } - - if (ret == -1 && errno == ENOENT) { - ret = posix_handle_mkdir_hashes (this, newpath); - if (ret) 
{ - gf_log (this->name, GF_LOG_WARNING, - "mkdir %s failed (%s)", - newpath, strerror (errno)); - return -1; - } - - ret = symlink (oldpath, newpath); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "symlink %s -> %s failed (%s)", - oldpath, newpath, strerror (errno)); - return -1; - } + char *oldpath = NULL; + char *newpath = NULL; + struct stat newbuf; + struct stat hashbuf; + int ret = -1; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; + + MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); + MAKE_HANDLE_RELPATH(oldpath, this, loc->pargfid, loc->name); + + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + + if (ret == -1 && errno != ENOENT) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", + newstr); + return -1; + } - ret = lstat (newpath, &newbuf); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "stat on %s failed (%s)", - newpath, strerror (errno)); - return -1; - } + if (ret == -1 && errno == ENOENT) { + if (posix_is_malformed_link(this, newpath, oldpath, strlen(oldpath))) { + GF_ASSERT(!"Malformed link"); + errno = EINVAL; + return -1; } - ret = stat (real_path, &newbuf); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); + if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "stat on %s failed (%s)", newpath, strerror (errno)); + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", newstr); return -1; + } + } + ret = sys_symlinkat(oldpath, dfd, newstr); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "symlink %s -> %s failed", oldpath, newstr); + return -1; } - if (!oldbuf) - return ret; + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - if (newbuf.st_ino != oldbuf->st_ino || - newbuf.st_dev != oldbuf->st_dev) { - gf_log (this->name, GF_LOG_WARNING, - "mismatching ino/dev 
between file %s (%lld/%lld) " - "and handle %s (%lld/%lld)", - oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev, - newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev); - ret = -1; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "stat on %s failed ", newstr); + return -1; } + } + ret = sys_stat(real_path, &newbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "stat on %s failed ", real_path); + return -1; + } + + if (!oldbuf) return ret; -} + if (newbuf.st_ino != oldbuf->st_ino || newbuf.st_dev != oldbuf->st_dev) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_CREATE, + "mismatching ino/dev between file %s (%lld/%lld) " + "and handle %s (%lld/%lld)", + oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev, + newpath, (long long)newbuf.st_ino, (long long)newbuf.st_dev); + ret = -1; + } + + return ret; +} int -posix_handle_unset_gfid (xlator_t *this, uuid_t gfid) +posix_handle_unset_gfid(xlator_t *this, uuid_t gfid) { - char *path = NULL; - int ret = 0; - struct stat stat; - - MAKE_HANDLE_GFID_PATH (path, this, gfid, NULL); - - ret = lstat (path, &stat); - - if (ret == -1) { - if (errno != ENOENT) { - gf_log (this->name, GF_LOG_WARNING, - "%s: %s", path, strerror (errno)); - } - goto out; - } - - ret = unlink (path); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "unlink %s failed (%s)", path, strerror (errno)); - } + int ret = 0; + struct stat stat; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + struct posix_private *priv = this->private; + + index = gfid[0]; + dfd = priv->arrdfd[index]; + + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_utoa(gfid)); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); + + if (ret == -1) { + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s", + newstr); + } + goto out; + } + + ret = sys_unlinkat(dfd, newstr); + if 
(ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, + "unlink %s is failed", newstr); + } out: - return ret; + return ret; } - int -posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename) +posix_handle_unset(xlator_t *this, uuid_t gfid, const char *basename) { - int ret; - struct iatt stat; - char *path = NULL; - - if (!basename) { - ret = posix_handle_unset_gfid (this, gfid); - return ret; - } - - MAKE_HANDLE_PATH (path, this, gfid, basename); - if (!path) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to create handle path for %s (%s)", - basename, uuid_utoa(gfid)); - return -1; - } + int ret; + struct iatt stat; + char *path = NULL; - ret = posix_istat (this, gfid, basename, &stat); + if (!basename) { + ret = posix_handle_unset_gfid(this, gfid); + return ret; + } - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "%s: %s", path, strerror (errno)); - return -1; - } + MAKE_HANDLE_PATH(path, this, gfid, basename); + if (!path) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_DELETE, + "Failed to create handle path for %s (%s)", basename, + uuid_utoa(gfid)); + return -1; + } + + /* stat is being used only for gfid, so passing a NULL inode + * doesn't fetch time attributes which is fine + */ + ret = posix_istat(this, NULL, gfid, basename, &stat); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s", + path); + return -1; + } - ret = posix_handle_unset_gfid (this, stat.ia_gfid); + ret = posix_handle_unset_gfid(this, stat.ia_gfid); - return ret; + return ret; } - int -posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid, - char *real_path) +posix_create_link_if_gfid_exists(xlator_t *this, uuid_t gfid, char *real_path, + inode_table_t *itable) { - int ret = -1; - struct stat stbuf = {0,}; - char *newpath = NULL; - - MAKE_HANDLE_PATH (newpath, this, gfid, NULL); - if (!newpath) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to create handle path (%s)", uuid_utoa(gfid)); - 
return ret; - } - - ret = lstat (newpath, &stbuf); - if (!ret) { - ret = sys_link (newpath, real_path); - } - + int ret = -1; + char *newpath = NULL; + char *unlink_path = NULL; + uint64_t ctx_int = 0; + inode_t *inode = NULL; + struct stat stbuf = { + 0, + }; + struct posix_private *priv = NULL; + posix_inode_ctx_t *ctx = NULL; + + priv = this->private; + + MAKE_HANDLE_PATH(newpath, this, gfid, NULL); + if (!newpath) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLE_CREATE, + "Failed to create handle path (%s)", uuid_utoa(gfid)); return ret; + } + + ret = sys_lstat(newpath, &stbuf); + if (!ret) { + ret = sys_link(newpath, real_path); + } else { + inode = inode_find(itable, gfid); + if (!inode) + return -1; + + LOCK(&inode->lock); + { + ret = __posix_inode_ctx_get_all(inode, this, &ctx); + if (ret) + goto unlock; + + if (ctx->unlink_flag != GF_UNLINK_TRUE) { + ret = -1; + goto unlock; + } + + POSIX_GET_FILE_UNLINK_PATH(priv->base_path, gfid, unlink_path); + ret = sys_link(unlink_path, real_path); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "Failed to link " + "%s with %s", + real_path, unlink_path); + goto unlock; + } + ret = sys_rename(unlink_path, newpath); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "Failed to link " + "%s with %s", + real_path, unlink_path); + goto unlock; + } + ctx_int = GF_UNLINK_FALSE; + ret = __posix_inode_ctx_set_unlink_flag(inode, this, ctx_int); + } + unlock: + UNLOCK(&inode->lock); + + inode_unref(inode); + } + + return ret; } diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h index e0b5b67e20f..f33ed92620d 100644 --- a/xlators/storage/posix/src/posix-handle.h +++ b/xlators/storage/posix/src/posix-handle.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2011-2017 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
This file is licensed to you under your choice of the GNU Lesser @@ -10,277 +10,212 @@ #ifndef _POSIX_HANDLE_H #define _POSIX_HANDLE_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif +#include "posix-inode-handle.h" -#include <limits.h> -#include <sys/types.h> -#include "xlator.h" -#include "gf-dirent.h" +#define HANDLE_ABSPATH_LEN(this) \ + (POSIX_BASE_PATH_LEN(this) + \ + SLEN("/" GF_HIDDEN_PATH "/00/00/" UUID0_STR) + 1) -/* From Open Group Base Specifications Issue 6 */ -#ifndef _XOPEN_PATH_MAX -#define _XOPEN_PATH_MAX 1024 -#endif - -#define TRASH_DIR "landfill" - -#define UUID0_STR "00000000-0000-0000-0000-000000000000" -#define SLEN(str) (sizeof(str) - 1) - -#define HANDLE_ABSPATH_LEN(this) (POSIX_BASE_PATH_LEN(this) + \ - SLEN("/" GF_HIDDEN_PATH "/00/00/" \ - UUID0_STR) + 1) - -#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/')) -#define LOC_IS_DIR(loc) (loc && (loc->inode) && \ - (loc->inode->ia_type == IA_IFDIR)) - -#define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) do { \ - var = alloca (strlen (prefix) + UUID_CANONICAL_FORM_LEN + 1); \ - strcpy (var, prefix); \ - strcat (var, uuid_utoa (pgfid)); \ - } while (0) +#define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) \ + do { \ + var = alloca(SLEN(prefix) + UUID_CANONICAL_FORM_LEN + 1); \ + strcpy(var, prefix); \ + strcat(var, uuid_utoa(pgfid)); \ + } while (0) -#define SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \ - value = hton32 (value); \ - op_ret = sys_lsetxattr (path, key, &value, sizeof (value), \ - flags); \ - if (op_ret == -1) { \ - op_errno = errno; \ - gf_log (this->name, GF_LOG_WARNING, \ - "setting xattr failed on %s: key = %s (%s)", \ - path, key, strerror (op_errno)); \ - goto label; \ - } \ - } while (0) +#define SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label) \ + do { \ + value = hton32(value); \ + op_ret = sys_lsetxattr(path, key, &value, sizeof(value), flags); \ + if (op_ret == -1) { \ + op_errno = errno; \ + 
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, \ + "setting xattr failed on %s: key = %s ", path, key); \ + goto label; \ + } \ + } while (0) -#define SET_PGFID_XATTR_IF_ABSENT(path, key, value, flags, op_ret, this, label)\ - do { \ - op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \ - if (op_ret == -1) { \ - op_errno = errno; \ - if (op_errno == ENOATTR) { \ - value = 1; \ - SET_PGFID_XATTR (path, key, value, flags, \ - op_ret, this, label); \ - } else { \ - gf_log(this->name, GF_LOG_WARNING, "getting " \ - "xattr failed on %s: key = %s (%s)", \ - path, key, strerror (op_errno)); \ - } \ - } \ - } while (0) +#define SET_PGFID_XATTR_IF_ABSENT(path, key, value, flags, op_ret, this, \ + label) \ + do { \ + op_ret = sys_lgetxattr(path, key, &value, sizeof(value)); \ + if (op_ret == -1) { \ + op_errno = errno; \ + if (op_errno == ENOATTR) { \ + value = 1; \ + SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label); \ + } else { \ + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \ + "getting xattr " \ + "failed on %s: key = %s ", \ + path, key); \ + } \ + } \ + } while (0) -#define REMOVE_PGFID_XATTR(path, key, op_ret, this, label) do { \ - op_ret = sys_lremovexattr (path, key); \ - if (op_ret == -1) { \ - op_errno = errno; \ - gf_log (this->name, GF_LOG_WARNING, "removing xattr " \ - "failed on %s: key = %s (%s)", path, key, \ - strerror (op_errno)); \ - goto label; \ - } \ - } while (0) +#define REMOVE_PGFID_XATTR(path, key, op_ret, this, label) \ + do { \ + op_ret = sys_lremovexattr(path, key); \ + if (op_ret == -1) { \ + op_errno = errno; \ + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PGFID_OP, \ + "removing xattr failed" \ + "on %s: key = %s", \ + path, key); \ + goto label; \ + } \ + } while (0) /* should be invoked holding a lock */ -#define LINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \ - op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \ - if (op_ret == -1) { \ - 
op_errno = errno; \ - if (op_errno == ENOATTR || op_errno == ENODATA) { \ - value = 1; \ - } else { \ - gf_log (this->name, GF_LOG_WARNING,"getting xattr " \ - "failed on %s: key = %s (%s)", path, key, \ - strerror (op_errno)); \ - goto label; \ - } \ - } else { \ - value = ntoh32 (value); \ - value++; \ - } \ - SET_PGFID_XATTR (path, key, value, flags, op_ret, this, label); \ - } while (0) +#define LINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) \ + do { \ + op_ret = sys_lgetxattr(path, key, &value, sizeof(value)); \ + if (op_ret == -1) { \ + op_errno = errno; \ + if (op_errno == ENOATTR || op_errno == ENODATA) { \ + value = 1; \ + } else { \ + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, \ + "getting xattr " \ + "failed on %s: key = %s ", \ + path, key); \ + goto label; \ + } \ + } else { \ + value = ntoh32(value); \ + value++; \ + } \ + SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label); \ + } while (0) /* should be invoked holding a lock */ -#define UNLINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, label) do { \ - op_ret = sys_lgetxattr (path, key, &value, sizeof (value)); \ - if (op_ret == -1) { \ - op_errno = errno; \ - gf_log (this->name, GF_LOG_WARNING, "getting xattr failed on " \ - "%s: key = %s (%s)", path, key, strerror (op_errno)); \ - goto label; \ - } else { \ - value = ntoh32 (value); \ - value--; \ - if (value > 0) { \ - SET_PGFID_XATTR (path, key, value, flags, op_ret, \ - this, label); \ - } else { \ - REMOVE_PGFID_XATTR (path, key, op_ret, this, label); \ - } \ - } \ +#define UNLINK_MODIFY_PGFID_XATTR(path, key, value, flags, op_ret, this, \ + label) \ + do { \ + op_ret = sys_lgetxattr(path, key, &value, sizeof(value)); \ + if (op_ret == -1) { \ + op_errno = errno; \ + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, \ + "getting xattr failed on " \ + "%s: key = %s ", \ + path, key); \ + goto label; \ + } else { \ + value = ntoh32(value); \ + value--; \ + if (value > 0) { \ + 
SET_PGFID_XATTR(path, key, value, flags, op_ret, this, label); \ + } else { \ + REMOVE_PGFID_XATTR(path, key, op_ret, this, label); \ + } \ + } \ } while (0) -#define MAKE_REAL_PATH(var, this, path) do { \ - size_t path_len = strlen(path); \ - size_t var_len = path_len + POSIX_BASE_PATH_LEN(this) + 1; \ - if (POSIX_PATH_MAX(this) != -1 && \ - var_len >= POSIX_PATH_MAX(this)) { \ - var = alloca (path_len + 1); \ - strcpy (var, (path[0] == '/') ? path + 1 : path); \ - } else { \ - var = alloca (var_len); \ - strcpy (var, POSIX_BASE_PATH(this)); \ - strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \ - } \ +#define MAKE_HANDLE_GFID_PATH(var, this, gfid) \ + do { \ + int __len = 0; \ + struct posix_private *__priv = this->private; \ + __len = POSIX_GFID_HANDLE_SIZE(__priv->base_path_length); \ + __len += 256; \ + var = alloca(__len); \ + __len = posix_handle_gfid_path(this, gfid, var, __len); \ } while (0) -#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \ - int __len; \ - __len = posix_handle_path (this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ - var = alloca (__len); \ - __len = posix_handle_path (this, gfid, base, var, __len); \ - if (__len <= 0) \ - var = NULL; \ - } while (0) - - -#define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) do { \ - int __len = 0; \ - __len = posix_handle_gfid_path (this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ - var = alloca (__len); \ - __len = posix_handle_gfid_path (this, gfid, base, var, __len); \ - } while (0) - - -#define MAKE_HANDLE_RELPATH(var, this, gfid, base) do { \ - int __len; \ - __len = posix_handle_relpath (this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ - var = alloca (__len); \ - __len = posix_handle_relpath (this, gfid, base, var, __len); \ - } while (0) - - -#define MAKE_HANDLE_ABSPATH(var, this, gfid) do { \ - struct posix_private * __priv = this->private; \ - int __len = HANDLE_ABSPATH_LEN(this); \ - var = alloca(__len); \ - snprintf(var, __len, "%s/" GF_HIDDEN_PATH 
"/%02x/%02x/%s", \ - __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \ - } while (0) - - -#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \ - if (gf_uuid_is_null (loc->gfid)) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "null gfid for path %s", (loc)->path); \ - break; \ - } \ - if (LOC_IS_DIR (loc) && LOC_HAS_ABSPATH (loc)) { \ - MAKE_REAL_PATH (rpath, this, (loc)->path); \ - op_ret = posix_pstat (this, (loc)->gfid, rpath, iatt_p); \ - break; \ - } \ - errno = 0; \ - op_ret = posix_istat (this, loc->gfid, NULL, iatt_p); \ - if (errno != ELOOP) { \ - MAKE_HANDLE_PATH (rpath, this, (loc)->gfid, NULL); \ - if (!rpath) { \ - op_ret = -1; \ - gf_log (this->name, GF_LOG_ERROR, \ - "Failed to create inode handle " \ - "for path %s", (loc)->path); \ - } \ - break; \ - } \ - /* __ret == -1 && errno == ELOOP */ \ - } while (0) - +#define MAKE_HANDLE_RELPATH(var, this, gfid, base) \ + do { \ + int __len; \ + __len = POSIX_GFID_HANDLE_RELSIZE; \ + if (base) { \ + __len += (strlen(base) + 1); \ + } \ + var = alloca(__len); \ + __len = posix_handle_relpath(this, gfid, base, var, __len); \ + } while (0) -#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) do { \ - char *__parp; \ - \ - if (gf_uuid_is_null (loc->pargfid) || !loc->name) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "null pargfid/name for path %s", loc->path); \ - break; \ - } \ - \ - if (LOC_HAS_ABSPATH (loc)) { \ - MAKE_REAL_PATH (entp, this, loc->path); \ - __parp = strdupa (entp); \ - parp = dirname (__parp); \ - op_ret = posix_pstat (this, NULL, entp, ent_p); \ - break; \ - } \ - errno = 0; \ - op_ret = posix_istat (this, loc->pargfid, loc->name, ent_p); \ - if (errno != ELOOP) { \ - MAKE_HANDLE_PATH (parp, this, loc->pargfid, NULL); \ - MAKE_HANDLE_PATH (entp, this, loc->pargfid, loc->name); \ - if (!parp || !entp) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "Failed to create entry handle " \ - "for path %s", loc->path); \ - } \ - break; \ - } \ - /* __ret == -1 && errno == ELOOP */ \ - /* 
expand ELOOP */ \ - } while (0) +#define MAKE_HANDLE_ABSPATH(var, this, gfid) \ + do { \ + struct posix_private *__priv = this->private; \ + int __len = HANDLE_ABSPATH_LEN(this); \ + var = alloca(__len); \ + snprintf(var, __len, "%s/" GF_HIDDEN_PATH "/%02x/%02x/%s", \ + __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \ + } while (0) +#define MAKE_HANDLE_ABSPATH_FD(var, this, gfid, dfd) \ + do { \ + struct posix_private *__priv = this->private; \ + int findex = gfid[0]; \ + int __len = POSIX_GFID_HASH2_LEN; \ + var = alloca(__len); \ + snprintf(var, __len, "%02x/%s", gfid[1], uuid_utoa(gfid)); \ + dfd = __priv->arrdfd[findex]; \ + } while (0) -#define POSIX_ANCESTRY_PATH (1 << 0) -#define POSIX_ANCESTRY_DENTRY (1 << 1) +#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) \ + do { \ + char *__parp; \ + \ + if (gf_uuid_is_null(loc->pargfid) || !loc->name) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_ENTRY_HANDLE_CREATE, \ + "null pargfid/name for path %s", loc->path); \ + break; \ + } \ + \ + if (strchr(loc->name, '/')) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_ENTRY_HANDLE_CREATE, \ + "'/' in name not allowed: (%s)", loc->name); \ + op_ret = -1; \ + break; \ + } \ + if (LOC_HAS_ABSPATH(loc)) { \ + MAKE_REAL_PATH(entp, this, loc->path); \ + __parp = strdupa(entp); \ + parp = dirname(__parp); \ + op_ret = posix_pstat(this, loc->inode, NULL, entp, ent_p, \ + _gf_false); \ + break; \ + } \ + errno = 0; \ + op_ret = posix_istat(this, loc->inode, loc->pargfid, loc->name, \ + ent_p); \ + if (errno != ELOOP) { \ + MAKE_HANDLE_PATH(parp, this, loc->pargfid, NULL); \ + MAKE_HANDLE_PATH(entp, this, loc->pargfid, loc->name); \ + if (!parp || !entp) { \ + gf_msg(this->name, GF_LOG_ERROR, errno, \ + P_MSG_ENTRY_HANDLE_CREATE, \ + "Failed to create entry handle " \ + "for path %s", \ + loc->path); \ + } \ + break; \ + } \ + /* __ret == -1 && errno == ELOOP */ \ + /* expand ELOOP */ \ + } while (0) +#define POSIX_GFID_HASH2_LEN 45 int -posix_handle_path (xlator_t 
*this, uuid_t gfid, const char *basename, char *buf, - size_t len); +posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t len); int -posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize, - gf_dirent_t *head, int type, uuid_t gfid, - const size_t handle_size, - const char *priv_base_path, - inode_table_t *table, inode_t **parent, - dict_t *xdata); -int -posix_handle_path_safe (xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t len); +posix_handle_hard(xlator_t *this, const char *path, uuid_t gfid, + struct stat *buf); int -posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t len); +posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, + uuid_t gfid, struct stat *buf); int -posix_handle_hard (xlator_t *this, const char *path, uuid_t gfid, - struct stat *buf); - +posix_handle_unset(xlator_t *this, uuid_t gfid, const char *basename); int -posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc, - uuid_t gfid, struct stat *buf); +posix_create_link_if_gfid_exists(xlator_t *this, uuid_t gfid, char *real_path, + inode_table_t *itable); int -posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename); - -int posix_handle_mkdir_hashes (xlator_t *this, const char *newpath); +posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata); -int posix_handle_init (xlator_t *this); - -int posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid, - char *real_path); - -int -posix_handle_trash_init (xlator_t *this); +void +posix_disk_space_check(xlator_t *this); #endif /* !_POSIX_HANDLE_H */ diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index e1bd5b127fd..67db3324083 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -7,11 +7,6 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by 
the Free Software Foundation. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #define __XOPEN_SOURCE 500 #include <stdint.h> @@ -23,6 +18,7 @@ #include <ftw.h> #include <sys/stat.h> #include <signal.h> +#include <aio.h> #ifdef HAVE_SYS_ACL_H #ifdef HAVE_ACL_LIBACL_H /* for acl_to_any_text() */ @@ -36,2022 +32,3635 @@ #include <alloca.h> #endif /* GF_BSD_HOST_OS */ -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" +#include <fnmatch.h> #include "posix.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" +#include "posix-messages.h" +#include "posix-metadata.h" +#include "posix-handle.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include "hashfn.h" -#include "glusterfs-acl.h" -#include <fnmatch.h> +#include <glusterfs/glusterfs-acl.h> +#include "posix-gfid-path.h" +#include <glusterfs/events.h> +#include "glusterfs/syncop.h" +#include "timer-wheel.h" +#include <sys/types.h> char *marker_xattrs[] = {"trusted.glusterfs.quota.*", - "trusted.glusterfs.*.xtime", - NULL}; - -char *marker_contri_key = "trusted.*.*.contri"; - -static char* posix_ignore_xattrs[] = { - "gfid-req", - GLUSTERFS_ENTRYLK_COUNT, - GLUSTERFS_INODELK_COUNT, - GLUSTERFS_POSIXLK_COUNT, - GLUSTERFS_PARENT_ENTRYLK, - GF_GFIDLESS_LOOKUP, - NULL -}; - -static char* list_xattr_ignore_xattrs[] = { - GF_SELINUX_XATTR_KEY, - GF_XATTR_VOL_ID_KEY, - GFID_XATTR_KEY, - NULL -}; + "trusted.glusterfs.*.xtime", NULL}; + +static char *marker_contri_key = "trusted.*.*.contri"; + +static char *posix_ignore_xattrs[] = {"gfid-req", + GLUSTERFS_INTERNAL_FOP_KEY, + 
GLUSTERFS_ENTRYLK_COUNT, + GLUSTERFS_INODELK_COUNT, + GLUSTERFS_POSIXLK_COUNT, + GLUSTERFS_PARENT_ENTRYLK, + GF_GFIDLESS_LOOKUP, + GLUSTERFS_INODELK_DOM_COUNT, + NULL}; + +static char *list_xattr_ignore_xattrs[] = {GFID_XATTR_KEY, GF_XATTR_VOL_ID_KEY, + GF_SELINUX_XATTR_KEY, NULL}; + gf_boolean_t -posix_special_xattr (char **pattern, char *key) +posix_special_xattr(char **pattern, char *key) { - int i = 0; - gf_boolean_t flag = _gf_false; + int i = 0; + gf_boolean_t flag = _gf_false; - GF_VALIDATE_OR_GOTO ("posix", pattern, out); - GF_VALIDATE_OR_GOTO ("posix", key, out); + GF_VALIDATE_OR_GOTO("posix", pattern, out); + GF_VALIDATE_OR_GOTO("posix", key, out); - for (i = 0; pattern[i]; i++) { - if (!fnmatch (pattern[i], key, 0)) { - flag = _gf_true; - break; - } + for (i = 0; pattern[i]; i++) { + if (!fnmatch(pattern[i], key, 0)) { + flag = _gf_true; + break; } + } out: - return flag; + return flag; } -static gf_boolean_t -_is_in_array (char **str_array, char *str) +int +posix_handle_mdata_xattr(call_frame_t *frame, const char *name, int *op_errno) { - int i = 0; + int i = 0; + int ret = 0; + int pid = 1; + static const char *const internal_xattr[] = {GF_XATTR_MDATA_KEY, NULL}; + if (frame && frame->root) { + pid = frame->root->pid; + } + + if (!name || pid < GF_CLIENT_PID_MAX) { + /* No need to do anything here */ + ret = 0; + goto out; + } - if (!str) - return _gf_false; + for (i = 0; internal_xattr[i]; i++) { + if (fnmatch(internal_xattr[i], name, FNM_PERIOD) == 0) { + ret = -1; + if (op_errno) { + *op_errno = ENOATTR; + } - for (i = 0; str_array[i]; i++) { - if (strcmp (str, str_array[i]) == 0) - return _gf_true; + gf_msg_debug("posix", ENOATTR, + "Ignoring the key %s as an internal " + "xattrs.", + name); + goto out; } - return _gf_false; -} + } -static gf_boolean_t -posix_xattr_ignorable (char *key) -{ - return _is_in_array (posix_ignore_xattrs, key); + ret = 0; +out: + return ret; } -static int -_posix_xattr_get_set_from_backend (posix_xattr_filler_t *filler, 
char *key) +int +posix_handle_georep_xattrs(call_frame_t *frame, const char *name, int *op_errno, + gf_boolean_t is_getxattr) { - ssize_t xattr_size = -1; - int ret = 0; - char *value = NULL; + int i = 0; + int ret = 0; + int pid = 1; + gf_boolean_t filter_xattr = _gf_true; + static const char *georep_xattr[] = { + "*.glusterfs.*.stime", "*.glusterfs.*.xtime", + "*.glusterfs.*.entry_stime", "*.glusterfs.volume-mark.*", NULL}; + + if (!name) { + /* No need to do anything here */ + ret = 0; + goto out; + } - if (filler->real_path) - xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); - else - xattr_size = sys_fgetxattr (filler->fdnum, key, NULL, 0); + if (frame && frame->root) { + pid = frame->root->pid; + } - if (xattr_size != -1) { - value = GF_CALLOC (1, xattr_size + 1, gf_posix_mt_char); - if (!value) - goto out; + if (pid == GF_CLIENT_PID_GSYNCD && is_getxattr) { + filter_xattr = _gf_false; - if (filler->real_path) - xattr_size = sys_lgetxattr (filler->real_path, key, - value, xattr_size); - else - xattr_size = sys_fgetxattr (filler->fdnum, key, value, - xattr_size); - if (xattr_size == -1) { - if (filler->real_path) - gf_log (filler->this->name, GF_LOG_WARNING, - "getxattr failed. path: %s, key: %s", - filler->real_path, key); - else - gf_log (filler->this->name, GF_LOG_WARNING, - "getxattr failed. gfid: %s, key: %s", - uuid_utoa (filler->fd->inode->gfid), - key); - GF_FREE (value); - goto out; - } + /* getxattr from gsyncd process should return all the + * internal xattr. In other cases ignore such xattrs + */ + } - value[xattr_size] = '\0'; - ret = dict_set_bin (filler->xattr, key, value, xattr_size); - if (ret < 0) { - if (filler->real_path) - gf_log (filler->this->name, GF_LOG_DEBUG, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - else - gf_log (filler->this->name, GF_LOG_DEBUG, - "dict set failed. 
gfid: %s, key: %s", - uuid_utoa (filler->fd->inode->gfid), - key); - GF_FREE (value); - goto out; - } + for (i = 0; filter_xattr && georep_xattr[i]; i++) { + if (fnmatch(georep_xattr[i], name, FNM_PERIOD) == 0) { + ret = -1; + if (op_errno) + *op_errno = ENOATTR; + + gf_msg_debug("posix", ENOATTR, + "Ignoring the key %s as an internal " + "xattrs.", + name); + goto out; } + } + + ret = 0; +out: + return ret; +} + +int32_t +posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict, struct iatt *in_stbuf) +{ + int ret = -1; + mode_t mode = 0; + + if ((!in_dict) || (!in_stbuf) || (!out_dict)) { + goto out; + } + + /* We need this only for files */ + if (!(IA_ISREG(in_stbuf->ia_type))) { ret = 0; + goto out; + } + + /* Nobody asked for this */ + if (!dict_get(in_dict, DHT_MODE_IN_XDATA_KEY)) { + ret = 0; + goto out; + } + mode = st_mode_from_ia(in_stbuf->ia_prot, in_stbuf->ia_type); + + ret = dict_set_int32(out_dict, DHT_MODE_IN_XDATA_KEY, mode); + out: - return ret; + return ret; } -static int gf_posix_xattr_enotsup_log; +static gf_boolean_t +posix_xattr_ignorable(char *key) +{ + return gf_get_index_by_elem(posix_ignore_xattrs, key) >= 0; +} static int -_posix_get_marker_all_contributions (posix_xattr_filler_t *filler) +_posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) { - ssize_t size = -1, remaining_size = -1, list_offset = 0; - int ret = -1; - char *list = NULL, key[4096] = {0, }; - - if (filler->real_path) - size = sys_llistxattr (filler->real_path, NULL, 0); - else - size = sys_flistxattr (filler->fdnum, NULL, 0); - if (size == -1) { - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - THIS->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting brick" - " with 'user_xattr' flag)"); - } else { - if (filler->real_path) - gf_log (THIS->name, GF_LOG_WARNING, - "listxattr failed on %s: %s", - filler->real_path, strerror (errno)); - else - gf_log (THIS->name, 
GF_LOG_WARNING, - "listxattr failed on %s: %s", - uuid_utoa (filler->fd->inode->gfid), - strerror (errno)); - } - goto out; + ssize_t xattr_size = 256; /* guesstimated initial size of xattr */ + int ret = -1; + char *value = NULL; + + if (!gf_is_valid_xattr_namespace(key)) { + goto out; + } + + /* Most of the gluster internal xattrs don't exceed 256 bytes. So try + * getxattr with ~256 bytes. If it gives ERANGE then go the old way + * of getxattr with NULL buf to find the length and then getxattr with + * allocated buf to fill the data. This way we reduce lot of getxattrs. + */ + + value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char); + if (!value) { + goto out; + } + + if (filler->real_path) + xattr_size = sys_lgetxattr(filler->real_path, key, value, xattr_size); + else + xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size); + + if (xattr_size == -1) { + if (value) { + GF_FREE(value); + value = NULL; + } + /* xattr_size == -1 - failed to fetch the xattr with + * current settings. + * If it was not because value was too small, abort + */ + if (errno != ERANGE) { + goto out; + } + + /* Get the real length needed */ + if (filler->real_path) { + xattr_size = sys_lgetxattr(filler->real_path, key, NULL, 0); + } else { + xattr_size = sys_fgetxattr(filler->fdnum, key, NULL, 0); } - - if (size == 0) { - ret = 0; - goto out; + if (xattr_size == -1) { + goto out; } - list = alloca (size); - if (!list) { - goto out; + value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char); + if (!value) { + goto out; } + if (filler->real_path) { + xattr_size = sys_lgetxattr(filler->real_path, key, value, + xattr_size); + } else { + xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size); + } + if (xattr_size == -1) { + GF_FREE(value); + value = NULL; + if (filler->real_path) + gf_msg(filler->this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_FAILED, "getxattr failed. 
path: %s, key: %s", + filler->real_path, key); + else + gf_msg(filler->this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_FAILED, "getxattr failed. gfid: %s, key: %s", + uuid_utoa(filler->fd->inode->gfid), key); + goto out; + } + } + + value[xattr_size] = '\0'; + ret = dict_set_bin(filler->xattr, key, value, xattr_size); + + if (ret < 0) { + if (value) + GF_FREE(value); if (filler->real_path) - size = sys_llistxattr (filler->real_path, list, size); + gf_msg_debug(filler->this->name, 0, + "dict set failed. path: %s, key: %s", + filler->real_path, key); else - size = sys_flistxattr (filler->fdnum, list, size); - if (size <= 0) { - ret = size; - goto out; - } + gf_msg_debug(filler->this->name, 0, + "dict set failed. gfid: %s, key: %s", + uuid_utoa(filler->fd->inode->gfid), key); + goto out; + } + ret = 0; +out: + return ret; +} - remaining_size = size; - list_offset = 0; +static int gf_posix_xattr_enotsup_log; - while (remaining_size > 0) { - strcpy (key, list + list_offset); - if (fnmatch (marker_contri_key, key, 0) == 0) { - ret = _posix_xattr_get_set_from_backend (filler, key); - } +static int +_posix_get_marker_all_contributions(posix_xattr_filler_t *filler) +{ + ssize_t size = -1, remaining_size = -1, list_offset = 0; + int ret = -1; + int len; + char *list = NULL, key[4096] = { + 0, + }; + + if (filler->real_path) + size = sys_llistxattr(filler->real_path, NULL, 0); + else + size = sys_flistxattr(filler->fdnum, NULL, 0); + if (size == -1) { + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, THIS->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting brick" + " with 'user_xattr' flag)"); + } else { + if (filler->real_path) + gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "listxattr failed on %s", filler->real_path); + else + gf_msg(THIS->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "listxattr failed on %s", + uuid_utoa(filler->fd->inode->gfid)); + } + goto out; + } + + if 
(size == 0) { + ret = 0; + goto out; + } - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; + list = alloca(size); + if (!list) { + goto out; + } + + if (filler->real_path) + size = sys_llistxattr(filler->real_path, list, size); + else + size = sys_flistxattr(filler->fdnum, list, size); + if (size <= 0) { + ret = size; + goto out; + } + + remaining_size = size; + list_offset = 0; + + while (remaining_size > 0) { + len = snprintf(key, sizeof(key), "%s", list + list_offset); + if (fnmatch(marker_contri_key, key, 0) == 0) { + (void)_posix_xattr_get_set_from_backend(filler, key); } + remaining_size -= (len + 1); + list_offset += (len + 1); + } - ret = 0; + ret = 0; out: - return ret; + return ret; } static int -_posix_get_marker_quota_contributions (posix_xattr_filler_t *filler, char *key) +_posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key) { - char *saveptr = NULL, *token = NULL, *tmp_key = NULL; - char *ptr = NULL; - int i = 0, ret = 0; + char *saveptr = NULL, *token = NULL, *tmp_key = NULL; + char *ptr = NULL; + int i = 0, ret = 0; - tmp_key = ptr = gf_strdup (key); - for (i = 0; i < 4; i++) { - token = strtok_r (tmp_key, ".", &saveptr); - tmp_key = NULL; - } + tmp_key = ptr = gf_strdup(key); + if (tmp_key == NULL) { + return -1; + } + for (i = 0; i < 4; i++) { + token = strtok_r(tmp_key, ".", &saveptr); + tmp_key = NULL; + } - if (strncmp (token, "contri", strlen ("contri")) == 0) { - ret = _posix_get_marker_all_contributions (filler); - } else { - ret = _posix_xattr_get_set_from_backend (filler, key); - } + if (strncmp(token, "contri", SLEN("contri")) == 0) { + ret = _posix_get_marker_all_contributions(filler); + } else { + ret = _posix_xattr_get_set_from_backend(filler, key); + } - GF_FREE (ptr); + GF_FREE(ptr); - return ret; + return ret; } static inode_t * -_get_filler_inode (posix_xattr_filler_t *filler) +_get_filler_inode(posix_xattr_filler_t *filler) { - if (filler->fd) - return filler->fd->inode; - else if 
(filler->loc && filler->loc->inode) - return filler->loc->inode; - else - return NULL; + if (filler->fd) + return filler->fd->inode; + else if (filler->loc && filler->loc->inode) + return filler->loc->inode; + else + return NULL; } static int -_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key) +_posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, + void *xattrargs) { - inode_t *inode = NULL; - int ret = -1; - - inode = _get_filler_inode (filler); - if (!inode || gf_uuid_is_null (inode->gfid)) - goto out; - - ret = dict_set_uint32 (filler->xattr, key, inode->fd_count); + posix_xattr_filler_t *filler = xattrargs; + int ret = -1; + int len = 0; + char *databuf = NULL; + int _fd = -1; + ssize_t req_size = 0; + int32_t list_offset = 0; + ssize_t remaining_size = 0; + char *xattr = NULL; + inode_t *inode = NULL; + char *value = NULL; + struct iatt stbuf = { + 0, + }; + + if (posix_xattr_ignorable(key)) + goto out; + + len = strlen(key); + /* should size be put into the data_t ? */ + if ((filler->stbuf != NULL && IA_ISREG(filler->stbuf->ia_type)) && + (len == SLEN(GF_CONTENT_KEY) && !strcmp(key, GF_CONTENT_KEY))) { + if (!filler->real_path) + goto out; + + /* file content request */ + req_size = data_to_uint64(data); + if (req_size >= filler->stbuf->ia_size) { + _fd = open(filler->real_path, O_RDONLY); + if (_fd == -1) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "Opening file %s failed", + filler->real_path); + goto err; + } + + /* + * There could be a situation where the ia_size is + * zero. GF_CALLOC will return a pointer to the + * memory initialized by gf_mem_set_acct_info. + * This function adds a header and a footer to + * the allocated memory. The returned pointer + * points to the memory just after the header, but + * when size is zero, there is no space for user + * data. The memory can be freed by calling GF_FREE. 
+ */ + databuf = GF_CALLOC(1, filler->stbuf->ia_size, gf_posix_mt_char); + if (!databuf) { + goto err; + } + + ret = sys_read(_fd, databuf, filler->stbuf->ia_size); + if (ret == -1) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "Read on file %s failed", + filler->real_path); + goto err; + } + + ret = sys_close(_fd); + _fd = -1; + if (ret == -1) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "Close on file %s failed", + filler->real_path); + goto err; + } + + ret = dict_set_bin(filler->xattr, key, databuf, + filler->stbuf->ia_size); + if (ret < 0) { + gf_msg(filler->this->name, GF_LOG_ERROR, 0, + P_MSG_XDATA_GETXATTR, + "failed to set dict value. key: %s," + "path: %s", + key, filler->real_path); + goto err; + } + + /* To avoid double free in cleanup below */ + databuf = NULL; + err: + if (_fd != -1) + sys_close(_fd); + GF_FREE(databuf); + } + } else if (len == SLEN(GLUSTERFS_OPEN_FD_COUNT) && + !strcmp(key, GLUSTERFS_OPEN_FD_COUNT)) { + inode = _get_filler_inode(filler); + if (!inode || gf_uuid_is_null(inode->gfid)) + goto out; + ret = dict_set_uint32(filler->xattr, key, inode->fd_count); if (ret < 0) { - gf_log (filler->this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", key); - goto out; + gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set dictionary value for %s", key); + } + } else if (len == SLEN(GLUSTERFS_ACTIVE_FD_COUNT) && + !strcmp(key, GLUSTERFS_ACTIVE_FD_COUNT)) { + inode = _get_filler_inode(filler); + if (!inode || gf_uuid_is_null(inode->gfid)) + goto out; + ret = dict_set_uint32(filler->xattr, key, inode->active_fd_count); + if (ret < 0) { + gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set dictionary value for %s", key); + } + } else if (len == SLEN(GET_ANCESTRY_PATH_KEY) && + !strcmp(key, GET_ANCESTRY_PATH_KEY)) { + /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt + * fetching it via 
path-based fops. Hence, leaving it as it is + * for now. + */ + if (!filler->real_path) + goto out; + char *path = NULL; + ret = posix_get_ancestry(filler->this, filler->loc->inode, NULL, &path, + POSIX_ANCESTRY_PATH, &filler->op_errno, + xattr_req); + if (ret < 0) { + goto out; } -out: - return ret; -} -static int -_posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data, - void *xattrargs) -{ - posix_xattr_filler_t *filler = xattrargs; - int ret = -1; - char *databuf = NULL; - int _fd = -1; - loc_t *loc = NULL; - ssize_t req_size = 0; + ret = dict_set_dynstr_sizen(filler->xattr, GET_ANCESTRY_PATH_KEY, path); + if (ret < 0) { + GF_FREE(path); + goto out; + } + } else if (fnmatch(marker_contri_key, key, 0) == 0) { + ret = _posix_get_marker_quota_contributions(filler, key); + } else if (len == SLEN(GF_REQUEST_LINK_COUNT_XDATA) && + strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) { + ret = dict_set_sizen(filler->xattr, GF_REQUEST_LINK_COUNT_XDATA, data); + } else if (len == SLEN(GF_GET_SIZE) && strcmp(key, GF_GET_SIZE) == 0) { + if (filler->stbuf && IA_ISREG(filler->stbuf->ia_type)) { + ret = dict_set_uint64(filler->xattr, GF_GET_SIZE, + filler->stbuf->ia_size); + } + } else if (GF_POSIX_ACL_REQUEST(key)) { + if (filler->real_path) + ret = posix_pstat(filler->this, NULL, NULL, filler->real_path, + &stbuf, _gf_false); + else + ret = posix_fdstat(filler->this, filler->fd->inode, filler->fdnum, + &stbuf); + if (ret < 0) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "lstat on %s failed", + filler->real_path ?: uuid_utoa(filler->fd->inode->gfid)); + goto out; + } - if (posix_xattr_ignorable (key)) - goto out; - /* should size be put into the data_t ? 
*/ - if (!strcmp (key, GF_CONTENT_KEY) - && IA_ISREG (filler->stbuf->ia_type)) { - if (!filler->real_path) - goto out; - - /* file content request */ - req_size = data_to_uint64 (data); - if (req_size >= filler->stbuf->ia_size) { - _fd = open (filler->real_path, O_RDONLY); - if (_fd == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Opening file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - /* - * There could be a situation where the ia_size is - * zero. GF_CALLOC will return a pointer to the - * memory initialized by gf_mem_set_acct_info. - * This function adds a header and a footer to - * the allocated memory. The returned pointer - * points to the memory just after the header, but - * when size is zero, there is no space for user - * data. The memory can be freed by calling GF_FREE. - */ - databuf = GF_CALLOC (1, filler->stbuf->ia_size, - gf_posix_mt_char); - if (!databuf) { - goto err; - } - - ret = read (_fd, databuf, filler->stbuf->ia_size); - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Read on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = close (_fd); - _fd = -1; - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Close on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = dict_set_bin (filler->xattr, key, - databuf, filler->stbuf->ia_size); - if (ret < 0) { - gf_log (filler->this->name, GF_LOG_ERROR, - "failed to set dict value. key: %s, path: %s", - key, filler->real_path); - goto err; - } - - /* To avoid double free in cleanup below */ - databuf = NULL; - err: - if (_fd != -1) - close (_fd); - GF_FREE (databuf); - } - } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { - ret = _posix_filler_get_openfd_count (filler, key); - } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) { - /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt - * fetching it via path-based fops. 
Hence, leaving it as it is - * for now. - */ - if (!filler->real_path) - goto out; - char *path = NULL; - ret = posix_get_ancestry (filler->this, filler->loc->inode, - NULL, &path, POSIX_ANCESTRY_PATH, - &filler->op_errno, xattr_req); - if (ret < 0) { - goto out; - } + /* Avoid link follow in virt_pacl_get, donot fill acl for symlink.*/ + if (IA_ISLNK(stbuf.ia_type)) + goto out; - ret = dict_set_dynstr (filler->xattr, GET_ANCESTRY_PATH_KEY, - path); - if (ret < 0) { - GF_FREE (path); - goto out; - } + /* ACL_TYPE_DEFAULT is not supported for non-directory, skip */ + if (!IA_ISDIR(stbuf.ia_type) && + !strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT))) + goto out; - } else if (fnmatch (marker_contri_key, key, 0) == 0) { - ret = _posix_get_marker_quota_contributions (filler, key); - } else if (strcmp(key, CTR_REQUEST_LINK_COUNT_XDATA) == 0) { - ret = dict_set (filler->xattr, - CTR_REQUEST_LINK_COUNT_XDATA, data); - } else { - ret = _posix_xattr_get_set_from_backend (filler, key); + ret = posix_pacl_get(filler->real_path, filler->fdnum, key, &value); + if (ret || !value) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "could not get acl (%s) for %s, %d", + key, filler->real_path ?: uuid_utoa(filler->fd->inode->gfid), + ret); + goto out; } + + ret = dict_set_dynstrn(filler->xattr, (char *)key, len, value); + if (ret < 0) { + GF_FREE(value); + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, + "could not set acl (%s) for %s in dictionary", key, + filler->real_path ?: uuid_utoa(filler->fd->inode->gfid)); + goto out; + } + } else { + remaining_size = filler->list_size; + while (remaining_size > 0) { + xattr = filler->list + list_offset; + if (fnmatch(key, xattr, 0) == 0) + ret = _posix_xattr_get_set_from_backend(filler, xattr); + len = strlen(xattr); + remaining_size -= (len + 1); + list_offset += (len + 1); + } + } out: - return 0; + return 0; } - int -posix_fill_gfid_path (xlator_t *this, const char *path, 
struct iatt *iatt) +posix_fill_gfid_path(xlator_t *this, const char *path, struct iatt *iatt) { - int ret = 0; - ssize_t size = 0; + int ret = 0; + ssize_t size = 0; - if (!iatt) - return 0; + if (!iatt) + return 0; - size = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16); - /* Return value of getxattr */ - if ((size == 16) || (size == -1)) - ret = 0; - else - ret = size; + size = sys_lgetxattr(path, GFID_XATTR_KEY, iatt->ia_gfid, 16); + /* Return value of getxattr */ + if ((size == 16) || (size == -1)) + ret = 0; + else + ret = size; - return ret; + return ret; } - int -posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt) +posix_fill_gfid_fd(xlator_t *this, int fd, struct iatt *iatt) { - int ret = 0; - ssize_t size = 0; + int ret = 0; + ssize_t size = 0; - if (!iatt) - return 0; + if (!iatt) + return 0; - size = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16); - /* Return value of getxattr */ - if ((size == 16) || (size == -1)) - ret = 0; - else - ret = size; + size = sys_fgetxattr(fd, GFID_XATTR_KEY, iatt->ia_gfid, 16); + /* Return value of getxattr */ + if ((size == 16) || (size == -1)) + ret = 0; + else + ret = size; - return ret; + return ret; } void -posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf) +posix_fill_ino_from_gfid(xlator_t *this, struct iatt *buf) { - uint64_t temp_ino = 0; - int j = 0; - int i = 0; - - /* consider least significant 8 bytes of value out of gfid */ - if (gf_uuid_is_null (buf->ia_gfid)) { - buf->ia_ino = -1; - goto out; - } - for (i = 15; i > (15 - 8); i--) { - temp_ino += (uint64_t)(buf->ia_gfid[i]) << j; - j += 8; - } - buf->ia_ino = temp_ino; + /* consider least significant 8 bytes of value out of gfid */ + if (gf_uuid_is_null(buf->ia_gfid)) { + buf->ia_ino = -1; + goto out; + } + buf->ia_ino = gfid_to_ino(buf->ia_gfid); + buf->ia_flags |= IATT_INO; out: - return; + return; } int -posix_fdstat (xlator_t *this, int fd, struct iatt *stbuf_p) +posix_fdstat(xlator_t *this, inode_t *inode, int fd, 
struct iatt *stbuf_p) { - int ret = 0; - struct stat fstatbuf = {0, }; - struct iatt stbuf = {0, }; + int ret = 0; + struct stat fstatbuf = { + 0, + }; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; - ret = fstat (fd, &fstatbuf); - if (ret == -1) - goto out; + priv = this->private; + + ret = sys_fstat(fd, &fstatbuf); + if (ret == -1) + goto out; - if (fstatbuf.st_nlink && !S_ISDIR (fstatbuf.st_mode)) - fstatbuf.st_nlink--; + if (fstatbuf.st_nlink && !S_ISDIR(fstatbuf.st_mode)) + fstatbuf.st_nlink--; - iatt_from_stat (&stbuf, &fstatbuf); + iatt_from_stat(&stbuf, &fstatbuf); - ret = posix_fill_gfid_fd (this, fd, &stbuf); - if (ret) - gf_log_callingfn (this->name, GF_LOG_DEBUG, "failed to get gfid"); + if (inode && priv->ctime) { + ret = posix_get_mdata_xattr(this, NULL, fd, inode, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on gfid: %s", + uuid_utoa(inode->gfid)); + goto out; + } + } + ret = posix_fill_gfid_fd(this, fd, &stbuf); + stbuf.ia_flags |= IATT_GFID; - posix_fill_ino_from_gfid (this, &stbuf); + posix_fill_ino_from_gfid(this, &stbuf); - if (stbuf_p) - *stbuf_p = stbuf; + if (stbuf_p) + *stbuf_p = stbuf; out: - return ret; + return ret; } - +/* The inode here is expected to update posix_mdata stored on disk. + * Don't use it as a general purpose inode and don't expect it to + * be always exists + */ int -posix_istat (xlator_t *this, uuid_t gfid, const char *basename, - struct iatt *buf_p) -{ - char *real_path = NULL; - struct stat lstatbuf = {0, }; - struct iatt stbuf = {0, }; - int ret = 0; - struct posix_private *priv = NULL; - - priv = this->private; - - MAKE_HANDLE_PATH (real_path, this, gfid, basename); - if (!real_path) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to create handle path for %s/%s", - uuid_utoa (gfid), basename ? 
basename : ""); - errno = ESTALE; - ret = -1; - goto out; - } +posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename, + struct iatt *buf_p) +{ + char *real_path = NULL; + struct stat lstatbuf = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + MAKE_HANDLE_PATH(real_path, this, gfid, basename); + if (!real_path) { + gf_msg(this->name, GF_LOG_ERROR, ESTALE, P_MSG_HANDLE_PATH_CREATE, + "Failed to create handle path for %s/%s", uuid_utoa(gfid), + basename ? basename : ""); + errno = ESTALE; + ret = -1; + goto out; + } + + ret = sys_lstat(real_path, &lstatbuf); + + if (ret != 0) { + if (ret == -1) { + if (errno != ENOENT && errno != ELOOP) + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", real_path); + } else { + // may be some backend filesystem issue + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_LSTAT_FAILED, + "lstat failed on %s and return value is %d " + "instead of -1. Please see dmesg output to " + "check whether the failure is due to backend " + "filesystem issue", + real_path, ret); + ret = -1; + } + goto out; + } + + if ((lstatbuf.st_ino == priv->handledir.st_ino) && + (lstatbuf.st_dev == priv->handledir.st_dev)) { + errno = ENOENT; + return -1; + } - ret = lstat (real_path, &lstatbuf); + if (!S_ISDIR(lstatbuf.st_mode)) + lstatbuf.st_nlink--; - if (ret != 0) { - if (ret == -1) { - if (errno != ENOENT && errno != ELOOP) - gf_log (this->name, GF_LOG_WARNING, - "lstat failed on %s (%s)", - real_path, strerror (errno)); - } else { - // may be some backend filesystem issue - gf_log (this->name, GF_LOG_ERROR, "lstat failed on " - "%s and return value is %d instead of -1. 
" - "Please see dmesg output to check whether the " - "failure is due to backend filesystem issue", - real_path, ret); - ret = -1; - } - goto out; - } + iatt_from_stat(&stbuf, &lstatbuf); - if ((lstatbuf.st_ino == priv->handledir.st_ino) && - (lstatbuf.st_dev == priv->handledir.st_dev)) { - errno = ENOENT; - return -1; + if (inode && priv->ctime) { + ret = posix_get_mdata_xattr(this, real_path, -1, inode, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on %s", real_path); + goto out; } + } - if (!S_ISDIR (lstatbuf.st_mode)) - lstatbuf.st_nlink --; - - iatt_from_stat (&stbuf, &lstatbuf); - - if (basename) - posix_fill_gfid_path (this, real_path, &stbuf); - else - gf_uuid_copy (stbuf.ia_gfid, gfid); + if (basename) + posix_fill_gfid_path(this, real_path, &stbuf); + else + gf_uuid_copy(stbuf.ia_gfid, gfid); + stbuf.ia_flags |= IATT_GFID; - posix_fill_ino_from_gfid (this, &stbuf); + posix_fill_ino_from_gfid(this, &stbuf); - if (buf_p) - *buf_p = stbuf; + if (buf_p) + *buf_p = stbuf; out: - return ret; + return ret; } - - int -posix_pstat (xlator_t *this, uuid_t gfid, const char *path, - struct iatt *buf_p) +posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, + struct iatt *buf_p, gf_boolean_t inode_locked) { - struct stat lstatbuf = {0, }; - struct iatt stbuf = {0, }; - int ret = 0; - struct posix_private *priv = NULL; + struct stat lstatbuf = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = 0; + int op_errno = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (gfid && !gf_uuid_is_null(gfid)) + gf_uuid_copy(stbuf.ia_gfid, gfid); + else + posix_fill_gfid_path(this, path, &stbuf); + stbuf.ia_flags |= IATT_GFID; + + ret = sys_lstat(path, &lstatbuf); + if (ret == -1) { + if (errno != ENOENT) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", path); + errno = op_errno; /*gf_msg could have changed 
errno*/ + } else { + op_errno = errno; + gf_msg_debug(this->name, 0, "lstat failed on %s (%s)", path, + strerror(errno)); + errno = op_errno; /*gf_msg could have changed errno*/ + } + goto out; + } + if ((lstatbuf.st_ino == priv->handledir.st_ino) && + (lstatbuf.st_dev == priv->handledir.st_dev)) { + errno = ENOENT; + return -1; + } + + if (!S_ISDIR(lstatbuf.st_mode)) + lstatbuf.st_nlink--; + + iatt_from_stat(&stbuf, &lstatbuf); + + if (priv->ctime) { + if (inode) { + if (!inode_locked) { + ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + } else { + ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + } + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on gfid: %s", + uuid_utoa(inode->gfid)); + goto out; + } + } else { + ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on path: %s", path); + goto out; + } + } + } - priv = this->private; + posix_fill_ino_from_gfid(this, &stbuf); - ret = sys_lstat (path, &lstatbuf); + if (buf_p) + *buf_p = stbuf; +out: + return ret; +} - if (ret != 0) { - if (ret == -1) { - if (errno != ENOENT) - gf_log (this->name, GF_LOG_WARNING, - "lstat failed on %s (%s)", - path, strerror (errno)); - } else { - // may be some backend filesytem issue - gf_log (this->name, GF_LOG_ERROR, "lstat failed on " - "%s and return value is %d instead of -1. 
" - "Please see dmesg output to check whether the " - "failure is due to backend filesystem issue", - path, ret); - ret = -1; - } - goto out; - } +static void +_get_list_xattr(posix_xattr_filler_t *filler) +{ + ssize_t size = 0; - if ((lstatbuf.st_ino == priv->handledir.st_ino) && - (lstatbuf.st_dev == priv->handledir.st_dev)) { - errno = ENOENT; - return -1; - } + if ((!filler) || ((!filler->real_path) && (filler->fdnum < 0))) + goto out; - if (!S_ISDIR (lstatbuf.st_mode)) - lstatbuf.st_nlink --; + if (filler->real_path) + size = sys_llistxattr(filler->real_path, NULL, 0); + else + size = sys_flistxattr(filler->fdnum, NULL, 0); - iatt_from_stat (&stbuf, &lstatbuf); + if (size <= 0) + goto out; - if (gfid && !gf_uuid_is_null (gfid)) - gf_uuid_copy (stbuf.ia_gfid, gfid); - else - posix_fill_gfid_path (this, path, &stbuf); + filler->list = GF_CALLOC(1, size, gf_posix_mt_char); + if (!filler->list) + goto out; - posix_fill_ino_from_gfid (this, &stbuf); + if (filler->real_path) + size = sys_llistxattr(filler->real_path, filler->list, size); + else + size = sys_flistxattr(filler->fdnum, filler->list, size); - if (buf_p) - *buf_p = stbuf; + filler->list_size = size; out: - return ret; + return; } static void -_handle_list_xattr (dict_t *xattr_req, const char *real_path, int fdnum, - posix_xattr_filler_t *filler) +_handle_list_xattr(posix_xattr_filler_t *filler) { - int ret = -1; - ssize_t size = 0; - char *list = NULL; - int32_t list_offset = 0; - ssize_t remaining_size = 0; - char *key = NULL; - - if ((!real_path) && (fdnum < 0)) - goto out; - - if (real_path) - size = sys_llistxattr (real_path, NULL, 0); - else - size = sys_flistxattr (fdnum, NULL, 0); + int32_t list_offset = 0; + ssize_t remaining_size = 0; + char *key = NULL; + int len; - if (size <= 0) - goto out; + remaining_size = filler->list_size; + while (remaining_size > 0) { + key = filler->list + list_offset; + len = strlen(key); - list = alloca (size); - if (!list) - goto out; + if 
(gf_get_index_by_elem(list_xattr_ignore_xattrs, key) >= 0) + goto next; - if (real_path) - remaining_size = sys_llistxattr (real_path, list, size); - else - remaining_size = sys_flistxattr (fdnum, list, size); + if (posix_special_xattr(marker_xattrs, key)) + goto next; - if (remaining_size <= 0) - goto out; + if (posix_handle_georep_xattrs(NULL, key, NULL, _gf_false)) + goto next; - list_offset = 0; - while (remaining_size > 0) { - key = list + list_offset; + if (posix_is_gfid2path_xattr(key)) + goto next; - if (_is_in_array (list_xattr_ignore_xattrs, key)) - goto next; + if (dict_getn(filler->xattr, key, len)) + goto next; - if (posix_special_xattr (marker_xattrs, key)) - goto next; + (void)_posix_xattr_get_set_from_backend(filler, key); + next: + remaining_size -= (len + 1); + list_offset += (len + 1); - if (dict_get (filler->xattr, key)) - goto next; - - ret = _posix_xattr_get_set_from_backend (filler, key); -next: - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; - - } /* while (remaining_size > 0) */ -out: - return; + } /* while (remaining_size > 0) */ + return; } dict_t * -posix_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd, - int fdnum, dict_t *xattr_req, struct iatt *buf) +posix_xattr_fill(xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd, + int fdnum, dict_t *xattr_req, struct iatt *buf) { - dict_t *xattr = NULL; - posix_xattr_filler_t filler = {0, }; - gf_boolean_t list = _gf_false; - - if (dict_get (xattr_req, "list-xattr")) { - dict_del (xattr_req, "list-xattr"); - list = _gf_true; - } - - xattr = dict_new (); - if (!xattr) { - goto out; - } - - filler.this = this; - filler.real_path = real_path; - filler.xattr = xattr; - filler.stbuf = buf; - filler.loc = loc; - filler.fd = fd; - filler.fdnum = fdnum; - - dict_foreach (xattr_req, _posix_xattr_get_set, &filler); - if (list) - _handle_list_xattr (xattr_req, real_path, fdnum, &filler); - + dict_t *xattr = NULL; + posix_xattr_filler_t filler = { + 
0, + }; + gf_boolean_t list = _gf_false; + + if (dict_get_sizen(xattr_req, "list-xattr")) { + dict_del_sizen(xattr_req, "list-xattr"); + list = _gf_true; + } + + xattr = dict_new(); + if (!xattr) { + goto out; + } + + filler.this = this; + filler.real_path = real_path; + filler.xattr = xattr; + filler.stbuf = buf; + filler.loc = loc; + filler.fd = fd; + filler.fdnum = fdnum; + + _get_list_xattr(&filler); + dict_foreach(xattr_req, _posix_xattr_get_set, &filler); + if (list) + _handle_list_xattr(&filler); + + GF_FREE(filler.list); out: - return xattr; + return xattr; } void -posix_gfid_unset (xlator_t *this, dict_t *xdata) +posix_gfid_unset(xlator_t *this, dict_t *xdata) { - uuid_t uuid = {0, }; - int ret = 0; + uuid_t uuid = { + 0, + }; + int ret = 0; - if (xdata == NULL) - goto out; + if (xdata == NULL) + goto out; - ret = dict_get_ptr (xdata, "gfid-req", (void **)&uuid); - if (ret) { - goto out; - } + ret = dict_get_gfuuid(xdata, "gfid-req", &uuid); + if (ret) { + goto out; + } - posix_handle_unset (this, uuid, NULL); + posix_handle_unset(this, uuid, NULL); out: - return; + return; } int -posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) +posix_gfid_set(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req, + pid_t pid, int *op_errno) { - void *uuid_req = NULL; - uuid_t uuid_curr; - int ret = 0; - ssize_t size = 0; - struct stat stat = {0, }; - - - if (!xattr_req) - goto out; - - if (sys_lstat (path, &stat) != 0) - goto out; - - size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16); - if (size == 16) { - ret = 0; - goto verify_handle; - } - - ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "failed to get the gfid from dict for %s", - loc->path); - goto out; - } - - ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "setting GFID on %s failed (%s)", path, - strerror (errno)); - goto 
out; - } - gf_uuid_copy (uuid_curr, uuid_req); + uuid_t uuid_req; + uuid_t uuid_curr; + int ret = 0; + ssize_t size = 0; + struct stat stat = { + 0, + }; + + *op_errno = 0; + + if (!xattr_req) { + if (pid != GF_SERVER_PID_TRASH) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "xattr_req is null"); + *op_errno = EINVAL; + ret = -1; + } + goto out; + } + + if (sys_lstat(path, &stat) != 0) { + ret = -1; + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", path); + goto out; + } + + size = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); + if (size == 16) { + ret = 0; + goto verify_handle; + } + + ret = dict_get_gfuuid(xattr_req, "gfid-req", &uuid_req); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get the gfid from dict for %s", + loc->path); + *op_errno = -ret; + ret = -1; + goto out; + } + if (gf_uuid_is_null(uuid_req)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, + "gfid is null for %s", loc ? 
loc->path : ""); + ret = -1; + *op_errno = EINVAL; + goto out; + } + + ret = sys_lsetxattr(path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GFID_FAILED, + "setting GFID on %s failed ", path); + goto out; + } + gf_uuid_copy(uuid_curr, uuid_req); verify_handle: - if (!S_ISDIR (stat.st_mode)) - ret = posix_handle_hard (this, path, uuid_curr, &stat); - else - ret = posix_handle_soft (this, path, loc, uuid_curr, &stat); + if (!S_ISDIR(stat.st_mode)) + ret = posix_handle_hard(this, path, uuid_curr, &stat); + else + ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); out: - return ret; + if (ret && !(*op_errno)) + *op_errno = errno; + return ret; } - -int -posix_set_file_contents (xlator_t *this, const char *path, char *keyp, - data_t *value, int flags) +#ifdef HAVE_SYS_ACL_H +static int +posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s) { - char * key = NULL; - char real_path[PATH_MAX]; - int32_t file_fd = -1; - int op_ret = 0; - int ret = -1; - + int ret = -1; + acl_t acl = NULL; + acl_type_t type = 0; - /* XXX: does not handle assigning GFID to created files */ + if ((!path) && (fdnum < 0)) { + errno = -EINVAL; return -1; + } - key = &(keyp[15]); - sprintf (real_path, "%s/%s", path, key); - - if (flags & XATTR_REPLACE) { - /* if file exists, replace it - * else, error out */ - file_fd = open (real_path, O_TRUNC|O_WRONLY); - - if (file_fd == -1) { - goto create; - } - - if (value->len) { - ret = write (file_fd, value->data, value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed while doing setxattr " - "for key %s on path %s: %s", - key, real_path, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s: %s", - real_path, strerror (errno)); - goto out; - } - } - - create: /* we know file doesn't exist, create it */ 
- - file_fd = open (real_path, O_CREAT|O_WRONLY, 0644); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "failed to open file %s with O_CREAT: %s", - key, strerror (errno)); - goto out; - } - - ret = write (file_fd, value->data, value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed on %s while setxattr with " - "key %s: %s", - real_path, key, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s while setxattr with " - "key %s: %s", - real_path, key, strerror (errno)); - goto out; - } - } - -out: - return op_ret; -} - - -int -posix_get_file_contents (xlator_t *this, uuid_t pargfid, - const char *name, char **contents) -{ - char *real_path = NULL; - int32_t file_fd = -1; - struct iatt stbuf = {0,}; - int op_ret = 0; - int ret = -1; - - - MAKE_HANDLE_PATH (real_path, this, pargfid, name); - if (!real_path) { - op_ret = -ESTALE; - gf_log (this->name, GF_LOG_ERROR, - "Failed to create handle path for %s/%s", - uuid_utoa (pargfid), name); - goto out; - } - - op_ret = posix_istat (this, pargfid, name, &stbuf); - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", - real_path, strerror (errno)); - goto out; - } - - file_fd = open (real_path, O_RDONLY); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s", - real_path, strerror (errno)); - goto out; - } - - *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char), - gf_posix_mt_char); - if (! 
*contents) { - op_ret = -errno; - goto out; - } + type = gf_posix_acl_get_type(key); + if (!type) + return -1; - ret = read (file_fd, *contents, stbuf.ia_size); - if (ret <= 0) { - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s", - real_path, strerror (errno)); - goto out; - } + acl = acl_from_text(acl_s); + if (!acl) + return -1; - *contents[stbuf.ia_size] = '\0'; + if (path) + ret = acl_set_file(path, type, acl); + else if (type == ACL_TYPE_ACCESS) + ret = acl_set_fd(fdnum, acl); + else { + errno = -EINVAL; + return -1; + } - op_ret = close (file_fd); - file_fd = -1; - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s", - real_path, strerror (errno)); - goto out; - } + if (ret) + /* posix_handle_pair expects ret to be the errno */ + ret = -errno; -out: - if (op_ret < 0) { - GF_FREE (*contents); - if (file_fd != -1) - close (file_fd); - } + acl_free(acl); - return op_ret; + return ret; } -#ifdef HAVE_SYS_ACL_H int -posix_pacl_set (const char *path, const char *key, const char *acl_s) +posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s) { - int ret = -1; - acl_t acl = NULL; - acl_type_t type = 0; - - type = gf_posix_acl_get_type (key); + int ret = -1; + acl_t acl = NULL; + acl_type_t type = 0; + char *acl_tmp = NULL; - acl = acl_from_text (acl_s); - ret = acl_set_file (path, type, acl); - acl_free (acl); - - return ret; -} + if ((!path) && (fdnum < 0)) { + errno = -EINVAL; + return -1; + } -int -posix_pacl_get (const char *path, const char *key, char **acl_s) -{ - int ret = -1; - acl_t acl = NULL; - acl_type_t type = 0; - char *acl_tmp = NULL; + type = gf_posix_acl_get_type(key); + if (!type) + return -1; - type = gf_posix_acl_get_type (key); - if (!type) - return -1; + if (path) + acl = acl_get_file(path, type); + else if (type == ACL_TYPE_ACCESS) + acl = acl_get_fd(fdnum); + else { + errno = -EINVAL; + return -1; + } - acl = acl_get_file (path, type); - if (!acl) - 
return -1; + if (!acl) + return -1; #ifdef HAVE_ACL_LIBACL_H - acl_tmp = acl_to_any_text (acl, NULL, ',', - TEXT_ABBREVIATE | TEXT_NUMERIC_IDS); + acl_tmp = acl_to_any_text(acl, NULL, ',', + TEXT_ABBREVIATE | TEXT_NUMERIC_IDS); #else /* FreeBSD and the like */ - acl_tmp = acl_to_text_np (acl, NULL, ACL_TEXT_NUMERIC_IDS); + acl_tmp = acl_to_text_np(acl, NULL, ACL_TEXT_NUMERIC_IDS); #endif - if (!acl_tmp) - goto free_acl; + if (!acl_tmp) + goto free_acl; - *acl_s = gf_strdup (acl_tmp); - if (*acl_s) - ret = 0; + *acl_s = gf_strdup(acl_tmp); + if (*acl_s) + ret = 0; - acl_free (acl_tmp); + acl_free(acl_tmp); free_acl: - acl_free (acl); + acl_free(acl); - return ret; + return ret; } #else /* !HAVE_SYS_ACL_H (NetBSD) */ int -posix_pacl_set (const char *path, const char *key, const char *acl_s) +posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s) { - errno = ENOTSUP; - return -1; + errno = ENOTSUP; + return -1; } int -posix_pacl_get (const char *path, const char *key, char **acl_s) +posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s) { - errno = ENOTSUP; - return -1; + errno = ENOTSUP; + return -1; } #endif - #ifdef GF_DARWIN_HOST_OS -static -void posix_dump_buffer (xlator_t *this, const char *real_path, const char *key, - data_t *value, int flags) -{ - char buffer[3*value->len+1]; - int index = 0; - buffer[0] = 0; - gf_loglevel_t log_level = gf_log_get_loglevel (); - if (log_level == GF_LOG_TRACE) { - char *data = (char *) value->data; - for (index = 0; index < value->len; index++) - sprintf(buffer+3*index, " %02x", data[index]); - } - gf_log (this->name, GF_LOG_DEBUG, - "Dump %s: key:%s flags: %u length:%u data:%s ", - real_path, key, flags, value->len, - (log_level == GF_LOG_TRACE ? 
buffer : "<skipped in DEBUG>")); +static void +posix_dump_buffer(xlator_t *this, const char *real_path, const char *key, + data_t *value, int flags) +{ + char buffer[3 * value->len + 1]; + int index = 0; + buffer[0] = 0; + gf_loglevel_t log_level = gf_log_get_loglevel(); + if (log_level == GF_LOG_TRACE) { + char *data = (char *)value->data; + for (index = 0; index < value->len; index++) + sprintf(buffer + 3 * index, " %02x", data[index]); + } + gf_msg_debug(this->name, 0, "Dump %s: key:%s flags: %u length:%u data:%s ", + real_path, key, flags, value->len, + (log_level == GF_LOG_TRACE ? buffer : "<skipped in DEBUG>")); } #endif int -posix_handle_pair (xlator_t *this, const char *real_path, - char *key, data_t *value, int flags) +posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, + data_t *value, int flags, struct iatt *stbuf) { - int sys_ret = -1; - int ret = 0; - - if (XATTR_IS_PATHINFO (key)) { - ret = -EACCES; - goto out; - } else if (ZR_FILE_CONTENT_REQUEST(key)) { - ret = posix_set_file_contents (this, real_path, key, value, - flags); - } else if (GF_POSIX_ACL_REQUEST (key)) { - ret = posix_pacl_set (real_path, key, value->data); - } else { - sys_ret = sys_lsetxattr (real_path, key, value->data, - value->len, flags); + int sys_ret = -1; + int ret = 0; + int op_errno = 0; + struct mdata_iatt mdata_iatt = { + 0, + }; #ifdef GF_DARWIN_HOST_OS - posix_dump_buffer(this, real_path, key, value, flags); + const int error_code = EINVAL; +#else + const int error_code = EEXIST; #endif - if (sys_ret < 0) { - ret = -errno; - if (errno == ENOENT) { - if (!posix_special_xattr (marker_xattrs, - key)) { - gf_log (this->name, GF_LOG_ERROR, - "setxattr on %s failed: %s", - real_path, strerror (errno)); - } - } else { + if (XATTR_IS_PATHINFO(key)) { + ret = -EACCES; + goto out; + } else if (posix_is_gfid2path_xattr(key)) { + ret = -ENOTSUP; + goto out; + } else if (GF_POSIX_ACL_REQUEST(key)) { + if (stbuf && IS_DHT_LINKFILE_MODE(stbuf)) + goto out; + 
ret = posix_pacl_set(real_path, -1, key, value->data); + } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, + SLEN(POSIX_ACL_ACCESS_XATTR)) && + stbuf && IS_DHT_LINKFILE_MODE(stbuf)) { + goto out; + } else if (!strncmp(key, GF_INTERNAL_CTX_KEY, SLEN(GF_INTERNAL_CTX_KEY))) { + /* ignore this key value pair */ + ret = 0; + goto out; + } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) { + /* This is either by rebalance or self heal. Create the xattr if it's + * not present. Compare and update the larger value if the xattr is + * already present. + */ + if (loc == NULL) { + ret = -EINVAL; + goto out; + } + posix_mdata_iatt_from_disk(&mdata_iatt, + (posix_mdata_disk_t *)value->data); + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + ret = -op_errno; + } + goto out; + } else { + sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags); #ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "%s: key:%s flags: %u length:%d error:%s", - real_path, key, flags, value->len, - strerror (errno)); -#else /* ! 
DARWIN */ - gf_log (this->name, GF_LOG_ERROR, - "%s: key:%s flags: %u length:%d error:%s", - real_path, key, flags, value->len, - strerror (errno)); -#endif /* DARWIN */ - } - - goto out; + posix_dump_buffer(this, real_path, key, value, flags); +#endif + if (sys_ret < 0) { + ret = -errno; + if (errno == ENOENT) { + if (!posix_special_xattr(marker_xattrs, key)) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setxattr on %s failed", real_path); + } + } else { + if (errno == error_code) { + gf_msg_debug(this->name, 0, + "%s: key:%s" + "flags: %u length:%d", + real_path, key, flags, value->len); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "%s: key:%s" + "flags: %u length:%d", + real_path, key, flags, value->len); } + } + + goto out; } + } out: - return ret; + return ret; } int -posix_fhandle_pair (xlator_t *this, int fd, - char *key, data_t *value, int flags) +posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, + data_t *value, int flags, struct iatt *stbuf, fd_t *_fd) { - int sys_ret = -1; - int ret = 0; - - if (XATTR_IS_PATHINFO (key)) { - ret = -EACCES; - goto out; - } - - sys_ret = sys_fsetxattr (fd, key, value->data, - value->len, flags); - - if (sys_ret < 0) { - ret = -errno; - if (errno == ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr on fd=%d failed: %s", fd, - strerror (errno)); - } else { - + int sys_ret = -1; + int ret = 0; + + if (XATTR_IS_PATHINFO(key)) { + ret = -EACCES; + goto out; + } else if (posix_is_gfid2path_xattr(key)) { + ret = -ENOTSUP; + goto out; + } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, + SLEN(POSIX_ACL_ACCESS_XATTR)) && + stbuf && IS_DHT_LINKFILE_MODE(stbuf)) { + goto out; + } + + sys_ret = sys_fsetxattr(fd, key, value->data, value->len, flags); + + if (sys_ret < 0) { + ret = -errno; + if (errno == ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fsetxattr on fd=%d" + " failed", + fd); + } else { #ifdef GF_DARWIN_HOST_OS - 
gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "fd=%d: key:%s error:%s", - fd, key, strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_ERROR, - "fd=%d: key:%s error:%s", - fd, key, strerror (errno)); + if (errno == EINVAL) { + gf_msg_debug(this->name, 0, + "fd=%d: key:%s " + "error:%s", + fd, key, strerror(errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fd=%d: key:%s", fd, key); + } + +#else /* ! DARWIN */ + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fd=%d: key:%s", fd, key); #endif /* DARWIN */ - } - - goto out; } + goto out; + } else if (_fd) { + posix_set_ctime(frame, this, NULL, fd, _fd->inode, NULL); + } + out: - return ret; + return ret; } static void -del_stale_dir_handle (xlator_t *this, uuid_t gfid) +del_stale_dir_handle(xlator_t *this, uuid_t gfid) { - char newpath[PATH_MAX] = {0, }; - uuid_t gfid_curr = {0, }; - ssize_t size = -1; - gf_boolean_t stale = _gf_false; - char *hpath = NULL; - struct stat stbuf = {0, }; - struct iatt iabuf = {0, }; - - MAKE_HANDLE_GFID_PATH (hpath, this, gfid, NULL); - - /* check that it is valid directory handle */ - size = sys_lstat (hpath, &stbuf); - if (size < 0) { - gf_log (this->name, GF_LOG_DEBUG, "%s: Handle stat failed: " - "%s", hpath, strerror (errno)); - goto out; - } - - iatt_from_stat (&iabuf, &stbuf); - if (iabuf.ia_nlink != 1 || !IA_ISLNK (iabuf.ia_type)) { - gf_log (this->name, GF_LOG_DEBUG, "%s: Handle nlink %d %d", - hpath, iabuf.ia_nlink, IA_ISLNK (iabuf.ia_type)); - goto out; - } - - size = posix_handle_path (this, gfid, NULL, newpath, sizeof (newpath)); - if (size <= 0) { - if (errno == ENOENT) { - gf_log (this->name, GF_LOG_DEBUG, "%s: %s", newpath, - strerror (ENOENT)); - stale = _gf_true; - } - goto out; - } - - size = sys_lgetxattr (newpath, GFID_XATTR_KEY, gfid_curr, 16); - if (size < 0 && errno == ENOENT) { - gf_log (this->name, GF_LOG_DEBUG, "%s: %s", newpath, - strerror (ENOENT)); - stale = 
_gf_true; - } else if (size == 16 && gf_uuid_compare (gfid, gfid_curr)) { - gf_log (this->name, GF_LOG_DEBUG, "%s: mismatching gfid: %s, " - "at %s", hpath, uuid_utoa (gfid_curr), newpath); - stale = _gf_true; - } + char newpath[PATH_MAX] = { + 0, + }; + uuid_t gfid_curr = { + 0, + }; + ssize_t size = -1; + gf_boolean_t stale = _gf_false; + char *hpath = NULL; + struct stat stbuf = { + 0, + }; + struct iatt iabuf = { + 0, + }; + + MAKE_HANDLE_GFID_PATH(hpath, this, gfid); + + /* check that it is valid directory handle */ + size = sys_lstat(hpath, &stbuf); + if (size < 0) { + gf_msg_debug(this->name, 0, + "%s: Handle stat failed: " + "%s", + hpath, strerror(errno)); + goto out; + } + + iatt_from_stat(&iabuf, &stbuf); + if (iabuf.ia_nlink != 1 || !IA_ISLNK(iabuf.ia_type)) { + gf_msg_debug(this->name, 0, "%s: Handle nlink %d %d", hpath, + iabuf.ia_nlink, IA_ISLNK(iabuf.ia_type)); + goto out; + } + + size = posix_handle_path(this, gfid, NULL, newpath, sizeof(newpath)); + if (size <= 0) { + if (errno == ENOENT) { + gf_msg_debug(this->name, 0, "%s: %s", newpath, strerror(ENOENT)); + stale = _gf_true; + } + goto out; + } + + size = sys_lgetxattr(newpath, GFID_XATTR_KEY, gfid_curr, 16); + if (size < 0 && errno == ENOENT) { + gf_msg_debug(this->name, 0, "%s: %s", newpath, strerror(ENOENT)); + stale = _gf_true; + } else if (size == 16 && gf_uuid_compare(gfid, gfid_curr)) { + gf_msg_debug(this->name, 0, + "%s: mismatching gfid: %s, " + "at %s", + hpath, uuid_utoa(gfid_curr), newpath); + stale = _gf_true; + } out: - if (stale) { - size = sys_unlink (hpath); - if (size < 0 && errno != ENOENT) - gf_log (this->name, GF_LOG_ERROR, "%s: Failed to " - "remove handle to %s (%s)", hpath, newpath, - strerror (errno)); - } else if (size == 16) { - gf_log (this->name, GF_LOG_DEBUG, "%s: Fresh handle for " - "%s with gfid %s", hpath, newpath, - uuid_utoa (gfid_curr)); - } - return; + if (stale) { + size = sys_unlink(hpath); + if (size < 0 && errno != ENOENT) + gf_msg(this->name, 
GF_LOG_ERROR, errno, + P_MSG_STALE_HANDLE_REMOVE_FAILED, + "%s: Failed" + "to remove handle to %s", + hpath, newpath); + } else if (size == 16) { + gf_msg_debug(this->name, 0, + "%s: Fresh handle for " + "%s with gfid %s", + hpath, newpath, uuid_utoa(gfid_curr)); + } + return; } static int -janitor_walker (const char *fpath, const struct stat *sb, - int typeflag, struct FTW *ftwbuf) +janitor_walker(const char *fpath, const struct stat *sb, int typeflag, + struct FTW *ftwbuf) { - struct iatt stbuf = {0, }; - xlator_t *this = NULL; - - this = THIS; - posix_pstat (this, NULL, fpath, &stbuf); - switch (sb->st_mode & S_IFMT) { + struct iatt stbuf = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + /* posix_mdata_t is not filled, no time or size attributes + * are being used, so fine. + */ + posix_pstat(this, NULL, NULL, fpath, &stbuf, _gf_false); + switch (sb->st_mode & S_IFMT) { case S_IFREG: case S_IFBLK: case S_IFLNK: case S_IFCHR: case S_IFIFO: case S_IFSOCK: - gf_log (THIS->name, GF_LOG_TRACE, - "unlinking %s", fpath); - unlink (fpath); - if (stbuf.ia_nlink == 1) - posix_handle_unset (this, stbuf.ia_gfid, NULL); - break; + gf_msg_trace(THIS->name, 0, "unlinking %s", fpath); + sys_unlink(fpath); + if (stbuf.ia_nlink == 1) + posix_handle_unset(this, stbuf.ia_gfid, NULL); + break; case S_IFDIR: - if (ftwbuf->level) { /* don't remove top level dir */ - gf_log (THIS->name, GF_LOG_TRACE, - "removing directory %s", fpath); + if (ftwbuf->level) { /* don't remove top level dir */ + gf_msg_debug(THIS->name, 0, "removing directory %s", fpath); - rmdir (fpath); - del_stale_dir_handle (this, stbuf.ia_gfid); - } - break; - } + sys_rmdir(fpath); + del_stale_dir_handle(this, stbuf.ia_gfid); + } + break; + } - return 0; /* 0 = FTW_CONTINUE */ + return 0; /* 0 = FTW_CONTINUE */ } +void +__posix_janitor_timer_start(xlator_t *this); -static struct posix_fd * -janitor_get_next_fd (xlator_t *this) +static int +posix_janitor_task_done(int ret, call_frame_t *frame, void *data) { - 
struct posix_private *priv = NULL; - struct posix_fd *pfd = NULL; + xlator_t *this = NULL; + struct posix_private *priv = NULL; - struct timespec timeout; + this = data; + priv = this->private; - priv = this->private; + pthread_mutex_lock(&priv->janitor_mutex); + { + if (priv->janitor_task_stop) { + priv->janitor_task_stop = _gf_false; + pthread_cond_signal(&priv->janitor_cond); + pthread_mutex_unlock(&priv->janitor_mutex); + goto out; + } + } + pthread_mutex_unlock(&priv->janitor_mutex); - pthread_mutex_lock (&priv->janitor_lock); - { - if (list_empty (&priv->janitor_fds)) { - time (&timeout.tv_sec); - timeout.tv_sec += priv->janitor_sleep_duration; - timeout.tv_nsec = 0; - - pthread_cond_timedwait (&priv->janitor_cond, - &priv->janitor_lock, - &timeout); - goto unlock; - } + LOCK(&priv->lock); + { + __posix_janitor_timer_start(this); + } + UNLOCK(&priv->lock); - pfd = list_entry (priv->janitor_fds.next, struct posix_fd, - list); +out: + return 0; +} - list_del (priv->janitor_fds.next); +static int +posix_janitor_task(void *data) +{ + xlator_t *this = NULL; + struct posix_private *priv = NULL; + xlator_t *old_this = NULL; + + time_t now; + + this = data; + priv = this->private; + /* We need THIS to be set for janitor_walker */ + old_this = THIS; + THIS = this; + + if (!priv) + goto out; + + now = gf_time(); + if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { + if (priv->disable_landfill_purge) { + gf_msg_debug(this->name, 0, + "Janitor would have " + "cleaned out %s, but purge" + "is disabled.", + priv->trash_path); + } else { + gf_msg_trace(this->name, 0, "janitor cleaning out %s", + priv->trash_path); + + nftw(priv->trash_path, janitor_walker, 32, FTW_DEPTH | FTW_PHYS); } -unlock: - pthread_mutex_unlock (&priv->janitor_lock); + priv->last_landfill_check = now; + } + + THIS = old_this; - return pfd; +out: + return 0; } +static void +posix_janitor_task_initator(struct gf_tw_timer_list *timer, void *data, + unsigned long calltime) +{ + 
xlator_t *this = NULL; + int ret = 0; -static void * -posix_janitor_thread_proc (void *data) + this = data; + + ret = synctask_new(this->ctx->env, posix_janitor_task, + posix_janitor_task_done, NULL, this); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning janitor " + "thread failed"); + } + + return; +} + +void +__posix_janitor_timer_start(xlator_t *this) { - xlator_t * this = NULL; - struct posix_private *priv = NULL; - struct posix_fd *pfd; + struct posix_private *priv = NULL; + struct gf_tw_timer_list *timer = NULL; - time_t now; + priv = this->private; + timer = priv->janitor; - this = data; - priv = this->private; + INIT_LIST_HEAD(&timer->entry); + timer->expires = priv->janitor_sleep_duration; + timer->function = posix_janitor_task_initator; + timer->data = this; + gf_tw_add_timer(glusterfs_ctx_tw_get(this->ctx), timer); - THIS = this; + return; +} - while (1) { - time (&now); - if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { - gf_log (this->name, GF_LOG_TRACE, - "janitor cleaning out %s", priv->trash_path); +void +posix_janitor_timer_start(xlator_t *this) +{ + struct posix_private *priv = NULL; + struct gf_tw_timer_list *timer = NULL; + + priv = this->private; + + LOCK(&priv->lock); + { + if (!priv->janitor) { + timer = GF_CALLOC(1, sizeof(struct gf_tw_timer_list), + gf_common_mt_tw_timer_list); + if (!timer) { + goto unlock; + } + priv->janitor = timer; + __posix_janitor_timer_start(this); + } + } +unlock: + UNLOCK(&priv->lock); - nftw (priv->trash_path, - janitor_walker, - 32, - FTW_DEPTH | FTW_PHYS); + return; +} - priv->last_landfill_check = now; - } +static struct posix_fd * +janitor_get_next_fd(glusterfs_ctx_t *ctx) +{ + struct posix_fd *pfd = NULL; - pfd = janitor_get_next_fd (this); - if (pfd) { - if (pfd->dir == NULL) { - gf_log (this->name, GF_LOG_TRACE, - "janitor: closing file fd=%d", pfd->fd); - close (pfd->fd); - } else { - gf_log (this->name, GF_LOG_TRACE, - "janitor: closing 
dir fd=%p", pfd->dir); - closedir (pfd->dir); - } - - GF_FREE (pfd); - } + while (list_empty(&ctx->janitor_fds)) { + if (ctx->pxl_count == 0) { + return NULL; } - return NULL; -} + pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); + } + pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); + list_del_init(&pfd->list); -void -posix_spawn_janitor_thread (xlator_t *this) + return pfd; +} + +static void +posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) { - struct posix_private *priv = NULL; - int ret = 0; + THIS = xl; - priv = this->private; + if (pfd->dir == NULL) { + gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); + sys_close(pfd->fd); + } else { + gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); + sys_closedir(pfd->dir); + } - LOCK (&priv->lock); - { - if (!priv->janitor_present) { - ret = gf_thread_create (&priv->janitor, NULL, - posix_janitor_thread_proc, this); - - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "spawning janitor thread failed: %s", - strerror (errno)); - goto unlock; - } - - priv->janitor_present = _gf_true; - } - } -unlock: - UNLOCK (&priv->lock); + GF_FREE(pfd); } -static int -is_fresh_file (struct stat *stat) +static void * +posix_ctx_janitor_thread_proc(void *data) { - struct timeval tv; + xlator_t *xl; + struct posix_fd *pfd; + glusterfs_ctx_t *ctx = NULL; + struct posix_private *priv_fd; - gettimeofday (&tv, NULL); + ctx = data; - if ((stat->st_ctime >= (tv.tv_sec - 1)) - && (stat->st_ctime <= tv.tv_sec)) - return 1; + pthread_mutex_lock(&ctx->fd_lock); - return 0; -} + while ((pfd = janitor_get_next_fd(ctx)) != NULL) { + pthread_mutex_unlock(&ctx->fd_lock); + + xl = pfd->xl; + posix_close_pfd(xl, pfd); + + pthread_mutex_lock(&ctx->fd_lock); + priv_fd = xl->private; + priv_fd->rel_fdcount--; + if (!priv_fd->rel_fdcount) + pthread_cond_signal(&priv_fd->fd_cond); + } + + pthread_mutex_unlock(&ctx->fd_lock); + + return NULL; +} int -posix_gfid_heal (xlator_t *this, const char *path, 
loc_t *loc, dict_t *xattr_req) -{ - /* The purpose of this function is to prevent a race - where an inode creation FOP (like mkdir/mknod/create etc) - races with lookup in the following way: - - {create thread} | {lookup thread} - | - t0 - mkdir ("name") | - t1 - | posix_gfid_set ("name", 2); - t2 - posix_gfid_set ("name", 1); | - t3 - lstat ("name"); | lstat ("name"); - - In the above case mkdir FOP would have resulted with GFID 2 while - it should have been GFID 1. It matters in the case where GFID would - have gotten set to 1 on other subvolumes of replciate/distribute - - The "solution" here is that, if we detect lookup is attempting to - set a GFID on a file which is created very recently, but does not - yet have a GFID (i.e, between t1 and t2), then "fake" it as though - posix_gfid_heal was called at t0 instead. - */ - - uuid_t uuid_curr; - int ret = 0; - struct stat stat = {0, }; - - if (!xattr_req) - goto out; +posix_spawn_ctx_janitor_thread(xlator_t *this) +{ + int ret = 0; + glusterfs_ctx_t *ctx = NULL; - if (sys_lstat (path, &stat) != 0) - goto out; + ctx = this->ctx; - ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16); - if (ret != 16) { - if (is_fresh_file (&stat)) { - ret = -1; - errno = ENOENT; - goto out; - } + pthread_mutex_lock(&ctx->fd_lock); + { + if (ctx->pxl_count++ == 0) { + ret = gf_thread_create(&ctx->janitor, NULL, + posix_ctx_janitor_thread_proc, ctx, + "posixctxjan"); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning janitor thread failed"); + ctx->pxl_count--; + } } + } + pthread_mutex_unlock(&ctx->fd_lock); - ret = posix_gfid_set (this, path, loc, xattr_req); -out: - return ret; + return ret; } - -int -posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req) +static int +is_fresh_file(struct timespec *ts) { - int ret = 0; - data_t *data = NULL; - struct stat stat = {0, }; + struct timespec now; + int64_t elapsed; - if (!xattr_req) - goto out; + 
timespec_now_realtime(&now); + elapsed = (int64_t)gf_tsdiff(ts, &now); - if (sys_lstat (path, &stat) != 0) - goto out; + if (elapsed < 0) { + /* The file has been modified in the future !!! + * Is it fresh ? previous implementation considered this as a + * non-fresh file, so maintaining the same behavior. */ + return 0; + } - data = dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR); - if (data) { - ret = sys_lsetxattr (path, POSIX_ACL_ACCESS_XATTR, - data->data, data->len, 0); -#ifdef __FreeBSD__ - if (ret != -1) { - ret = 0; - } -#endif /* __FreeBSD__ */ - if (ret != 0) - goto out; + /* If the file is newer than a second, we consider it fresh. */ + return elapsed < 1000000; +} + +int +posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) +{ + /* The purpose of this function is to prevent a race + where an inode creation FOP (like mkdir/mknod/create etc) + races with lookup in the following way: + + {create thread} | {lookup thread} + | + t0 + mkdir ("name") | + t1 + | posix_gfid_set ("name", 2); + t2 + posix_gfid_set ("name", 1); | + t3 + lstat ("name"); | lstat ("name"); + + In the above case mkdir FOP would have resulted with GFID 2 while + it should have been GFID 1. It matters in the case where GFID would + have gotten set to 1 on other subvolumes of replciate/distribute + + The "solution" here is that, if we detect lookup is attempting to + set a GFID on a file which is created very recently, but does not + yet have a GFID (i.e, between t1 and t2), then "fake" it as though + posix_gfid_heal was called at t0 instead. + */ + + uuid_t uuid_curr; + int ret = 0; + struct stat stat = { + 0, + }; + struct iatt stbuf = { + 0, + }; + struct posix_private *priv = NULL; + + priv = this->private; + + if (!xattr_req) + return 0; + + if (loc->inode && priv->ctime) { + if (sys_lstat(path, &stat) != 0) { + return -errno; + } + /* stbuf is only to compare ctime, don't use it to access + * other fields as they are zero. 
*/ + ret = posix_get_mdata_xattr(this, path, -1, loc->inode, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on gfid: %s", + uuid_utoa(loc->inode->gfid)); + return -ENOENT; } + ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); + if (ret != 16) { + /* TODO: This is a very hacky way of doing this, and very prone to + * errors and unexpected behavior. This should be changed. */ + struct timespec ts = {.tv_sec = stbuf.ia_ctime, + .tv_nsec = stbuf.ia_ctime_nsec}; + if (is_fresh_file(&ts)) { + gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, + "Fresh file: %s", path); + return -ENOENT; + } + } + } else { + if (sys_lstat(path, &stat) != 0) { + return -errno; + } + ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); + if (ret != 16) { + /* TODO: This is a very hacky way of doing this, and very prone to + * errors and unexpected behavior. This should be changed. */ + if (is_fresh_file(&stat.st_ctim)) { + gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, + "Fresh file: %s", path); + return -ENOENT; + } + } + } + + (void)posix_gfid_set(this, path, loc, xattr_req, GF_CLIENT_PID_MAX, &ret); + return 0; +} - data = dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR); - if (data) { - ret = sys_lsetxattr (path, POSIX_ACL_DEFAULT_XATTR, - data->data, data->len, 0); +int +posix_acl_xattr_set(xlator_t *this, const char *path, dict_t *xattr_req) +{ + int ret = 0; + data_t *data = NULL; + struct stat stat = { + 0, + }; + + if (!xattr_req) + goto out; + + if (sys_lstat(path, &stat) != 0) + goto out; + + data = dict_get(xattr_req, POSIX_ACL_ACCESS_XATTR); + if (data) { + ret = sys_lsetxattr(path, POSIX_ACL_ACCESS_XATTR, data->data, data->len, + 0); #ifdef __FreeBSD__ - if (ret != -1) { - ret = 0; - } + if (ret != -1) { + ret = 0; + } #endif /* __FreeBSD__ */ - if (ret != 0) - goto out; + if (ret != 0) + goto out; + } + + data = dict_get(xattr_req, POSIX_ACL_DEFAULT_XATTR); + if (data) { + ret = 
sys_lsetxattr(path, POSIX_ACL_DEFAULT_XATTR, data->data, + data->len, 0); +#ifdef __FreeBSD__ + if (ret != -1) { + ret = 0; } +#endif /* __FreeBSD__ */ + if (ret != 0) + goto out; + } out: - return ret; + return ret; } static int -_handle_entry_create_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) +_handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) { - int ret = -1; - posix_xattr_filler_t *filler = NULL; + int ret = -1; + posix_xattr_filler_t *filler = NULL; - filler = tmp; + filler = tmp; - if (!strcmp (GFID_XATTR_KEY, k) || - !strcmp ("gfid-req", k) || - !strcmp (POSIX_ACL_DEFAULT_XATTR, k) || - !strcmp (POSIX_ACL_ACCESS_XATTR, k) || - ZR_FILE_CONTENT_REQUEST(k)) { - return 0; - } - - ret = posix_handle_pair (filler->this, filler->real_path, k, v, - XATTR_CREATE); - if (ret < 0) { - errno = -ret; - return -1; - } + if (!strcmp(GFID_XATTR_KEY, k) || !strcmp("gfid-req", k) || + !strcmp(POSIX_ACL_DEFAULT_XATTR, k) || + !strcmp(POSIX_ACL_ACCESS_XATTR, k) || posix_xattr_ignorable(k)) { return 0; + } + + ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, + XATTR_CREATE, filler->stbuf); + if (ret < 0) { + errno = -ret; + return -1; + } + return 0; } int -posix_entry_create_xattr_set (xlator_t *this, const char *path, +posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, dict_t *dict) { - int ret = -1; + int ret = -1; - posix_xattr_filler_t filler = {0,}; + posix_xattr_filler_t filler = { + 0, + }; - if (!dict) - goto out; + if (!dict) + goto out; - filler.this = this; - filler.real_path = path; + filler.this = this; + filler.real_path = path; + filler.stbuf = NULL; + filler.loc = loc; - ret = dict_foreach (dict, _handle_entry_create_keyvalue_pair, &filler); + ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler); out: - return ret; + return ret; +} + +static int +__posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd_p, + int *op_errno_p) +{ + uint64_t 
tmp_pfd = 0; + struct posix_fd *pfd = NULL; + int ret = -1; + char *real_path = NULL; + char *unlink_path = NULL; + int _fd = -1; + int op_errno = 0; + DIR *dir = NULL; + + struct posix_private *priv = NULL; + + priv = this->private; + + ret = __fd_ctx_get(fd, this, &tmp_pfd); + if (ret == 0) { + pfd = (void *)(long)tmp_pfd; + goto out; + } + if (!fd_is_anonymous(fd)) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READ_FAILED, + "Failed to get fd context for a non-anonymous fd, " + "gfid: %s", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto out; + } + + MAKE_HANDLE_PATH(real_path, this, fd->inode->gfid, NULL); + if (!real_path) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READ_FAILED, + "Failed to create handle path (%s)", uuid_utoa(fd->inode->gfid)); + ret = -1; + op_errno = EINVAL; + goto out; + } + pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = ENOMEM; + goto out; + } + pfd->fd = -1; + + if (fd->inode->ia_type == IA_IFDIR) { + dir = sys_opendir(real_path); + if (!dir) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_READ_FAILED, + "Failed to get anonymous fd for " + "real_path: %s.", + real_path); + GF_FREE(pfd); + pfd = NULL; + goto out; + } + _fd = dirfd(dir); + } + + /* Using fd->flags in case we choose to have anonymous + * fds with different flags some day. As of today it + * would be GF_ANON_FD_FLAGS and nothing else. 
+ */ + if (fd->inode->ia_type == IA_IFREG) { + _fd = open(real_path, fd->flags); + if ((_fd == -1) && (errno == ENOENT)) { + POSIX_GET_FILE_UNLINK_PATH(priv->base_path, fd->inode->gfid, + unlink_path); + _fd = open(unlink_path, fd->flags); + } + if (_fd == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_READ_FAILED, + "Failed to get anonymous fd for " + "real_path: %s.", + real_path); + GF_FREE(pfd); + pfd = NULL; + goto out; + } + } + + pfd->fd = _fd; + pfd->dir = dir; + pfd->flags = fd->flags; + + ret = __fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (ret != 0) { + op_errno = ENOMEM; + if (_fd != -1) + sys_close(_fd); + if (dir) + sys_closedir(dir); + GF_FREE(pfd); + pfd = NULL; + goto out; + } + + ret = 0; +out: + if (ret < 0 && op_errno_p) + *op_errno_p = op_errno; + + if (pfd_p) + *pfd_p = pfd; + return ret; } +int +posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, int *op_errno) +{ + int ret; + + LOCK(&fd->inode->lock); + { + ret = __posix_fd_ctx_get(fd, this, pfd, op_errno); + } + UNLOCK(&fd->inode->lock); + + return ret; +} static int -__posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p) -{ - uint64_t tmp_pfd = 0; - struct posix_fd *pfd = NULL; - int ret = -1; - char *real_path = NULL; - int _fd = -1; - DIR *dir = NULL; - - ret = __fd_ctx_get (fd, this, &tmp_pfd); - if (ret == 0) { - pfd = (void *)(long) tmp_pfd; - ret = 0; - goto out; - } +posix_fs_health_check(xlator_t *this, char *file_path) +{ + struct posix_private *priv = NULL; + int ret = -1; + char timestamp[GF_TIMESTR_SIZE] = { + 0, + }; + int fd = -1; + int timelen = -1; + time_t time_sec = { + 0, + }; + char buff[256] = {0}; + char *op = NULL; + int op_errno = 0; + int cnt; + int timeout = 0; + struct aiocb aiocb; + + priv = this->private; + + timeout = priv->health_check_timeout; + + fd = open(file_path, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (fd == -1) { + op_errno = errno; + op = "open_for_write"; + goto out; + } + + time_sec 
= gf_time(); + gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); + timelen = strlen(timestamp); + + memset(&aiocb, 0, sizeof(struct aiocb)); + aiocb.aio_fildes = fd; + aiocb.aio_buf = timestamp; + aiocb.aio_nbytes = timelen; + aiocb.aio_sigevent.sigev_notify = SIGEV_NONE; + if (aio_write(&aiocb) == -1) { + op_errno = errno; + op = "aio_write"; + goto out; + } + + cnt = 0; + /* Wait until write completion */ + while ((aio_error(&aiocb) == EINPROGRESS) && (++cnt <= timeout)) + sleep(1); + + ret = aio_error(&aiocb); + if (ret != 0) { + op_errno = errno; + op = "aio_write_error"; + goto out; + } + + ret = aio_return(&aiocb); + if (ret != timelen) { + op_errno = errno; + op = "aio_write_buf"; + ret = -1; + goto out; + } + + sys_close(fd); + + fd = open(file_path, O_RDONLY); + if (fd == -1) { + op_errno = errno; + op = "open_for_read"; + goto out; + } + + memset(&aiocb, 0, sizeof(struct aiocb)); + aiocb.aio_fildes = fd; + aiocb.aio_buf = buff; + aiocb.aio_nbytes = sizeof(buff); + if (aio_read(&aiocb) == -1) { + op_errno = errno; + op = "aio_read"; + goto out; + } + cnt = 0; + /* Wait until read completion */ + while ((aio_error(&aiocb) == EINPROGRESS) && (++cnt <= timeout)) + sleep(1); + + ret = aio_error(&aiocb); + if (ret != 0) { + op_errno = errno; + op = "aio_read_error"; + goto out; + } + + ret = aio_return(&aiocb); + if (ret != timelen) { + op_errno = errno; + op = "aio_read_buf"; + ret = -1; + goto out; + } + + if (memcmp(timestamp, buff, ret)) { + op_errno = EUCLEAN; + op = "aio_read_cmp_buf"; + ret = -1; + goto out; + } + ret = 0; +out: + if (fd != -1) { + sys_close(fd); + } - if (!fd_is_anonymous(fd)) - /* anonymous fd */ - goto out; + if (ret && file_path[0]) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED, + "%s() on %s returned ret is %d error is %s", op, file_path, ret, + ret != -1 ? 
strerror(ret) : strerror(op_errno)); - MAKE_HANDLE_PATH (real_path, this, fd->inode->gfid, NULL); - if (!real_path) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to create handle path (%s)", - uuid_utoa (fd->inode->gfid)); - ret = -1; - goto out; + if ((op_errno == EAGAIN) || (ret == EAGAIN)) { + ret = 0; + } else { + gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, + "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, + file_path, strerror(op_errno), priv->hostname, + priv->base_path, timeout); } + } + return ret; +} - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - goto out; - } - pfd->fd = -1; +static void * +posix_health_check_thread_proc(void *data) +{ + xlator_t *this = data; + struct posix_private *priv = this->private; + uint32_t interval = priv->health_check_interval; + int ret = -1; + xlator_t *top = NULL; + xlator_t *victim = NULL; + xlator_list_t **trav_p = NULL; + int count = 0; + gf_boolean_t victim_found = _gf_false; + glusterfs_ctx_t *ctx = THIS->ctx; + char file_path[PATH_MAX]; + + /* prevent races when the interval is updated */ + if (interval == 0) + goto out; + + snprintf(file_path, sizeof(file_path) - 1, "%s/%s/health_check", + priv->base_path, GF_HIDDEN_PATH); + + gf_msg_debug(this->name, 0, + "health-check thread started, " + "on path %s, " + "interval = %d seconds", + file_path, interval); + while (1) { + /* aborting sleep() is a request to exit this thread, sleep() + * will normally not return when cancelled */ + ret = sleep(interval); + if (ret > 0) + break; + /* prevent thread errors while doing the health-check(s) */ + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + + /* Do the health-check.*/ + ret = posix_fs_health_check(this, file_path); + if (ret < 0 && priv->health_check_active) + goto abort; + if (!priv->health_check_active) + goto out; + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + } - if (fd->inode->ia_type == IA_IFDIR) { - dir = opendir (real_path); - if (!dir) { - GF_FREE (pfd); - pfd 
= NULL; - goto out; - } - _fd = dirfd (dir); +out: + gf_msg_debug(this->name, 0, "health-check thread exiting"); + + LOCK(&priv->lock); + { + priv->health_check_active = _gf_false; + } + UNLOCK(&priv->lock); + + return NULL; + +abort: + LOCK(&priv->lock); + { + priv->health_check_active = _gf_false; + } + UNLOCK(&priv->lock); + + /* health-check failed */ + gf_msg(this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED, + "health-check failed, going down"); + + xlator_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, this); + + /* Below code is use to ensure if brick multiplexing is enabled if + count is more than 1 it means brick mux has enabled + */ + if (this->ctx->active) { + top = this->ctx->active->first; + LOCK(&ctx->volfile_lock); + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + count++; + } + UNLOCK(&ctx->volfile_lock); + } + + if (count == 1) { + gf_msg(this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED, + "still alive! -> SIGTERM"); + ret = sleep(30); + + /* Need to kill the process only while brick mux has not enabled + */ + if (ret == 0) + kill(getpid(), SIGTERM); + + ret = sleep(30); + gf_msg(this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED, + "still alive! 
-> SIGKILL"); + if (ret == 0) + kill(getpid(), SIGKILL); + + } else if (top) { + LOCK(&ctx->volfile_lock); + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + victim = (*trav_p)->xlator; + if (!victim->call_cleanup && + strcmp(victim->name, priv->base_path) == 0) { + victim_found = _gf_true; + break; + } + } + UNLOCK(&ctx->volfile_lock); + if (victim_found && !victim->cleanup_starting) { + gf_log(THIS->name, GF_LOG_INFO, + "detaching not-only " + " child %s", + priv->base_path); + victim->cleanup_starting = 1; + top->notify(top, GF_EVENT_CLEANUP, victim); } + } - if (fd->inode->ia_type == IA_IFREG) { - _fd = open (real_path, O_RDWR|O_LARGEFILE); - if (_fd == -1) { - GF_FREE (pfd); - pfd = NULL; - goto out; - } + return NULL; +} + +int +posix_spawn_health_check_thread(xlator_t *xl) +{ + struct posix_private *priv = NULL; + int ret = -1; + + priv = xl->private; + + LOCK(&priv->lock); + { + /* cancel the running thread */ + if (priv->health_check_active == _gf_true) { + pthread_cancel(priv->health_check); + priv->health_check_active = _gf_false; } - pfd->fd = _fd; - pfd->dir = dir; + /* prevent scheduling a check in a tight loop */ + if (priv->health_check_interval == 0) + goto unlock; - ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd); - if (ret != 0) { - if (_fd != -1) - close (_fd); - if (dir) - closedir (dir); - GF_FREE (pfd); - pfd = NULL; - goto out; + ret = gf_thread_create(&priv->health_check, NULL, + posix_health_check_thread_proc, xl, "posixhc"); + if (ret) { + priv->health_check_interval = 0; + priv->health_check_active = _gf_false; + gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_HEALTHCHECK_FAILED, + "unable to setup health-check thread"); + goto unlock; } - ret = 0; + priv->health_check_active = _gf_true; + } +unlock: + UNLOCK(&priv->lock); + return ret; +} + +void +posix_disk_space_check(xlator_t *this) +{ + struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; + double size = 0; + double percent = 0; + 
struct statvfs buf = {0}; + double totsz = 0; + double freesz = 0; + + GF_VALIDATE_OR_GOTO("posix-helpers", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + subvol_path = priv->base_path; + + op_ret = sys_statvfs(subvol_path, &buf); + + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on %s", subvol_path); + goto out; + } + + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + totsz = (buf.f_blocks * buf.f_bsize); + size = ((totsz * percent) / 100); + } else { + size = priv->disk_reserve; + } + + freesz = (buf.f_bfree * buf.f_bsize); + if (freesz <= size) { + priv->disk_space_full = 1; + } else { + priv->disk_space_full = 0; + } out: - if (pfd_p) - *pfd_p = pfd; - return ret; + return; } +static void * +posix_disk_space_check_thread_proc(void *data) +{ + xlator_t *this = NULL; + struct posix_private *priv = NULL; + uint32_t interval = 0; + int ret = -1; + + this = data; + priv = this->private; + + interval = 5; + gf_msg_debug(this->name, 0, + "disk-space thread started, " + "interval = %d seconds", + interval); + while (1) { + /* aborting sleep() is a request to exit this thread, sleep() + * will normally not return when cancelled */ + ret = sleep(interval); + if (ret > 0) + break; + /* prevent thread errors while doing the health-check(s) */ + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + + /* Do the disk-check.*/ + posix_disk_space_check(this); + if (!priv->disk_space_check_active) + goto out; + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + } + +out: + gf_msg_debug(this->name, 0, "disk space check thread exiting"); + LOCK(&priv->lock); + { + priv->disk_space_check_active = _gf_false; + } + UNLOCK(&priv->lock); + + return NULL; +} int -posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd) +posix_spawn_disk_space_check_thread(xlator_t *xl) { - int ret; + struct posix_private *priv = NULL; + int ret = -1; - LOCK (&fd->inode->lock); - 
{ - ret = __posix_fd_ctx_get (fd, this, pfd); + priv = xl->private; + + LOCK(&priv->lock); + { + /* cancel the running thread */ + if (priv->disk_space_check_active == _gf_true) { + pthread_cancel(priv->disk_space_check); + priv->disk_space_check_active = _gf_false; } - UNLOCK (&fd->inode->lock); - return ret; + ret = gf_thread_create(&priv->disk_space_check, NULL, + posix_disk_space_check_thread_proc, xl, + "posixrsv"); + if (ret) { + priv->disk_space_check_active = _gf_false; + gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, + "unable to setup disk space check thread"); + goto unlock; + } + + priv->disk_space_check_active = _gf_true; + } +unlock: + UNLOCK(&priv->lock); + return ret; } int -posix_fs_health_check (xlator_t *this) -{ - struct posix_private *priv = NULL; - int ret = -1; - char *subvol_path = NULL; - char timestamp[256] = {0,}; - int fd = -1; - int timelen = -1; - int nofbytes = 0; - time_t time_sec = {0,}; - char buff[64] = {0}; - char file_path[PATH_MAX] = {0}; - - GF_VALIDATE_OR_GOTO (this->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out); - - subvol_path = priv->base_path; - snprintf (file_path, sizeof (file_path), "%s/%s/health_check", - subvol_path, GF_HIDDEN_PATH); - - time_sec = time (NULL); - gf_time_fmt (timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); - timelen = strlen (timestamp); - - fd = open (file_path, O_CREAT|O_RDWR, 0644); - if (fd == -1) { - gf_log (this->name, GF_LOG_WARNING, - "open() on %s returned: %s", file_path, - strerror (errno)); - goto out; - } - nofbytes = write (fd, timestamp, timelen); - if (nofbytes != timelen) { - gf_log (this->name, GF_LOG_WARNING, - "write() on %s returned: %s", file_path, - strerror (errno)); - goto out; - } - /* Seek the offset to the beginning of the file, so that the offset for - read is from beginning of file */ - lseek(fd, 0, SEEK_SET); - nofbytes = read (fd, buff, timelen); - if (nofbytes == -1) { - gf_log (this->name, 
GF_LOG_WARNING, - "read() on %s returned: %s", file_path, - strerror (errno)); - goto out; - } +posix_fsyncer_pick(xlator_t *this, struct list_head *head) +{ + struct posix_private *priv = NULL; + int count = 0; + + priv = this->private; + pthread_mutex_lock(&priv->fsync_mutex); + { + while (list_empty(&priv->fsyncs)) + pthread_cond_wait(&priv->fsync_cond, &priv->fsync_mutex); + + count = priv->fsync_queue_count; + priv->fsync_queue_count = 0; + list_splice_init(&priv->fsyncs, head); + } + pthread_mutex_unlock(&priv->fsync_mutex); + + return count; +} + +void +posix_fsyncer_process(xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync) +{ + struct posix_fd *pfd = NULL; + int ret = -1; + int op_errno = 0; + + ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_GET_FDCTX_FAILED, + "could not get fdctx for fd(%s)", + uuid_utoa(stub->args.fd->inode->gfid)); + call_unwind_error(stub, -1, op_errno); + return; + } + + if (do_fsync && pfd) { + if (stub->args.datasync) + ret = sys_fdatasync(pfd->fd); + else + ret = sys_fsync(pfd->fd); + } else { ret = 0; -out: - if (fd != -1) { - close (fd); - } - return ret; + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "could not fstat fd(%s)", uuid_utoa(stub->args.fd->inode->gfid)); + call_unwind_error(stub, -1, errno); + return; + } + + call_unwind_error(stub, 0, 0); } -static void * -posix_health_check_thread_proc (void *data) +static void +posix_fsyncer_syncfs(xlator_t *this, struct list_head *head) { - xlator_t *this = NULL; - struct posix_private *priv = NULL; - uint32_t interval = 0; - int ret = -1; - - this = data; - priv = this->private; + call_stub_t *stub = NULL; + struct posix_fd *pfd = NULL; + int ret = -1; + + stub = list_entry(head->prev, call_stub_t, list); + ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL); + if (!ret) + (void)gf_syncfs(pfd->fd); +} - /* prevent races when the interval is updated */ - 
interval = priv->health_check_interval; - if (interval == 0) - goto out; +void * +posix_fsyncer(void *d) +{ + xlator_t *this = d; + struct posix_private *priv = NULL; + call_stub_t *stub = NULL; + call_stub_t *tmp = NULL; + struct list_head list; + int count = 0; + gf_boolean_t do_fsync = _gf_true; - gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, " - "interval = %d seconds", interval); + priv = this->private; - while (1) { - /* aborting sleep() is a request to exit this thread, sleep() - * will normally not return when cancelled */ - ret = sleep (interval); - if (ret > 0) - break; + for (;;) { + INIT_LIST_HEAD(&list); - /* prevent thread errors while doing the health-check(s) */ - pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); + count = posix_fsyncer_pick(this, &list); - /* Do the health-check.*/ - ret = posix_fs_health_check (this); + gf_nanosleep(priv->batch_fsync_delay_usec * GF_US_IN_NS); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "health_check on %s returned: %s", - priv->base_path, strerror (errno)); - goto abort; - } + gf_msg_debug(this->name, 0, "picked %d fsyncs", count); - pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); + switch (priv->batch_fsync_mode) { + case BATCH_NONE: + case BATCH_REVERSE_FSYNC: + break; + case BATCH_SYNCFS: + case BATCH_SYNCFS_SINGLE_FSYNC: + case BATCH_SYNCFS_REVERSE_FSYNC: + posix_fsyncer_syncfs(this, &list); + break; } -out: - gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting"); + if (priv->batch_fsync_mode == BATCH_SYNCFS) + do_fsync = _gf_false; + else + do_fsync = _gf_true; - LOCK (&priv->lock); + list_for_each_entry_safe_reverse(stub, tmp, &list, list) { - priv->health_check_active = _gf_false; + list_del_init(&stub->list); + + posix_fsyncer_process(this, stub, do_fsync); + + if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC) + do_fsync = _gf_false; } - UNLOCK (&priv->lock); + } +} - return NULL; +/** + * TODO: move fd/inode interfaces into a single routine.. 
+ */ +static int32_t +posix_fetch_signature_xattr(char *real_path, const char *key, dict_t *xattr, + size_t *xsize) +{ + int32_t ret = 0; + char *memptr = NULL; + ssize_t xattrsize = 0; + char val_buf[2048] = { + 0, + }; + gf_boolean_t have_val = _gf_false; + + xattrsize = sys_lgetxattr(real_path, key, val_buf, sizeof(val_buf) - 1); + if (xattrsize >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) + xattrsize = sys_lgetxattr(real_path, key, NULL, 0); + if ((errno == ENOATTR) || (errno == ENODATA)) + return 0; + if (xattrsize == -1) + goto error_return; + } + memptr = GF_MALLOC(xattrsize + 1, gf_posix_mt_char); + if (!memptr) + goto error_return; + if (have_val) { + memcpy(memptr, val_buf, xattrsize); + memptr[xattrsize] = '\0'; + } else { + bzero(memptr, xattrsize + 1); + ret = sys_lgetxattr(real_path, key, memptr, xattrsize); + if (ret == -1) + goto freemem; + } + ret = dict_set_dynptr(xattr, (char *)key, memptr, xattrsize); + if (ret) + goto freemem; -abort: - /* health-check failed */ - gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down"); - xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this); + if (xsize) + *xsize = xattrsize; + + return 0; + +freemem: + GF_FREE(memptr); +error_return: + return -1; +} + +static int32_t +posix_fd_fetch_signature_xattr(int fd, const char *key, dict_t *xattr, + size_t *xsize) +{ + int32_t ret = 0; + char *memptr = NULL; + ssize_t xattrsize = 0; + + xattrsize = sys_fgetxattr(fd, key, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) + return 0; + if (xattrsize == -1) + goto error_return; + + memptr = GF_CALLOC(xattrsize + 1, sizeof(char), gf_posix_mt_char); + if (!memptr) + goto error_return; + ret = sys_fgetxattr(fd, key, memptr, xattrsize); + if (ret == -1) + goto freemem; - ret = sleep (30); - if (ret == 0) { - gf_log (this->name, GF_LOG_EMERG, "still alive! 
-> SIGTERM"); - kill (getpid(), SIGTERM); + ret = dict_set_dynptr(xattr, (char *)key, memptr, xattrsize); + if (ret) + goto freemem; + + if (xsize) + *xsize = xattrsize; + + return 0; + +freemem: + GF_FREE(memptr); +error_return: + return -1; +} + +/** + * Fetch on-disk ongoing version and object signature extended attribute. + * Be generous to absence of xattrs (just *absence*, other errors are + * propagated up to the invoker), higher layer (br-stub) takes care of + * interpreting the xattrs for anomalies. + */ +int32_t +posix_get_objectsignature(char *real_path, dict_t *xattr) +{ + int32_t ret = 0; + size_t signsize = 0; + + ret = posix_fetch_signature_xattr(real_path, BITROT_CURRENT_VERSION_KEY, + xattr, NULL); + if (ret) + goto error_return; + + ret = posix_fetch_signature_xattr(real_path, BITROT_SIGNING_VERSION_KEY, + xattr, &signsize); + if (ret) + goto delkey1; + + ret = dict_set_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY, + (uint32_t)signsize); + if (ret) + goto delkey2; + + return 0; + +delkey2: + dict_del(xattr, BITROT_SIGNING_VERSION_KEY); +delkey1: + dict_del(xattr, BITROT_CURRENT_VERSION_KEY); +error_return: + return -EINVAL; +} + +int32_t +posix_fdget_objectsignature(int fd, dict_t *xattr) +{ + int32_t ret = 0; + size_t signsize = 0; + + ret = posix_fd_fetch_signature_xattr(fd, BITROT_CURRENT_VERSION_KEY, xattr, + NULL); + if (ret) + goto error_return; + + ret = posix_fd_fetch_signature_xattr(fd, BITROT_SIGNING_VERSION_KEY, xattr, + &signsize); + if (ret) + goto delkey1; + + ret = dict_set_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY, + (uint32_t)signsize); + if (ret) + goto delkey2; + + return 0; + +delkey2: + dict_del(xattr, BITROT_SIGNING_VERSION_KEY); +delkey1: + dict_del(xattr, BITROT_CURRENT_VERSION_KEY); +error_return: + return -EINVAL; +} + +/* + * posix_resolve_dirgfid_to_path: + * It converts given dirgfid to path by doing recursive readlinks at the + * backend. 
If bname is given, it suffixes bname to dir path to form the + * complete path else it doesn't. It allocates memory for the path and is + * caller's responsibility to free the same. If bname is NULL and pargfid + * is ROOT, then it returns "/" + **/ + +int32_t +posix_resolve_dirgfid_to_path(const uuid_t dirgfid, const char *brick_path, + const char *bname, char **path) +{ + char *linkname = NULL; + char *dir_handle = NULL; + char *pgfidstr = NULL; + char *saveptr = NULL; + ssize_t len = 0; + int ret = 0; + uuid_t tmp_gfid = { + 0, + }; + uuid_t pargfid = { + 0, + }; + char gpath[PATH_MAX] = { + 0, + }; + char result[PATH_MAX] = { + 0, + }; + char result1[PATH_MAX] = { + 0, + }; + char *dir_name = NULL; + char pre_dir_name[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + gf_uuid_copy(pargfid, dirgfid); + if (!path || gf_uuid_is_null(pargfid)) { + ret = -1; + goto out; + } + + if (__is_root_gfid(pargfid)) { + if (bname) { + snprintf(result, PATH_MAX, "/%s", bname); + *path = gf_strdup(result); + } else { + *path = gf_strdup("/"); + } + return ret; + } + + dir_handle = alloca(PATH_MAX); + linkname = alloca(PATH_MAX); + (void)snprintf(gpath, PATH_MAX, "%s/.glusterfs/", brick_path); + + while (!(__is_root_gfid(pargfid))) { + len = snprintf(dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath, + pargfid[0], pargfid[1], uuid_utoa(pargfid)); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + len = sys_readlink(dir_handle, linkname, PATH_MAX); + if (len < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED, + "could not read the " + "link from the gfid handle %s", + dir_handle); + ret = -1; + goto out; } - ret = sleep (30); - if (ret == 0) { - gf_log (this->name, GF_LOG_EMERG, "still alive! 
-> SIGKILL"); - kill (getpid(), SIGKILL); + linkname[len] = '\0'; + + pgfidstr = strtok_r(linkname + SLEN("../../00/00/"), "/", &saveptr); + dir_name = strtok_r(NULL, "/", &saveptr); + + if (pre_dir_name[0] != '\0') { /* Remove '/' at the end */ + len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name); + } else { + len = snprintf(result, PATH_MAX, "%s", dir_name); + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; } + snprintf(pre_dir_name, sizeof(pre_dir_name), "%s", result); + + gf_uuid_parse(pgfidstr, tmp_gfid); + gf_uuid_copy(pargfid, tmp_gfid); + } + + if (bname) { + len = snprintf(result1, PATH_MAX, "/%s/%s", result, bname); + } else { + len = snprintf(result1, PATH_MAX, "/%s", result); + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + *path = gf_strdup(result1); + if (*path == NULL) { + ret = -1; + goto out; + } + +out: + return ret; +} + +posix_inode_ctx_t * +__posix_inode_ctx_get(inode_t *inode, xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + posix_inode_ctx_t *ctx_p = NULL; + + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret == 0) { + return (posix_inode_ctx_t *)(uintptr_t)ctx_uint; + } + + ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_posix_mt_inode_ctx_t); + if (!ctx_p) return NULL; + + pthread_mutex_init(&ctx_p->xattrop_lock, NULL); + pthread_mutex_init(&ctx_p->write_atomic_lock, NULL); + pthread_mutex_init(&ctx_p->pgfid_lock, NULL); + + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set(inode, this, &ctx_uint); + if (ret < 0) { + pthread_mutex_destroy(&ctx_p->xattrop_lock); + pthread_mutex_destroy(&ctx_p->write_atomic_lock); + pthread_mutex_destroy(&ctx_p->pgfid_lock); + GF_FREE(ctx_p); + return NULL; + } + + return ctx_p; } -void -posix_spawn_health_check_thread (xlator_t *xl) +int +__posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx) { - struct posix_private *priv = NULL; - int ret = -1; + posix_inode_ctx_t *ctx_p = NULL; - priv = 
xl->private; + ctx_p = __posix_inode_ctx_get(inode, this); + if (ctx_p == NULL) + return -1; - LOCK (&priv->lock); - { - /* cancel the running thread */ - if (priv->health_check_active == _gf_true) { - pthread_cancel (priv->health_check); - priv->health_check_active = _gf_false; - } + ctx_p->unlink_flag = ctx; - /* prevent scheduling a check in a tight loop */ - if (priv->health_check_interval == 0) - goto unlock; - - ret = gf_thread_create (&priv->health_check, NULL, - posix_health_check_thread_proc, xl); - if (ret < 0) { - priv->health_check_interval = 0; - priv->health_check_active = _gf_false; - gf_log (xl->name, GF_LOG_ERROR, - "unable to setup health-check thread: %s", - strerror (errno)); - goto unlock; - } + return 0; +} - /* run the thread detached, resources will be freed on exit */ - pthread_detach (priv->health_check); - priv->health_check_active = _gf_true; - } -unlock: - UNLOCK (&priv->lock); +int +posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx) +{ + int ret = -1; + + LOCK(&inode->lock); + { + ret = __posix_inode_ctx_set_unlink_flag(inode, this, ctx); + } + UNLOCK(&inode->lock); + + return ret; } int -posix_fsyncer_pick (xlator_t *this, struct list_head *head) +__posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, + posix_inode_ctx_t **ctx) { - struct posix_private *priv = NULL; - int count = 0; + posix_inode_ctx_t *ctx_p = NULL; - priv = this->private; - pthread_mutex_lock (&priv->fsync_mutex); - { - while (list_empty (&priv->fsyncs)) - pthread_cond_wait (&priv->fsync_cond, - &priv->fsync_mutex); + ctx_p = __posix_inode_ctx_get(inode, this); + if (ctx_p == NULL) + return -1; - count = priv->fsync_queue_count; - priv->fsync_queue_count = 0; - list_splice_init (&priv->fsyncs, head); - } - pthread_mutex_unlock (&priv->fsync_mutex); + *ctx = ctx_p; - return count; + return 0; } +int +posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, posix_inode_ctx_t **ctx) +{ + int ret = 0; -void -posix_fsyncer_process (xlator_t 
*this, call_stub_t *stub, gf_boolean_t do_fsync) + LOCK(&inode->lock); + { + ret = __posix_inode_ctx_get_all(inode, this, ctx); + } + UNLOCK(&inode->lock); + + return ret; +} + +gf_boolean_t +posix_is_bulk_removexattr(char *name, dict_t *xdata) { - struct posix_fd *pfd = NULL; - int ret = -1; - struct posix_private *priv = NULL; + if (name && (name[0] == '\0') && xdata) + return _gf_true; + return _gf_false; +} + +int32_t +posix_set_iatt_in_dict(dict_t *dict, struct iatt *preop, struct iatt *postop) +{ + int ret = -1; + struct iatt *stbuf = NULL; + int32_t len = sizeof(struct iatt); + struct iatt *prebuf = NULL; + struct iatt *postbuf = NULL; - priv = this->private; + if (!dict) + return ret; - ret = posix_fd_ctx_get (stub->args.fd, this, &pfd); + if (postop) { + stbuf = GF_MALLOC(len, gf_common_mt_char); + if (!stbuf) + goto out; + memcpy(stbuf, postop, len); + ret = dict_set_iatt(dict, DHT_IATT_IN_XDATA_KEY, stbuf, false); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "could not get fdctx for fd(%s)", - uuid_utoa (stub->args.fd->inode->gfid)); - call_unwind_error (stub, -1, EINVAL); - return; + GF_FREE(stbuf); + goto out; } + } - if (do_fsync) { - if (stub->args.datasync) - ret = sys_fdatasync (pfd->fd); - else - ret = sys_fsync (pfd->fd); - } else { - ret = 0; + if (preop) { + prebuf = GF_MALLOC(len, gf_common_mt_char); + if (!prebuf) + goto out; + memcpy(prebuf, preop, len); + ret = dict_set_iatt(dict, GF_PRESTAT, prebuf, false); + if (ret < 0) { + GF_FREE(prebuf); + goto out; } + } - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "could not fstat fd(%s)", - uuid_utoa (stub->args.fd->inode->gfid)); - call_unwind_error (stub, -1, errno); - return; + if (postop) { + postbuf = GF_MALLOC(len, gf_common_mt_char); + if (!postbuf) + goto out; + memcpy(postbuf, postop, len); + ret = dict_set_iatt(dict, GF_POSTSTAT, postbuf, false); + if (ret < 0) { + GF_FREE(postbuf); + goto out; } + } - call_unwind_error (stub, 0, 0); + ret = 0; +out: + return ret; } - 
-static void -posix_fsyncer_syncfs (xlator_t *this, struct list_head *head) -{ - call_stub_t *stub = NULL; - struct posix_fd *pfd = NULL; - int ret = -1; - - stub = list_entry (head->prev, call_stub_t, list); - ret = posix_fd_ctx_get (stub->args.fd, this, &pfd); - if (ret) - return; - -#ifdef GF_LINUX_HOST_OS - /* syncfs() is not "declared" in RHEL's glibc even though - the kernel has support. - */ -#include <sys/syscall.h> -#include <unistd.h> -#ifdef SYS_syncfs - syscall (SYS_syncfs, pfd->fd); -#else - sync(); -#endif -#else - sync(); -#endif +mode_t +posix_override_umask(mode_t mode, mode_t mode_bit) +{ + gf_msg_debug("posix", 0, "The value of mode is %u", mode); + mode = mode >> 9; /* 3x3 (bits for each octal digit)*/ + mode = (mode << 9) | mode_bit; + gf_msg_debug("posix", 0, "The value of mode is %u", mode); + return mode; } +int +posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata) +{ + int ret = 0; + size_t xattrsize = 0; + data_t *val = NULL; -void * -posix_fsyncer (void *d) + if (!xdata) + return 0; + + LOCK(&fd->inode->lock); + { + val = dict_get_sizen(xdata, GF_PROTECT_FROM_EXTERNAL_WRITES); + if (val) { + ret = sys_fsetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES, + val->data, val->len, 0); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_FAILED, errno, + "setxattr failed key %s", + GF_PROTECT_FROM_EXTERNAL_WRITES); + } + + goto out; + } + + if (dict_get_sizen(xdata, GF_AVOID_OVERWRITE)) { + xattrsize = sys_fgetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES, + NULL, 0); + if ((xattrsize == -1) && + ((errno == ENOATTR) || (errno == ENODATA))) { + ret = 0; + } else { + ret = -1; + } + } + } +out: + UNLOCK(&fd->inode->lock); + return ret; +} + +gf_cs_obj_state +posix_cs_heal_state(xlator_t *this, const char *realpath, int *fd, + struct iatt *buf) { - xlator_t *this = d; - struct posix_private *priv = NULL; - call_stub_t *stub = NULL; - call_stub_t *tmp = NULL; - struct list_head list; - int count = 0; - 
gf_boolean_t do_fsync = _gf_true; + gf_boolean_t remote = _gf_false; + gf_boolean_t downloading = _gf_false; + int ret = 0; + gf_cs_obj_state state = GF_CS_ERROR; + size_t xattrsize = 0; + + if (!buf) { + ret = -1; + goto out; + } + + if (fd) { + xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + remote = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "fgetxattr" + " failed"); + state = GF_CS_ERROR; + goto out; + } else { + remote = _gf_true; + } + + xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_DOWNLOADING, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + downloading = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "fgetxattr" + " failed"); + state = GF_CS_ERROR; + goto out; + } else { + downloading = _gf_true; + } + } else { + xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_REMOTE, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + remote = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "getxattr" + " failed"); + state = GF_CS_ERROR; + goto out; + } else { + remote = _gf_true; + } + + xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_DOWNLOADING, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + downloading = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "getxattr" + " failed"); + state = GF_CS_ERROR; + goto out; + } else { + downloading = _gf_true; + } + } - priv = this->private; + if (remote && downloading) { + if (fd) { + ret = sys_fremovexattr(*fd, GF_CS_OBJECT_DOWNLOADING); + } else { + ret = sys_lremovexattr(realpath, GF_CS_OBJECT_DOWNLOADING); + } - for (;;) { - INIT_LIST_HEAD (&list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "failed 
to remove xattr, repair failed"); + state = GF_CS_ERROR; + goto out; + } + + if (buf->ia_size) { + if (fd) { + ret = sys_ftruncate(*fd, 0); + } else { + ret = sys_truncate(realpath, 0); + } + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "truncate failed. File is in inconsistent" + " state"); + state = GF_CS_ERROR; + goto out; + } + } + + state = GF_CS_REMOTE; + goto out; + + } else if (remote) { + if (buf->ia_size) { + if (fd) { + ret = sys_ftruncate(*fd, 0); + } else { + ret = sys_truncate(realpath, 0); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "truncate failed. File is in inconsistent" + " state"); + state = GF_CS_ERROR; + goto out; + } + } + + state = GF_CS_REMOTE; + goto out; + } else if (downloading) { + if (buf->ia_size) { + if (fd) { + ret = sys_fremovexattr(*fd, GF_CS_OBJECT_DOWNLOADING); + } else { + ret = sys_lremovexattr(realpath, GF_CS_OBJECT_DOWNLOADING); + } + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + "failed to remove xattr, repair failed"); + state = GF_CS_ERROR; + goto out; + } - count = posix_fsyncer_pick (this, &list); + state = GF_CS_LOCAL; + goto out; + } + } - usleep (priv->batch_fsync_delay_usec); + state = GF_CS_LOCAL; +out: + gf_msg_debug(this->name, 0, "heal state returned %d", state); + return state; +} - gf_log (this->name, GF_LOG_DEBUG, - "picked %d fsyncs", count); +gf_cs_obj_state +posix_cs_check_status(xlator_t *this, const char *realpath, int *fd, + struct iatt *buf) +{ + gf_boolean_t remote = _gf_false; + gf_boolean_t downloading = _gf_false; + int ret = 0; + gf_cs_obj_state state = GF_CS_LOCAL; + size_t xattrsize = 0; + int op_errno = 0; + + if (fd) { + xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + remote = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "getxattr " + "failed err %d", + errno); + goto out; + } else { + 
remote = _gf_true; + } - switch (priv->batch_fsync_mode) { - case BATCH_NONE: - case BATCH_REVERSE_FSYNC: - break; - case BATCH_SYNCFS: - case BATCH_SYNCFS_SINGLE_FSYNC: - case BATCH_SYNCFS_REVERSE_FSYNC: - posix_fsyncer_syncfs (this, &list); - break; - } + xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_DOWNLOADING, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + downloading = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "getxattr " + "failed err : %d", + errno); - if (priv->batch_fsync_mode == BATCH_SYNCFS) - do_fsync = _gf_false; - else - do_fsync = _gf_true; + goto out; + } else { + downloading = _gf_true; + } + } + + if (realpath) { + xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_REMOTE, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + remote = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "getxattr " + "failed err : %d", + errno); + goto out; + } else { + remote = _gf_true; + } + + xattrsize = sys_lgetxattr(realpath, GF_CS_OBJECT_DOWNLOADING, NULL, 0); + if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) { + downloading = _gf_false; + } else if (xattrsize == -1) { + ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "getxattr " + "failed err : %d", + errno); + goto out; + } else { + downloading = _gf_true; + } + } - list_for_each_entry_safe_reverse (stub, tmp, &list, list) { - list_del_init (&stub->list); +out: + if (ret) { + gf_msg("POSIX", GF_LOG_ERROR, 0, op_errno, + "getxattr failed " + "with %d", + op_errno); + state = GF_CS_ERROR; + return state; + } + + if ((remote && downloading) || (remote && buf && buf->ia_size)) { + state = GF_CS_REPAIR; + gf_msg_debug(this->name, 0, "status is REPAIR"); + return state; + } + + if (remote) + state = GF_CS_REMOTE; + else if (downloading) + state = 
GF_CS_DOWNLOADING; + else + state = GF_CS_LOCAL; + + gf_msg_debug(this->name, 0, "state returned is %d", state); + return state; +} - posix_fsyncer_process (this, stub, do_fsync); +int +posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state, + char const *path, int *fd) +{ + int ret = 0; + char *value = NULL; + size_t xattrsize = 0; + + if (!rsp) { + ret = -1; + goto out; + } + + if (!(*rsp)) { + *rsp = dict_new(); + if (!(*rsp)) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "failed to" + " create dict"); + ret = -1; + goto out; + } + } + + ret = dict_set_uint64(*rsp, GF_CS_OBJECT_STATUS, state); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "failed to set " + "dict"); + ret = -1; + goto out; + } + + if (fd) { + xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, NULL, 0); + if (xattrsize != -1) { + value = GF_CALLOC(1, xattrsize + 1, gf_posix_mt_char); + if (!value) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "no memory for value"); + ret = -1; + goto out; + } + /* TODO: Add check for ENODATA */ + xattrsize = sys_fgetxattr(*fd, GF_CS_OBJECT_REMOTE, value, + xattrsize + 1); + if (xattrsize == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + " getxattr failed for key %s", GF_CS_OBJECT_REMOTE); + goto out; + } else { + value[xattrsize] = '\0'; + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + " getxattr failed for key %s", GF_CS_OBJECT_REMOTE); + goto out; + } + } else { + xattrsize = sys_lgetxattr(path, GF_CS_OBJECT_REMOTE, NULL, 0); + if (xattrsize != -1) { + value = GF_CALLOC(1, xattrsize + 1, gf_posix_mt_char); + if (!value) { + ret = -1; + goto out; + } - if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC) - do_fsync = _gf_false; - } + xattrsize = sys_lgetxattr(path, GF_CS_OBJECT_REMOTE, value, + xattrsize + 1); + if (xattrsize == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, errno, + " getxattr failed for key %s", GF_CS_OBJECT_REMOTE); + goto out; + } else { + value[xattrsize] = '\0'; + } + } else { + 
gf_msg(this->name, GF_LOG_ERROR, 0, errno, + " getxattr failed for key %s", GF_CS_OBJECT_REMOTE); + goto out; + } + } + + if (ret == 0) { + ret = dict_set_str(*rsp, GF_CS_OBJECT_REMOTE, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "failed to set" + "value"); } + } + +out: + return ret; } -/** - * TODO: move fd/inode interfaces into a single routine.. +/* This function checks the status of the file and updates the xattr response. + * Also it repairs the state of the file which could have been resulted from a + * crash or transient failures. */ -static int32_t -posix_fetch_signature_xattr (char *real_path, - const char *key, dict_t *xattr, size_t *xsize) +int +posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + struct iatt *buf, const char *realpath, dict_t *xattr_req, + dict_t **xattr_rsp, gf_boolean_t ignore_failure) { - int32_t ret = 0; - char *memptr = NULL; - ssize_t xattrsize = 0; + gf_cs_obj_state state = GF_CS_ERROR; + int ret = 0; + gf_boolean_t is_cs_obj_status = _gf_false; + gf_boolean_t is_cs_obj_repair = _gf_false; - xattrsize = sys_lgetxattr (real_path, key, NULL, 0); - if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) - return 0; - if (xattrsize == -1) - goto error_return; + if (dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS)) + is_cs_obj_status = _gf_true; + if (dict_get_sizen(xattr_req, GF_CS_OBJECT_REPAIR)) + is_cs_obj_repair = _gf_true; - memptr = GF_CALLOC (xattrsize + 1, sizeof (char), gf_posix_mt_char); - if (!memptr) - goto error_return; - ret = sys_lgetxattr (real_path, key, memptr, xattrsize); - if (ret == -1) - goto freemem; + if (!(is_cs_obj_status || is_cs_obj_repair)) + return 0; - ret = dict_set_dynptr (xattr, (char *)key, memptr, xattrsize); - if (ret) - goto freemem; + if (fd) { + LOCK(&fd->inode->lock); + if (is_cs_obj_status) { + state = posix_cs_check_status(this, NULL, pfd, buf); + gf_msg_debug(this->name, 0, "state : %d", state); + ret = posix_cs_set_state(this, xattr_rsp, 
state, NULL, pfd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "posix_cs_set_state failed"); + } + + if (ignore_failure) { + ret = 0; + goto unlock; + } else { + if (state != GF_CS_LOCAL || ret != 0) { + ret = -1; + goto unlock; + } + } + } - if (xsize) - *xsize = xattrsize; + if (is_cs_obj_repair) { + state = posix_cs_check_status(this, NULL, pfd, buf); + gf_msg_debug(this->name, 0, "state : %d", state); - return 0; + if (state == GF_CS_REPAIR) { + state = posix_cs_heal_state(this, NULL, pfd, buf); - freemem: - GF_FREE (memptr); - error_return: - return -1; + if (state == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "repair check failed"); + } + } + + ret = posix_cs_set_state(this, xattr_rsp, state, NULL, pfd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "posix_cs_set_state failed"); + if (ignore_failure) + ret = 0; + else + ret = -1; + goto unlock; + } + } + } else { + if (!loc->inode) { + ret = 0; + goto out; + } + + LOCK(&loc->inode->lock); + if (is_cs_obj_status) { + state = posix_cs_check_status(this, realpath, NULL, buf); + gf_msg_debug(this->name, 0, "state : %d", state); + ret = posix_cs_set_state(this, xattr_rsp, state, realpath, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "posix_cs_set_state failed"); + } + + if (ignore_failure) { + ret = 0; + goto unlock; + } else { + if (state != GF_CS_LOCAL || ret != 0) { + ret = -1; + goto unlock; + } + } + } + + if (is_cs_obj_repair) { + state = posix_cs_check_status(this, realpath, NULL, buf); + gf_msg_debug(this->name, 0, "state : %d", state); + + if (state == GF_CS_REPAIR) { + state = posix_cs_heal_state(this, realpath, NULL, buf); + + if (state == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "repair check failed"); + } + } + + ret = posix_cs_set_state(this, xattr_rsp, state, realpath, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "posix_cs_set_state failed"); + if (ignore_failure) + ret = 0; + else + ret = -1; + goto unlock; + 
} + } + } + +unlock: + if (fd) + UNLOCK(&fd->inode->lock); + else + UNLOCK(&loc->inode->lock); +out: + return ret; } -static int32_t -posix_fd_fetch_signature_xattr (int fd, - const char *key, dict_t *xattr, size_t *xsize) +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) { - int32_t ret = 0; - char *memptr = NULL; - ssize_t xattrsize = 0; + int ret = -1; - xattrsize = sys_fgetxattr (fd, key, NULL, 0); - if ((xattrsize == -1) && ((errno == ENOATTR) || (errno == ENODATA))) - return 0; - if (xattrsize == -1) - goto error_return; + if (inode->ia_type == IA_IFBLK || inode->ia_type == IA_IFCHR) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT, + "%s received on %s file (%s)", fop, + (inode->ia_type == IA_IFBLK) ? "block" : "char", + uuid_utoa(inode->gfid)); + goto out; + } - memptr = GF_CALLOC (xattrsize + 1, sizeof (char), gf_posix_mt_char); - if (!memptr) - goto error_return; - ret = sys_fgetxattr (fd, key, memptr, xattrsize); - if (ret == -1) - goto freemem; + ret = 0; - ret = dict_set_dynptr (xattr, (char *)key, memptr, xattrsize); - if (ret) - goto freemem; +out: + return ret; +} - if (xsize) - *xsize = xattrsize; +void +posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) +{ + int ret = 0; + char val[4096] = { + 0, + }; - return 0; + if (!xattr_req) + return; - freemem: - GF_FREE (memptr); - error_return: - return -1; + if (!dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS)) + return; + + if (fd != -1) { + ret = sys_fgetxattr(fd, GF_CS_OBJECT_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_size = atoll(val); + } else { + /* Safe to assume that the other 2 xattrs are also not set*/ + return; + } + ret = sys_fgetxattr(fd, GF_CS_BLOCK_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blksize = atoll(val); + } + ret = sys_fgetxattr(fd, GF_CS_NUM_BLOCKS, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blocks = atoll(val); + } + } else { + ret = sys_lgetxattr(loc, 
GF_CS_OBJECT_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_size = atoll(val); + } else { + /* Safe to assume that the other 2 xattrs are also not set*/ + return; + } + ret = sys_lgetxattr(loc, GF_CS_BLOCK_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blksize = atoll(val); + } + ret = sys_lgetxattr(loc, GF_CS_NUM_BLOCKS, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blocks = atoll(val); + } + } } -/** - * Fetch on-disk ongoing version and object signature extended attribute. - * Be generous to absence of xattrs (just *absence*, other errors are - * propagated up to the invoker), higher layer (br-stub) takes care of - * interpreting the xattrs for anomalies. - */ -int32_t -posix_get_objectsignature (char *real_path, dict_t *xattr) +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) { - int32_t ret = 0; - size_t signsize = 0; + int op_ret = 0; + ssize_t size = 0; + char value_buf[4096] = { + 0, + }; + gf_boolean_t have_val = _gf_false; + data_t *arg_data = NULL; + char *xattr_name = NULL; + size_t xattr_len = 0; + gf_boolean_t is_stale = _gf_false; + + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name == NULL) { + op_ret = 0; + return is_stale; + } + + xattr_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_len); + if (!arg_data) { + op_ret = 0; + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + return is_stale; + } + + size = sys_lgetxattr(par_path, xattr_name, value_buf, + sizeof(value_buf) - 1); + + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) path (%s) failed due to" + " buffer overflow", + xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + } + if (size < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed, path : %s", xattr_name, + par_path); + 
goto out; + } + } - ret = posix_fetch_signature_xattr - (real_path, BITROT_CURRENT_VERSION_KEY, xattr, NULL); - if (ret) - goto error_return; + if (!have_val) { + size = sys_lgetxattr(par_path, xattr_name, value_buf, size); + if (size < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed (%s)", xattr_name, + strerror(errno)); + goto out; + } + } - ret = posix_fetch_signature_xattr - (real_path, BITROT_SIGNING_VERSION_KEY, xattr, &signsize); - if (ret) - goto delkey1; + if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { + gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, + "failing preop as on-disk xattr value differs from argument " + "value for key %s", + xattr_name); + op_ret = -1; + } - ret = dict_set_uint32 - (xattr, BITROT_SIGNING_XATTR_SIZE_KEY, (uint32_t) signsize); - if (ret) - goto delkey2; +out: + dict_deln(xdata, xattr_name, xattr_len); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); - return 0; + if (op_ret == -1) { + is_stale = _gf_true; + } - delkey2: - dict_del (xattr, BITROT_SIGNING_VERSION_KEY); - delkey1: - dict_del (xattr, BITROT_CURRENT_VERSION_KEY); - error_return: - return -EINVAL; + return is_stale; } -int32_t -posix_fdget_objectsignature (int fd, dict_t *xattr) +/* Delete user xattr from the file at the file-path specified by data and from + * dict */ +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data) { - int32_t ret = 0; - size_t signsize = 0; - - ret = posix_fd_fetch_signature_xattr - (fd, BITROT_CURRENT_VERSION_KEY, xattr, NULL); - if (ret) - goto error_return; + int ret; + char *real_path = data; - ret = posix_fd_fetch_signature_xattr - (fd, BITROT_SIGNING_VERSION_KEY, xattr, &signsize); - if (ret) - goto delkey1; + ret = sys_lremovexattr(real_path, k); + if (ret) { + gf_msg("posix-helpers", GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, + "removexattr failed. 
key %s path %s", k, real_path); + } - ret = dict_set_uint32 - (xattr, BITROT_SIGNING_XATTR_SIZE_KEY, (uint32_t) signsize); - if (ret) - goto delkey2; - - return 0; + dict_del(dict, k); - delkey2: - dict_del (xattr, BITROT_SIGNING_VERSION_KEY); - delkey1: - dict_del (xattr, BITROT_CURRENT_VERSION_KEY); - error_return: - return -EINVAL; + return ret; } diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c new file mode 100644 index 00000000000..6d54d37e5aa --- /dev/null +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -0,0 +1,6004 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <regex.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include <glusterfs/checksum.h> +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include "posix-handle.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> +#include "glusterfs3-xdr.h" +#include <glusterfs/glusterfs-acl.h> +#include "posix-messages.h" +#include "posix-metadata.h" 
+#include <glusterfs/events.h> +#include "posix-gfid-path.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/common-utils.h> + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR \ + uid_t old_fsuid; \ + gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) \ + do { \ + old_fsuid = setfsuid(uid); \ + old_fsgid = setfsgid(gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() \ + do { \ + setfsuid(old_fsuid); \ + setfsgid(old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. */ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#endif + +static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, + NULL}; + +void +posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, + char *loc) +{ + int ret = 0; + uuid_t uuid; + + if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS)) + return; + + if (!(*rsp)) { + *rsp = dict_new(); + if (!(*rsp)) { + return; + } + } + + if (fd != -1) { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for fd %d", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", + GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } + } else { + if (dict_get_sizen(req, 
GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for loc %s", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", + GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } + } + return; +} + +int32_t +posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + struct iatt buf = { + 0, + }; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_private *priv = NULL; + char *real_path = NULL; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + MAKE_INODE_HANDLE(real_path, this, loc, &buf); + + if (op_ret == -1) { + op_errno = errno; + if (op_errno == ENOENT) { + gf_msg_debug(this->name, 0, + "lstat on gfid-handle %s (path: %s)" + "failed: %s", + real_path ? real_path : "<null>", loc->path, + strerror(op_errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED, + "lstat on gfid-handle %s (path: %s) failed", + real_path ? 
real_path : "<null>", loc->path); + } + goto out; + } + if (xdata) { + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &buf); + + posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, + &xattr_rsp, _gf_true); + + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); + } + + posix_update_iatt_buf(&buf, -1, real_path, xdata); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + + return 0; +} + +static int +posix_do_chmod(xlator_t *this, const char *path, struct iatt *stbuf) +{ + int32_t ret = -1; + mode_t mode = 0; + mode_t mode_bit = 0; + struct posix_private *priv = NULL; + struct stat stat; + int is_symlink = 0; + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + ret = sys_lstat(path, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_LSTAT_FAILED, + "lstat failed: %s", path); + goto out; + } + + if (S_ISLNK(stat.st_mode)) + is_symlink = 1; + + if (S_ISDIR(stat.st_mode)) { + mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_directory_mask) | + priv->force_directory_mode; + mode = posix_override_umask(mode, mode_bit); + } else { + mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_mask) | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + } + ret = lchmod(path, mode); + if ((ret == -1) && (errno == ENOSYS)) { + /* in Linux symlinks are always in mode 0777 and no + such call as lchmod exists. 
+ */ + gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = sys_chmod(path, mode); + } +out: + return ret; +} + +static int +posix_do_chown(xlator_t *this, const char *path, struct iatt *stbuf, + int32_t valid) +{ + int32_t ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = sys_lchown(path, uid, gid); + + return ret; +} + +static int +posix_do_utimes(xlator_t *this, const char *path, struct iatt *stbuf, int valid) +{ + int32_t ret = -1; +#if defined(HAVE_UTIMENSAT) + struct timespec tv[2] = {{ + 0, + }, + { + 0, + }}; +#else + struct timeval tv[2] = {{ + 0, + }, + { + 0, + }}; +#endif + struct stat stat; + int is_symlink = 0; + + ret = sys_lstat(path, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%s", + path); + goto out; + } + + if (S_ISLNK(stat.st_mode)) + is_symlink = 1; + + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv[0].tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec); + } else { + /* atime is not given, use current values */ + tv[0].tv_sec = ST_ATIM_SEC(&stat); + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC(&stat)); + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv[1].tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec); + } else { + /* mtime is not given, use current values */ + tv[1].tv_sec = ST_MTIM_SEC(&stat); + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC(&stat)); + } + + ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); + if ((ret == -1) && (errno == ENOSYS)) { + gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); + } + +out: + return ret; +} + +int +posix_setattr(call_frame_t *frame, xlator_t 
*this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + dict_t *xattr_rsp = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_path, this, loc, &statpre); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "setattr (lstat) on gfid-handle %s (path: %s) failed", + real_path ? real_path : "<null>", loc->path); + goto out; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + op_ret = posix_do_chown(this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, + "setattr (chown) on %s " + "failed", + real_path); + goto out; + } + } + + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_chmod(this, real_path, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED, + "setattr (chmod) on gfid-handle %s (path: %s) " + "failed", + real_path, loc->path); + goto out; + } + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_utimes(this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, + "setattr (utimes) on gfid-handle %s (path: %s) " + "failed", + real_path, loc->path); + goto out; + } + posix_update_utime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); + } + + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); + } + + if (!valid) { + op_ret = sys_lchown(real_path, -1, -1); 
+ if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, + "lchown (gfid-handle: %s, path: %s, -1, -1) " + "failed", + real_path, loc->path); + + goto out; + } + } + + op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &statpost, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost); + + if (xdata) + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &statpost); + posix_update_iatt_buf(&statpre, -1, real_path, xdata); + posix_update_iatt_buf(&statpost, -1, real_path, xdata); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, &statpre, &statpost, + xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + + return 0; +} + +int32_t +posix_do_fchown(xlator_t *this, int fd, struct iatt *stbuf, int32_t valid) +{ + int ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = sys_fchown(fd, uid, gid); + + return ret; +} + +int32_t +posix_do_fchmod(xlator_t *this, int fd, struct iatt *stbuf) +{ + int32_t ret = -1; + mode_t mode = 0; + mode_t mode_bit = 0; + struct posix_private *priv = NULL; + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_mask) | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + ret = sys_fchmod(fd, mode); +out: + return ret; +} + +static int +posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) +{ + int32_t ret = -1; + struct timeval tv[2] = {{ + 0, + }, + { + 0, + }}; + struct stat stat = { + 0, + }; + gf_boolean_t fstat_executed = _gf_false; + + if 
((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv[0].tv_sec = stbuf->ia_atime; + tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; + } else { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + fstat_executed = _gf_true; + /* atime is not given, use current values */ + tv[0].tv_sec = ST_ATIM_SEC(&stat); + tv[0].tv_usec = ST_ATIM_NSEC(&stat) / 1000; + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv[1].tv_sec = stbuf->ia_mtime; + tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; + } else { + if (!fstat_executed) { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + } + /* mtime is not given, use current values */ + tv[1].tv_sec = ST_MTIM_SEC(&stat); + tv[1].tv_usec = ST_MTIM_NSEC(&stat) / 1000; + } + + ret = sys_futimes(fd, tv); + if (ret == -1) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "%d", fd); + +out: + return ret; +} + +int +posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + dict_t *xattr_rsp = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpre); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fsetattr (fstat) failed on 
fd=%p", fd); + goto out; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + op_ret = posix_do_fchown(this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED, + "fsetattr (fchown) failed" + " on fd=%p", + fd); + goto out; + } + } + + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_fchmod(this, pfd->fd, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHMOD_FAILED, + "fsetattr (fchmod) failed" + " on fd=%p", + fd); + goto out; + } + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_futimes(this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, + "fsetattr (futimes) on " + "failed fd=%p", + fd); + goto out; + } + posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); + } + + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); + } + + if (!valid) { + op_ret = sys_fchown(pfd->fd, -1, -1); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED, + "fchown (%d, -1, -1) failed", pfd->fd); + + goto out; + } + } + + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpost); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fsetattr (fstat) failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &statpost); + + if (xdata) + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, + &statpost); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, &statpre, &statpost, + xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + + return 0; +} + +static int32_t 
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata) +{ + int32_t ret = -1; + int32_t op_errno = 0; + struct posix_fd *pfd = NULL; + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + struct posix_private *priv = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + + /* fallocate case is special so call posix_disk_space_check separately + for every fallocate fop instead of calling posix_disk_space with + thread after every 5 sec sleep to working correctly storage.reserve + option behaviour + */ + if (priv->disk_reserve) + posix_disk_space_check(this); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +overwrite: + check_space_error = _gf_true; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + + if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { + locked = _gf_true; + pthread_mutex_lock(&ctx->write_atomic_lock); + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); + goto unlock; + } + + if (xdata) { + ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL, + xdata, rsp_xdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + ret = -EIO; + goto unlock; + } + } + + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret 
== -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED, + "fallocate failed on %s offset: %jd, " + "len:%zu, flags: %d", + uuid_utoa(fd->inode->gfid), offset, len, flags); + goto unlock; + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); + goto unlock; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); + +unlock: + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { +#ifdef FALLOC_FL_KEEP_SIZE + if (flags & FALLOC_FL_KEEP_SIZE) { + goto overwrite; + } +#endif + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +out: + SET_TO_OLD_FS_ID(); + if (ret == ENOSPC) + ret = -ENOSPC; + + return ret; +} + +char * +_page_aligned_alloc(size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC(1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF(alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct) +{ + off_t num_vect = 0; + off_t num_loop = 1; + off_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + off_t remain = 0; + off_t extra = 0; + struct iovec 
*vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC(num_vect, sizeof(struct iovec), gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC(vect_size, sizeof(char), gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + if (sys_lseek(fd, offset, SEEK_SET) < 0) { + op_ret = -1; + goto err; + } + + for (idx = 0; idx < num_loop; idx++) { + op_ret = sys_writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + if (op_ret != (vect_size * num_vect)) { + op_ret = -1; + errno = ENOSPC; + goto err; + } + } + if (extra) { + op_ret = sys_writev(fd, vector, extra); + if (op_ret < 0) + goto err; + if (op_ret != (vect_size * extra)) { + op_ret = -1; + errno = ENOSPC; + goto err; + } + } + if (remain) { + vector[0].iov_len = remain; + op_ret = sys_writev(fd, vector, 1); + if (op_ret < 0) + goto err; + if (op_ret != remain) { + op_ret = -1; + errno = ENOSPC; + goto err; + } + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, struct iatt *statpre, struct iatt *statpost, + dict_t *xdata, dict_t **rsp_xdata) +{ + int32_t ret = -1; + int32_t op_errno = 0; + int32_t flags = 0; + struct posix_fd *pfd = NULL; + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + 
SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { + locked = _gf_true; + pthread_mutex_lock(&ctx->write_atomic_lock); + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd = %p", fd); + goto out; + } + + if (xdata) { + ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL, + xdata, rsp_xdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state " + "check failed, fd %p", + fd); + ret = -EIO; + goto out; + } + } + + posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); + /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. + * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
+ */ + flags = FALLOC_FL_ZERO_RANGE; + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == 0) { + goto fsync; + } else { + ret = -errno; + if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) { + goto out; + } + } + + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED, + "zerofill failed on fd %d length %" PRId64, pfd->fd, len); + goto out; + } + +fsync: + if (pfd->flags & (O_SYNC | O_DSYNC)) { + ret = sys_fsync(pfd->fd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_WRITEV_FAILED, + "fsync() in writev on fd" + "%d failed", + pfd->fd); + ret = -errno; + goto out; + } + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post operation fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); + +out: + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + SET_TO_OLD_FS_ID(); + + return ret; +} + +int32_t +posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + dict_t *rsp_xdata = NULL; + +#ifdef FALLOC_FL_KEEP_SIZE + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; +#endif /* FALLOC_FL_KEEP_SIZE */ + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, + &statpost, xdata, &rsp_xdata); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata); + return 0; +} + +int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + dict_t 
*rsp_xdata = NULL; +#ifndef FALLOC_FL_KEEP_SIZE + ret = EOPNOTSUPP; + +#else /* FALLOC_FL_KEEP_SIZE */ + int32_t flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, + &statpost, xdata, &rsp_xdata); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + +err: +#endif /* FALLOC_FL_KEEP_SIZE */ + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, rsp_xdata); + return 0; +} + +int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + struct posix_private *priv = NULL; + int op_ret = -1; + int op_errno = EINVAL; + dict_t *rsp_xdata = NULL; + gf_boolean_t check_space_error = _gf_false; + struct posix_fd *pfd = NULL; + struct stat statbuf = { + 0, + }; + + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + +overwrite: + check_space_error = _gf_true; + ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, + xdata, &rsp_xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto unwind; + } + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + +out: + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector 
size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, + rsp_xdata); + return 0; +} + +int32_t +posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + /* + * IPC is for inter-translator communication. If one gets here, it + * means somebody sent one that nobody else recognized, which is an + * error much like an uncaught exception. + */ + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE, + "GF_LOG_IPC(%d) not handled", op); + STACK_UNWIND_STRICT(ipc, frame, -1, EOPNOTSUPP, NULL); + return 0; +} + +int32_t +posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ +#ifdef HAVE_SEEK_HOLE + struct posix_fd *pfd = NULL; + off_t ret = -1; + int err = 0; + int whence = 0; + struct iatt preop = { + 0, + }; + dict_t *rsp_xdata = NULL; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + switch (what) { + case GF_SEEK_DATA: + whence = SEEK_DATA; + break; + case GF_SEEK_HOLE: + whence = SEEK_HOLE; + break; + default: + err = ENOTSUP; + gf_msg(this->name, GF_LOG_ERROR, ENOTSUP, P_MSG_SEEK_UNKOWN, + "don't know what to seek"); + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &err); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + if (xdata) { + ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, + xdata, &rsp_xdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + ret = -EIO; + goto out; + 
} + } + + ret = sys_lseek(pfd->fd, offset, whence); + if (ret == -1) { + err = errno; + gf_msg(this->name, fop_log_level(GF_FOP_SEEK, err), err, + P_MSG_SEEK_FAILED, "seek failed on fd %d length %" PRId64, + pfd->fd, offset); + goto out; + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(seek, frame, (ret == -1 ? -1 : 0), err, + (ret == -1 ? -1 : ret), rsp_xdata); +#else + STACK_UNWIND_STRICT(seek, frame, -1, EINVAL, 0, NULL); +#endif + return 0; +} + +int32_t +posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + char *real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + DIR *dir = NULL; + struct posix_fd *pfd = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(fd, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_errno = ESTALE; + goto out; + } + + op_ret = -1; + dir = sys_opendir(real_path); + + if (dir == NULL) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, + "opendir failed on gfid-handle: %s (path: %s)", real_path, + loc->path); + goto out; + } + + op_ret = dirfd(dir); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, + "dirfd() failed (path: %s, gfid-handle: %s", loc->path, + real_path); + goto out; + } + + pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->dir = dir; + pfd->dir_eof = -1; + pfd->fd = op_ret; + + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd" + "context path=%s " + "gfid-handle= %s,fd=%p", + loc->path, real_path, fd); + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (dir) 
{ + (void)sys_closedir(dir); + dir = NULL; + } + if (pfd) { + GF_FREE(pfd); + pfd = NULL; + } + } + + SET_TO_OLD_FS_ID(); + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL); + return 0; +} + +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, &ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + +int32_t +posix_releasedir(xlator_t *this, fd_t *fd) +{ + struct posix_fd *pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd); + goto out; + } + + pfd = (struct posix_fd *)(long)tmp_pfd; + if (!pfd->dir) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, + "pfd->dir is NULL for fd=%p", fd); + goto out; + } + posix_add_fd_to_cleanup(this, pfd); + +out: + return 0; +} + +int32_t +posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) +{ + char *dest = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct iatt stbuf = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(loc, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + dest = alloca(size + 1); + + MAKE_INODE_HANDLE(real_path, this, loc, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", loc->path ? 
loc->path : "<null>"); + goto out; + } + + op_ret = sys_readlink(real_path, dest, size); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, + "readlink on gfid-handle: %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + dest[op_ret] = 0; +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); + + return 0; +} + +int32_t +posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + struct posix_private *priv = NULL; + struct iatt prebuf = { + 0, + }; + struct iatt postbuf = { + 0, + }; + dict_t *rsp_xdata = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + MAKE_INODE_HANDLE(real_path, this, loc, &prebuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on (path: %s gfid-handle: %s) " + "failed", + loc->path, real_path ? 
real_path : "<null>"); + goto out; + } + + if (xdata) { + op_ret = posix_cs_maintenance(this, NULL, loc, NULL, &prebuf, real_path, + xdata, &rsp_xdata, _gf_false); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, path %s", loc->path); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&prebuf, -1, real_path, xdata); + op_ret = sys_truncate(real_path, offset); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, + "truncate on gfid-handle: %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postbuf, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on gfid-handle %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &postbuf); + + op_ret = 0; +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, &prebuf, &postbuf, + NULL); + + return 0; +} + +int32_t +posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + int32_t _fd = -1; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; + struct iatt preop = { + 0, + }; + dict_t *rsp_xdata = NULL; + struct iatt stbuf = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + if (loc->inode && ((loc->inode->ia_type == IA_IFBLK) || + (loc->inode->ia_type == IA_IFCHR))) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "open received on a block/char file (%s)", + uuid_utoa(loc->inode->gfid)); + op_errno = 
EINVAL; + goto out; + } + + if (flags & O_CREAT) + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_INODE_HANDLE(real_path, this, loc, &stbuf); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + if (IA_ISLNK(stbuf.ia_type)) { + op_ret = -1; + op_errno = ELOOP; + goto out; + } + + op_ret = -1; + SET_FS_ID(frame->root->uid, frame->root->gid); + + if (priv->o_direct) + flags |= O_DIRECT; + + _fd = sys_open(real_path, flags, priv->force_create_mode); + if (_fd == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, + "open on gfid-handle %s (path: %s), flags: %d", real_path, + loc->path, flags); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); + + pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->flags = flags; + pfd->fd = _fd; + + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + GF_FREE(pfd); + goto out; + } + + posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, + &rsp_xdata, _gf_true); + } + + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context gfid-handle=%s path=%s fd=%p", + real_path, loc->path, fd); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (_fd != -1) { + sys_close(_fd); + } + } + + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); + + return 0; +} + +int +posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private *priv = NULL; + struct iobuf *iobuf = NULL; + struct iobref *iobref 
= NULL; + struct iovec vec = { + 0, + }; + struct posix_fd *pfd = NULL; + struct iatt stbuf = { + 0, + }; + struct iatt preop = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + VALIDATE_OR_GOTO(fd->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "readv received on a block/char file (%s)", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (!size) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_INVALID_ARGUMENT, + "size=%" GF_PRI_SIZET, size); + goto out; + } + + iobuf = iobuf_get_page_aligned(this->ctx->iobuf_pool, size, ALIGN_SIZE); + if (!iobuf) { + op_errno = ENOMEM; + goto out; + } + + _fd = pfd->fd; + + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_pread(_fd, iobuf->ptr, size, offset); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READ_FAILED, + "read failed on gfid=%s, " + "fd=%p, offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + "buf=%p", + uuid_utoa(fd->inode->gfid), fd, offset, size, 
iobuf->ptr); + goto out; + } + + GF_ATOMIC_ADD(priv->read_value, op_ret); + + vec.iov_base = iobuf->ptr; + vec.iov_len = op_ret; + + iobref = iobref_new(); + + iobref_add(iobref, iobuf); + + /* + * readv successful, and we need to get the stat of the file + * we read from + */ + + op_ret = posix_fdstat(this, fd->inode, _fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &stbuf); + + /* Hack to notify higher layers of EOF. */ + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) + op_errno = ENOENT; + + op_ret = vec.iov_len; + +out: + + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref, + rsp_xdata); + + if (iobref) + iobref_unref(iobref); + if (iobuf) + iobuf_unref(iobuf); + + return 0; +} + +int32_t +__posix_pwritev(int fd, struct iovec *vector, int count, off_t offset) +{ + int32_t op_ret = 0; + int idx = 0; + int retval = 0; + off_t internal_off = 0; + + if (!vector) + return -EFAULT; + + internal_off = offset; + for (idx = 0; idx < count; idx++) { + retval = sys_pwrite(fd, vector[idx].iov_base, vector[idx].iov_len, + internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + op_ret += retval; + internal_off += retval; + } + +err: + return op_ret; +} + +int32_t +__posix_writev(int fd, struct iovec *vector, int count, off_t startoff, + int odirect) +{ + int32_t op_ret = 0; + int idx = 0; + int max_buf_size = 0; + int retval = 0; + char *buf = NULL; + char *alloc_buf = NULL; + off_t internal_off = 0; + + /* Check for the O_DIRECT flag during open() */ + if (!odirect) + return __posix_pwritev(fd, vector, count, startoff); + + for (idx = 0; idx < count; idx++) { + if (max_buf_size < vector[idx].iov_len) + max_buf_size = vector[idx].iov_len; + } + + alloc_buf = _page_aligned_alloc(max_buf_size, &buf); + if (!alloc_buf) { + op_ret = -errno; + goto 
err; + } + + internal_off = startoff; + for (idx = 0; idx < count; idx++) { + memcpy(buf, vector[idx].iov_base, vector[idx].iov_len); + + /* not sure whether writev works on O_DIRECT'd fd */ + retval = sys_pwrite(fd, buf, vector[idx].iov_len, internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + + op_ret += retval; + internal_off += retval; + } + +err: + GF_FREE(alloc_buf); + + return op_ret; +} + +dict_t * +_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED, + "fd: %p inode: %p" + "gfid:%s", + fd, inode ? inode : 0, + inode ? uuid_utoa(inode->gfid) : "N/A"); + goto out; + } + + if (!xdata) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s", + uuid_utoa(fd->inode->gfid), GLUSTERFS_OPEN_FD_COUNT); + } + } + + if (dict_get(xdata, GLUSTERFS_ACTIVE_FD_COUNT)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT, + fd->inode->active_fd_count); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s", + uuid_utoa(fd->inode->gfid), GLUSTERFS_ACTIVE_FD_COUNT); + } + } + + if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s", + uuid_utoa(fd->inode->gfid), GLUSTERFS_WRITE_IS_APPEND); + } + } +out: + return rsp_xdata; +} + +int32_t 
+posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t write_append = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; + int totlen = 0; + int idx = 0; + + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); + VALIDATE_OR_GOTO(fd, unwind); + VALIDATE_OR_GOTO(fd->inode, unwind); + VALIDATE_OR_GOTO(vector, unwind); + VALIDATE_OR_GOTO(this->private, unwind); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, unwind); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + +overwrite: + + check_space_error = _gf_true; + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "writev received on a block/char file (%s)", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + ret = posix_check_internal_writes(this, fd, _fd, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) + write_append = _gf_true; + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* The write_is_append 
check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators (shard + * as of today). + */ + + op_ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (write_append || update_atomic) { + locked = _gf_true; + pthread_mutex_lock(&ctx->write_atomic_lock); + } + + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (xdata) { + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&preop, _fd, NULL, xdata); + if (locked && write_append) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } + + op_ret = __posix_writev(_fd, vector, count, offset, + (pfd->flags & O_DIRECT)); + + if (locked && (!update_atomic)) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED, + "write failed: offset %" PRIu64 ",", offset); + goto out; + } + + rsp_xdata = _fill_writev_xdata(fd, xdata, this, is_append); + /* writev successful, we also need to get the stat of + * the file we wrote to + */ + + ret = posix_fdstat(this, fd->inode, _fd, &postop); + if (ret == -1) { + op_ret = -1; + 
op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (flags & (O_SYNC | O_DSYNC)) { + ret = sys_fsync(_fd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITEV_FAILED, + "fsync() in writev on fd %d failed", _fd); + op_ret = -1; + op_errno = errno; + goto out; + } + } + + GF_ATOMIC_ADD(priv->write_value, op_ret); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto unwind; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto unwind; + } + + for (idx = 0; idx < count; idx++) { + totlen = vector[idx].iov_len; + } + + if ((offset + totlen <= statbuf.st_size) && + !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd_in = -1; + int _fd_out = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd_in = NULL; + struct posix_fd *pfd_out = NULL; + struct iatt preop_dst = { + 
0, + }; + struct iatt postop_dst = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd_in, out); + VALIDATE_OR_GOTO(fd_in->inode, out); + VALIDATE_OR_GOTO(fd_out, out); + VALIDATE_OR_GOTO(fd_out->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno)) + goto out; + + if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno)) + goto out; + + ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_in); + goto out; + } + + _fd_in = pfd_in->fd; + + ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_out); + goto out; + } + + _fd_out = pfd_out->fd; + + /* + * Currently, the internal write is checked via xdata which + * is set by some xlator above. It could be due to several of + * the reasons such as healing or a snapshot operation happening + * using copy_file_range. As of now (i.e. writing the patch with + * this change) none of the xlators above posix are using the + * internal write with copy_file_range. In future it might + * change. Atleast as of now the hope is that, when that happens + * this functon or fop does not require additional changes for + * handling internal writes. 
+ */ + ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd_out); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators. + * This is similar to the atomic write operation. atmoic write is + * (i.e. prestat + write + poststat) used by shard as of now. In case, + * some xlator needs copy_file_range to be atomic from prestat and postat + * prespective (i.e. prestat + copy_file_range + poststat) then it has + * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata. + */ + + op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (update_atomic) { + ret = pthread_mutex_lock(&ctx->write_atomic_lock); + if (!ret) + locked = _gf_true; + else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED, + "failed to hold write atomic lock on %s", + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + } + + op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Since, only the destination file (fd_out) is undergoing + * modification, the write related tests are done on that. + * i.e. this is treater similar to as if the destination file + * undergoing write fop from maintenance perspective. 
+ */ + if (xdata) { + op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst, + NULL, xdata, &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd_out); + op_errno = EIO; + goto out; + } + } + + /* + * NOTE: This is just doing a single execution of copy_file_range + * system call. If the returned value of this system call is less + * than len, then should we keep doing it in a for loop until the + * copy_file_range of all the len bytes is done? + * Check the example program provided in the man page of + * copy_file_range. + * If so, then a separate variables for both off_in and off_out + * should be used which are initialized to off_in and off_out + * that this function call receives, but then advanced by the + * value returned by sys_copy_file_range and then use that as + * off_in and off_out for next instance of copy_file_range execution. + */ + op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len, + flags); + + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, + "copy_file_range failed: fd_in: %p (gfid: %s) ," + " fd_out %p (gfid:%s)", + fd_in, uuid_utoa_r(fd_in->inode->gfid, in_uuid_str), fd_out, + uuid_utoa_r(fd_out->inode->gfid, out_uuid_str)); + goto out; + } + + /* + * Let this be as it is for now. This function collects + * infomration such as open fd count etc. So, even though + * is_append does not apply to copy_file_range, for now, + * allowing it to be recorded in the dict as _gf_false. + */ + rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append); + + /* copy_file_range successful, we also need to get the stat of + * the file we wrote to (i.e. destination file or fd_out). 
+ */ + ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Also perform the stat on the source fd (i.e. fd_in). For now, + * allowing it to be done within the locked region if the request + * is for atomic operation (and update) of copy_file_range. + */ + ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_in); + goto out; + } + + /* + * The core logic of what time attributes are to be updated + * on a fop is decided at client side xlator utime. + * All the remaining fops call posix_set_ctime function + * to update the {a,m,c}time. But, for all the other fops, + * the operation is happening on only one file (or inode). + * But here, there are 2 fds (source and destination). Hence + * the new function below to update the appropriate times for + * both the source and the destination file. + * For the source file, if at all anything has to be updated, + * it would be atime (as that file is only read, not updated). + * For the destination file, the attributes that require the + * modification would be mtime and ctime. + * What times have to be changed is actually determined by + * utime xlator. But, all of them would be in frame->root->flags. + * So, currently posix assumes that, the atime flag is for + * the source file and the other 2 flags are for the destination + * file. Since, the assumption is rigid (i.e. atime for source + * and {m,c}time for destination), the below function is called + * posix_set_ctime_cfr (cfr standing for copy_file_range). + * FUTURE TODO: + * In future, some other functionality or fop might operate + * simultaneously on 2 files. 
Then, depending upon what that new + * fop does or what are its requirements, the below function might + * require changes to become generic for consumption in case of + * simultaneous operations on 2 files. + */ + posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf, + NULL, pfd_out->fd, fd_out->inode, &postop_dst); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + /* + * Record copy_file_range in priv->write_value for now. + * If not needed, remove below section of code along with + * this comment (or add comment to explain why it is not + * needed). + */ + GF_ATOMIC_ADD(priv->write_value, op_ret); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf, + &preop_dst, &postop_dst, rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t +posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + char *real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct statvfs buf = { + 0, + }; + struct posix_private *priv = NULL; + int shared_by = 1; + double percent = 0; + uint64_t reserved_blocks = 0; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(this->private, out); + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + priv = this->private; + + op_ret = sys_statvfs(real_path, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on gfid-handle %s (path: %s)", real_path, + loc->path); + goto out; + } + + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5); + } else { + if (buf.f_bsize) { + reserved_blocks = (priv->disk_reserve + buf.f_bsize - 
1) / + buf.f_bsize; + } + } + + if (buf.f_bfree > reserved_blocks) { + buf.f_bfree = (buf.f_bfree - reserved_blocks); + if (buf.f_bavail > buf.f_bfree) { + buf.f_bavail = buf.f_bfree; + } + } else { + buf.f_bfree = 0; + buf.f_bavail = 0; + } + + shared_by = priv->shared_brick_count; + if (shared_by > 1) { + buf.f_blocks /= shared_by; + buf.f_bfree /= shared_by; + buf.f_bavail /= shared_by; + buf.f_files /= shared_by; + buf.f_ffree /= shared_by; + buf.f_favail /= shared_by; + } + + if (!priv->export_statfs) { + buf.f_blocks = 0; + buf.f_bfree = 0; + buf.f_bavail = 0; + buf.f_files = 0; + buf.f_ffree = 0; + buf.f_favail = 0; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, NULL); + return 0; +} + +int32_t +posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + struct posix_fd *pfd = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL on fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL); + + return 0; +} + +int32_t +posix_release(xlator_t *this, fd_t *fd) +{ + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + pfd = (struct posix_fd *)(long)tmp_pfd; + if (pfd->dir) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL, + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + + posix_add_fd_to_cleanup(this, pfd); + +out: + return 0; +} + +int +posix_batch_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + 
dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub(frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock(&priv->fsync_mutex); + { + list_add_tail(&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal(&priv->fsync_cond); + } + pthread_mutex_unlock(&priv->fsync_mutex); + + return 0; +} + +int32_t +posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_fd *pfd = NULL; + int ret = -1; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + +#ifdef GF_DARWIN_HOST_OS + /* Always return success in case of fsync in MAC OS X */ + op_ret = 0; + goto out; +#endif + + priv = this->private; + + if (priv->batch_fsync_mode && xdata && dict_get(xdata, "batch-fsync")) { + posix_batch_fsync(frame, this, fd, datasync, xdata); + return 0; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd not found in fd's ctx"); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (datasync) { + op_ret = sys_fdatasync(_fd); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED, + "fdatasync on fd=%p" + "failed:", + fd); + goto out; + } + } else { + op_ret = sys_fsync(_fd); + if (op_ret == -1) { + 
op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED, + "fsync on fd=%p " + "failed", + fd); + goto out; + } + } + + op_ret = posix_fdstat(this, fd->inode, _fd, &postop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, &preop, &postop, NULL); + + return 0; +} + +static int gf_posix_xattr_enotsup_log; +static int +_handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, + filler->flags, filler->stbuf); +} + +#ifdef GF_DARWIN_HOST_OS +static int +map_xattr_flags(int flags) +{ + /* DARWIN has different defines on XATTR_ flags. + There do not seem to be a POSIX standard + Parse any other flags over. 
+ */ + int darwinflags = flags & + ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); + if (GF_XATTR_CREATE & flags) + darwinflags |= XATTR_CREATE; + if (GF_XATTR_REPLACE & flags) + darwinflags |= XATTR_REPLACE; + return darwinflags; +} +#endif + +int32_t +posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *acl_xattr = NULL; + struct iatt preop = {0}; + struct iatt postop = {0}; + int32_t ret = 0; + ssize_t acl_size = 0; + dict_t *xattr = NULL; + dict_t *subvol_xattrs = NULL; + posix_xattr_filler_t filler = { + 0, + }; + struct posix_private *priv = NULL; + struct iatt tmp_stbuf = { + 0, + }; + data_t *tdata = NULL; + char *cs_var = NULL; + gf_cs_obj_state state = -1; + int i = 0; + int len; + struct mdata_iatt mdata_iatt = { + 0, + }; + int8_t sync_backend_xattrs = _gf_false; + data_pair_t *custom_xattrs; + data_t *keyval = NULL; + char **xattrs_to_heal = get_xattrs_to_heal(); + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(dict, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); + if (ret == 0) { + /* This is initiated by lookup when ctime feature is enabled to create + * "trusted.glusterfs.mdata" xattr if not present. These are the files + * which were created when ctime feature is disabled. 
+ */ + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + op_ret = -1; + } + goto out; + } + + posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false); + + op_ret = -1; + + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + /* the io-stats-dump key should not reach disk */ + dict_del(dict, GF_XATTR_IOSTATS_DUMP_KEY); + + tdata = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE); + if (tdata) { + /*TODO: move the following to a different function */ + LOCK(&loc->inode->lock); + { + state = posix_cs_check_status(this, real_path, NULL, &preop); + if (state != GF_CS_LOCAL) { + op_errno = EINVAL; + ret = posix_cs_set_state(this, &xattr, state, real_path, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed"); + } + goto unlock; + } + + ret = posix_pstat(this, loc->inode, loc->gfid, real_path, + &tmp_stbuf, _gf_true); + if (ret) { + op_errno = EINVAL; + goto unlock; + } + + cs_var = alloca(4096); + sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); + + /*TODO: may be should consider nano-second also */ + if (strncmp(cs_var, tdata->data, tdata->len) > 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "mtime " + "passed is different from seen by file now." + " Will skip truncating the file"); + ret = -1; + op_errno = EINVAL; + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); + + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, + ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); + + ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. 
key %s err %d", GF_CS_NUM_BLOCKS, ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); + + ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret); + goto unlock; + } + + memset(cs_var, 0, 4096); + if (loc->path[0] == '/') { + for (i = 1; i < strlen(loc->path); i++) { + cs_var[i - 1] = loc->path[i]; + } + + cs_var[i] = '\0'; + gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); + } + + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, + strlen(cs_var), flags); + if (ret) { + op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "setxattr failed - %s" + " %d", + GF_CS_OBJECT_SIZE, ret); + goto unlock; + } + + ret = sys_truncate(real_path, 0); + if (ret) { + op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "truncate failed - %s" + " %d", + GF_CS_OBJECT_SIZE, ret); + ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); + if (ret) { + op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "removexattr " + "failed post processing- %s" + " %d", + GF_CS_OBJECT_SIZE, ret); + } + goto unlock; + } else { + state = GF_CS_REMOTE; + ret = posix_cs_set_state(this, &xattr, state, real_path, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed"); + } + } + } + unlock: + UNLOCK(&loc->inode->lock); + op_ret = ret; + goto out; + } + + filler.real_path = real_path; + filler.this = this; + filler.stbuf = &preop; + filler.loc = loc; + +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else + filler.flags = flags; +#endif + op_ret = dict_foreach(dict, _handle_setxattr_keyvalue_pair, &filler); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; + } + + ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); + } + + if 
(sync_backend_xattrs) { + /* List all custom xattrs */ + subvol_xattrs = dict_new(); + if (!subvol_xattrs) + goto out; + + ret = dict_set_int32_sizen(xdata, "list-xattr", 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "Unable to set list-xattr in dict "); + goto out; + } + + subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + NULL); + + /* Remove all user xattrs from the file */ + dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, + real_path); + + /* Remove all custom xattrs from the file */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); + if (keyval) { + ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, + errno, "removexattr failed. key %s path %s", + xattrs_to_heal[i], loc->path); + goto out; + } + + dict_del(subvol_xattrs, xattrs_to_heal[i]); + keyval = NULL; + } + } + + /* Set custom xattrs based on info provided by DHT */ + custom_xattrs = dict->members_list; + + while (custom_xattrs != NULL) { + ret = sys_lsetxattr(real_path, custom_xattrs->key, + custom_xattrs->value->data, + custom_xattrs->value->len, flags); + if (ret) { + op_errno = errno; + gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", + custom_xattrs->key, ret); + goto out; + } + + custom_xattrs = custom_xattrs->next; + } + } + + xattr = dict_new(); + if (!xattr) + goto out; + + /* + * FIXFIX: Send the stbuf info in the xdata for now + * This is used by DHT to redirect FOPs if the file is being migrated + * Ignore errors for now + */ + ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postop, + _gf_false); + if (ret) + goto out; + + ret = posix_set_iatt_in_dict(xattr, &preop, &postop); + + /* + * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which + * won't aware of access-control xlator. 
To update its context correctly, + * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path. + */ + if (dict_get(dict, GF_POSIX_ACL_ACCESS)) { + /* + * The size of buffer will be know after calling sys_lgetxattr, + * so first we allocate buffer with large size(~4k), then we + * reduced into required size using GF_REALLO(). + */ + acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char); + if (!acl_xattr) + goto out; + + acl_size = sys_lgetxattr(real_path, POSIX_ACL_ACCESS_XATTR, acl_xattr, + ACL_BUFFER_MAX); + + if (acl_size < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "Posix acl is not set " + "properly at the backend"); + goto out; + } + + /* If acl_size is more than max buffer size, just ignore it */ + if (acl_size >= ACL_BUFFER_MAX) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW, + "size of acl is more" + "than the buffer"); + goto out; + } + + acl_xattr = GF_REALLOC(acl_xattr, acl_size); + if (!acl_xattr) + goto out; + + ret = dict_set_bin(xattr, POSIX_ACL_ACCESS_XATTR, acl_xattr, acl_size); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set" + "xdata for acl"); + GF_FREE(acl_xattr); + goto out; + } + } + + if (dict_get(dict, GF_POSIX_ACL_DEFAULT)) { + acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char); + if (!acl_xattr) + goto out; + + acl_size = sys_lgetxattr(real_path, POSIX_ACL_DEFAULT_XATTR, acl_xattr, + ACL_BUFFER_MAX); + + if (acl_size < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "Posix acl is not set " + "properly at the backend"); + goto out; + } + + if (acl_size >= ACL_BUFFER_MAX) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW, + "size of acl is more" + "than the buffer"); + goto out; + } + + acl_xattr = GF_REALLOC(acl_xattr, acl_size); + if (!acl_xattr) + goto out; + + ret = dict_set_bin(xattr, POSIX_ACL_DEFAULT_XATTR, acl_xattr, acl_size); + if (ret) { + gf_msg(this->name, 
GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set" + "xdata for acl"); + GF_FREE(acl_xattr); + goto out; + } + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xattr); + + if (xattr) + dict_unref(xattr); + + if (subvol_xattrs) + dict_unref(subvol_xattrs); + + return 0; +} + +int +posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) +{ + int ret = -1; + int op_ret = -1; + const char *fname = NULL; + char *real_path = NULL; + char *found = NULL; + DIR *fd = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + return -ESTALE; + } + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "posix_xattr_get_real_filename (lstat) on " + "gfid-handle %s (path: %s) failed", + real_path, loc->path); + return -errno; + } + + fd = sys_opendir(real_path); + if (!fd) + return -errno; + + fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY); + + for (;;) { + errno = 0; + entry = sys_readdir(fd, scratch); + if (!entry || errno != 0) + break; + + if (strcasecmp(entry->d_name, fname) == 0) { + found = gf_strdup(entry->d_name); + if (!found) { + (void)sys_closedir(fd); + return -ENOMEM; + } + break; + } + } + + (void)sys_closedir(fd); + + if (!found) + return -ENOATTR; + + ret = dict_set_dynstr(dict, (char *)key, found); + if (ret) { + GF_FREE(found); + return -ENOMEM; + } + ret = strlen(found) + 1; + + return ret; +} + +int +posix_get_ancestry_directory(xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata) +{ + ssize_t handle_size = 0; + struct posix_private *priv = NULL; + inode_t *inode = NULL; + int ret = -1; + char dirpath[PATH_MAX] = { + 0, + }; + + priv = this->private; + + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + + ret = 
posix_make_ancestryfromgfid( + this, dirpath, PATH_MAX + 1, head, type | POSIX_ANCESTRY_PATH, + leaf_inode->gfid, handle_size, priv->base_path, leaf_inode->table, + &inode, xdata, op_errno); + if (ret < 0) + goto out; + + /* there is already a reference in loc->inode */ + inode_unref(inode); + + if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) { + if (strcmp(dirpath, "/")) + dirpath[strlen(dirpath) - 1] = '\0'; + + *path = gf_strdup(dirpath); + } + +out: + return ret; +} + +int32_t +posix_links_in_same_directory(char *dirpath, int count, inode_t *leaf_inode, + inode_t *parent, struct stat *stbuf, + gf_dirent_t *head, char **path, int type, + dict_t *xdata, int32_t *op_errno) +{ + int op_ret = -1; + gf_dirent_t *gf_entry = NULL; + xlator_t *this = NULL; + struct posix_private *priv = NULL; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char temppath[PATH_MAX] = { + 0, + }; + char scr[PATH_MAX * 4] = { + 0, + }; + + this = THIS; + + priv = this->private; + + dirp = sys_opendir(dirpath); + if (!dirp) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED, + "could not opendir %s", dirpath); + goto out; + } + + while (count > 0) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) + break; + + if (entry->d_ino != stbuf->st_ino) + continue; + + /* Linking an inode here, can cause a race in posix_acl. + Parent inode gets linked here, but before + it reaches posix_acl_readdirp_cbk, create/lookup can + come on a leaf-inode, as parent-inode-ctx not yet updated + in posix_acl_readdirp_cbk, create and lookup can fail + with EACCESS. 
So do the inode linking in the quota xlator + + linked_inode = inode_link (leaf_inode, parent, + entry->d_name, NULL); + + GF_ASSERT (linked_inode == leaf_inode); + inode_unref (linked_inode);*/ + + if (type & POSIX_ANCESTRY_DENTRY) { + loc_t loc = { + 0, + }; + + loc.inode = inode_ref(leaf_inode); + gf_uuid_copy(loc.gfid, leaf_inode->gfid); + + (void)snprintf(temppath, sizeof(temppath), "%s/%s", dirpath, + entry->d_name); + + gf_entry = gf_dirent_for_name(entry->d_name); + if (!gf_entry) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL"); + op_ret = -1; + *op_errno = ENOMEM; + inode_unref(loc.inode); + goto out; + } + gf_entry->inode = inode_ref(leaf_inode); + gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1, + xdata, NULL); + iatt_from_stat(&(gf_entry->d_stat), stbuf); + + list_add_tail(&gf_entry->list, &head->list); + loc_wipe(&loc); + } + + if (type & POSIX_ANCESTRY_PATH) { + (void)snprintf(temppath, sizeof(temppath), "%s/%s", + &dirpath[priv->base_path_length], entry->d_name); + if (!*path) { + *path = gf_strdup(temppath); + } else { + /* creating a colon separated */ + /* list of hard links */ + (void)snprintf(scr, sizeof(scr), "%s:%s", *path, temppath); + + GF_FREE(*path); + *path = gf_strdup(scr); + } + if (!*path) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + } + + count--; + } + + op_ret = 0; +out: + if (dirp) { + op_ret = sys_closedir(dirp); + if (op_ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_CLOSE_FAILED, + "closedir failed"); + } + } + + return op_ret; +} + +int +posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata) +{ + size_t remaining_size = 0; + int op_ret = -1, pathlen = -1; + ssize_t handle_size = 0; + uuid_t pgfid = { + 0, + }; + int nlink_samepgfid = 0; + struct stat stbuf = { + 0, + }; + char *list = NULL; + int32_t list_offset = 0; + struct posix_private *priv = 
NULL; + ssize_t size = 0; + inode_t *parent = NULL; + loc_t *loc = NULL; + char *leaf_path = NULL; + char key[4096] = { + 0, + }; + char dirpath[PATH_MAX] = { + 0, + }; + char pgfidstr[UUID_CANONICAL_FORM_LEN + 1] = { + 0, + }; + int len; + + priv = this->private; + + loc = GF_CALLOC(1, sizeof(*loc), gf_posix_mt_char); + if (loc == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + gf_uuid_copy(loc->gfid, leaf_inode->gfid); + + MAKE_INODE_HANDLE(leaf_path, this, loc, NULL); + if (!leaf_path) { + GF_FREE(loc); + *op_errno = ESTALE; + goto out; + } + GF_FREE(loc); + + size = sys_llistxattr(leaf_path, NULL, 0); + if (size == -1) { + *op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting brick" + " with 'user_xattr' flag)"); + + } else { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "listxattr failed on" + "%s", + leaf_path); + } + + goto out; + } + + if (size == 0) { + op_ret = 0; + goto out; + } + + list = alloca(size); + if (!list) { + *op_errno = errno; + goto out; + } + + size = sys_llistxattr(leaf_path, list, size); + if (size < 0) { + op_ret = -1; + *op_errno = errno; + goto out; + } + remaining_size = size; + list_offset = 0; + + op_ret = sys_lstat(leaf_path, &stbuf); + if (op_ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", leaf_path); + goto out; + } + + while (remaining_size > 0) { + len = snprintf(key, sizeof(key), "%s", list + list_offset); + if (strncmp(key, PGFID_XATTR_KEY_PREFIX, + SLEN(PGFID_XATTR_KEY_PREFIX)) != 0) + goto next; + + op_ret = sys_lgetxattr(leaf_path, key, &nlink_samepgfid, + sizeof(nlink_samepgfid)); + if (op_ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on " + "%s: key = %s ", + leaf_path, key); + goto out; + } + + 
nlink_samepgfid = ntoh32(nlink_samepgfid); + + snprintf(pgfidstr, sizeof(pgfidstr), "%s", + key + SLEN(PGFID_XATTR_KEY_PREFIX)); + gf_uuid_parse(pgfidstr, pgfid); + + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + + /* constructing the absolute real path of parent dir */ + snprintf(dirpath, sizeof(dirpath), "%s", priv->base_path); + pathlen = PATH_MAX + 1 - priv->base_path_length; + + op_ret = posix_make_ancestryfromgfid( + this, dirpath + priv->base_path_length, pathlen, head, + type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path, + leaf_inode->table, &parent, xdata, op_errno); + if (op_ret < 0) { + goto next; + } + + dirpath[strlen(dirpath) - 1] = '\0'; + + posix_links_in_same_directory(dirpath, nlink_samepgfid, leaf_inode, + parent, &stbuf, head, path, type, xdata, + op_errno); + + if (parent != NULL) { + inode_unref(parent); + parent = NULL; + } + + next: + remaining_size -= (len + 1); + list_offset += (len + 1); + } /* while (remaining_size > 0) */ + + op_ret = 0; + +out: + return op_ret; +} + +int +posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, + char **path, int type, int32_t *op_errno, dict_t *xdata) +{ + int ret = -1; + struct posix_private *priv = NULL; + + priv = this->private; + + if (IA_ISDIR(leaf_inode->ia_type)) { + ret = posix_get_ancestry_directory(this, leaf_inode, head, path, type, + op_errno, xdata); + } else { + if (!priv->update_pgfid_nlinks) + goto out; + ret = posix_get_ancestry_non_directory(this, leaf_inode, head, path, + type, op_errno, xdata); + } + +out: + if (ret && path && *path) { + GF_FREE(*path); + *path = NULL; + } + + return ret; +} + +/** + * posix_getxattr - this function returns a dictionary with all the + * key:value pair present as xattr. used for + * both 'listxattr' and 'getxattr'. 
+ */ +int32_t +posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + struct posix_private *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *value = NULL; + char *real_path = NULL; + dict_t *dict = NULL; + int ret = -1; + char *path = NULL; + char *rpath = NULL; + ssize_t size = 0; + char *list = NULL; + int32_t list_offset = 0; + size_t remaining_size = 0; + char *host_buf = NULL; + char *keybuffer = NULL; + int keybuff_len; + char *value_buf = NULL; + gf_boolean_t have_val = _gf_false; + struct iatt buf = { + 0, + }; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(this->private, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + + op_ret = -1; + priv = this->private; + + ret = posix_handle_georep_xattrs(frame, name, &op_errno, _gf_true); + if (ret == -1) { + op_ret = -1; + /* errno should be set from the above function*/ + goto out; + } + + ret = posix_handle_mdata_xattr(frame, name, &op_errno); + if (ret == -1) { + op_ret = -1; + /* errno should be set from the above function*/ + goto out; + } + + if (name && posix_is_gfid2path_xattr(name)) { + op_ret = -1; + op_errno = ENOATTR; + goto out; + } + + dict = dict_new(); + if (!dict) { + op_errno = ENOMEM; + goto out; + } + + if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) { + ret = posix_pacl_get(real_path, -1, name, &value); + if (ret || !value) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, + "could not get acl (%s) for" + "gfid-handle %s (path: %s)", + name, real_path, loc->path); + op_ret = -1; + goto out; + } + + ret = dict_set_dynstr(dict, (char *)name, value); + if (ret < 0) { + GF_FREE(value); + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, + "could not set acl (%s) for %s " + "(gfid-handle: %s) in 
dictionary", + name, loc->path, real_path); + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && + (strncmp(name, GF_XATTR_GET_REAL_FILENAME_KEY, + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename(frame, this, loc, name, dict, + xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + if (op_errno == ENOATTR) { + gf_msg_debug(this->name, 0, + "Failed to get " + "real filename (%s, %s)", + loc->path, name); + } else { + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_GETTING_FILENAME_FAILED, + "Failed to get real filename (%s, %s):", loc->path, + name); + } + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) { + if (!fd_list_empty(loc->inode)) { + ret = dict_set_uint32(dict, (char *)name, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value for %s", + name); + op_errno = ENOMEM; + goto out; + } + } else { + ret = dict_set_uint32(dict, (char *)name, 0); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value for %s", + name); + op_errno = ENOMEM; + goto out; + } + } + goto done; + } + if (loc->inode && name && (XATTR_IS_PATHINFO(name))) { + VALIDATE_OR_GOTO(this->private, out); + if (LOC_HAS_ABSPATH(loc)) { + MAKE_REAL_PATH(rpath, this, loc->path); + } else { + rpath = real_path; + } + size = gf_asprintf( + &host_buf, "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo && !gf_uuid_is_null(priv->glusterd_uuid)) + ? 
uuid_utoa(priv->glusterd_uuid) + : priv->hostname), + rpath); + if (size < 0) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr(dict, (char *)name, host_buf); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "could not set value" + " (%s) in dictionary", + host_buf); + GF_FREE(host_buf); + op_errno = ENOMEM; + goto out; + } + + goto done; + } + + if (loc->inode && name && (strcmp(name, GF_XATTR_NODE_UUID_KEY) == 0) && + !gf_uuid_is_null(priv->glusterd_uuid)) { + size = gf_asprintf(&host_buf, "%s", uuid_utoa(priv->glusterd_uuid)); + if (size == -1) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr(dict, GF_XATTR_NODE_UUID_KEY, host_buf); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "could not set value" + "(%s) in dictionary", + host_buf); + GF_FREE(host_buf); + op_errno = -ret; + goto out; + } + goto done; + } + + if (loc->inode && name && (strcmp(name, GFID_TO_PATH_KEY) == 0)) { + ret = inode_path(loc->inode, NULL, &path); + if (ret < 0) { + op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_INODE_PATH_GET_FAILED, + "%s: could not get " + "inode path", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + size = ret; + ret = dict_set_dynstr(dict, GFID_TO_PATH_KEY, path); + if (ret < 0) { + op_errno = ENOMEM; + GF_FREE(path); + goto out; + } + goto done; + } + + if (loc->inode && name && (strcmp(name, GFID2PATH_VIRT_XATTR_KEY) == 0)) { + if (!priv->gfid2path) { + op_errno = ENOATTR; + op_ret = -1; + goto out; + } + ret = posix_get_gfid2path(this, loc->inode, real_path, &op_errno, dict); + if (ret < 0) { + op_ret = -1; + goto out; + } + size = ret; + goto done; + } + + if (loc->inode && name && (strcmp(name, GET_ANCESTRY_PATH_KEY) == 0)) { + int type = POSIX_ANCESTRY_PATH; + + op_ret = posix_get_ancestry(this, loc->inode, NULL, &path, type, + &op_errno, xdata); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENODATA; + goto out; + } + size = op_ret; 
+ op_ret = dict_set_dynstr(dict, GET_ANCESTRY_PATH_KEY, path); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -op_ret, + P_MSG_GET_KEY_VALUE_FAILED, + "could not get " + "value for key (%s)", + GET_ANCESTRY_PATH_KEY); + GF_FREE(path); + op_errno = ENOMEM; + goto out; + } + + goto done; + } + + if (loc->inode && name && + (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, + SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { + op_ret = posix_get_objectsignature(real_path, dict); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + goto done; + } + + /* here allocate value_buf of 8192 bytes to avoid one extra getxattr + call,If buffer size is small to hold the xattr result then it will + allocate a new buffer value of required size and call getxattr again + */ + + value_buf = alloca(XATTR_VAL_BUF_SIZE); + if (name) { + char *key = (char *)name; + + keybuffer = key; +#if defined(GF_DARWIN_HOST_OS_DISABLED) + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(key, "user.", 5) == 0) { + key += 5; + gf_msg_debug(this->name, 0, + "getxattr for file %s (gfid-handle: %s)" + " stripping user key: %s -> %s", + loc->path, real_path, keybuffer, key); + } + } +#endif + size = sys_lgetxattr(real_path, key, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "getxattr failed due to overflow of buffer" + " on gfid-handle %s (path: %s) : %s ", + real_path, loc->path, key); + size = sys_lgetxattr(real_path, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } + if ((op_errno == ENOATTR) || (op_errno == ENODATA)) { + gf_msg_debug(this->name, 0, + "No such attribute:%s for file %s (path: %s)", + key, 
real_path, loc->path); + } else { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + P_MSG_XATTR_FAILED, + "getxattr failed on " + "%s (path: %s): %s ", + real_path, loc->path, key); + } + goto out; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_lgetxattr(real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s (path: %s): key = %s", real_path, + loc->path, key); + GF_FREE(value); + goto out; + } + } + value[size] = '\0'; + op_ret = dict_set_dynptr(dict, key, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, key); + GF_FREE(value); + goto out; + } + + goto done; + } + + have_val = _gf_false; + size = sys_llistxattr(real_path, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of buffer" + " on %s (path: %s) ", + real_path, loc->path); + size = sys_llistxattr(real_path, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "listxattr failed on %s (path: %s)", real_path, + loc->path); + } + goto out; + } + if (size == 0) + goto done; + } + list = alloca(size); + if (!list) { + op_errno = errno; + goto out; + } + if (have_val) { + memcpy(list, value_buf, 
size); + } else { + size = sys_llistxattr(real_path, list, size); + if (size < 0) { + op_ret = -1; + op_errno = errno; + goto out; + } + } + remaining_size = size; + list_offset = 0; + keybuffer = alloca(XATTR_KEY_BUF_SIZE); + while (remaining_size > 0) { + keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s", + list + list_offset); + + ret = posix_handle_georep_xattrs(frame, keybuffer, NULL, _gf_false); + if (ret == -1) + goto ignore; + + ret = posix_handle_mdata_xattr(frame, keybuffer, &op_errno); + if (ret == -1) { + goto ignore; + } + + if (posix_is_gfid2path_xattr(keybuffer)) { + goto ignore; + } + + have_val = _gf_false; + size = sys_lgetxattr(real_path, keybuffer, value_buf, + XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED, + "getxattr failed due to overflow of" + " buffer on %s (path: %s): %s ", + real_path, loc->path, keybuffer); + size = sys_lgetxattr(real_path, keybuffer, NULL, 0); + } + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on" + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); + goto out; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_errno = errno; + goto out; + } + if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_lgetxattr(real_path, keybuffer, value, size); + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on" + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); + GF_FREE(value); + goto out; + } + } + value[size] = '\0'; +#ifdef GF_DARWIN_HOST_OS + /* The protocol expect namespace for now */ + char *newkey = NULL; + gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey); + keybuff_len = snprintf(keybuffer, sizeof(keybuffer), "%s", newkey); + GF_FREE(newkey); +#endif + 
op_ret = dict_set_dynptr(dict, keybuffer, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, keybuffer); + GF_FREE(value); + goto out; + } + + ignore: + remaining_size -= keybuff_len + 1; + list_offset += keybuff_len + 1; + + } /* while (remaining_size > 0) */ + +done: + op_ret = size; + + if (xdata && (op_ret >= 0)) { + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &buf); + } + + if (dict) { + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (dict) { + dict_unref(dict); + } + + return 0; +} + +int32_t +posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct posix_fd *pfd = NULL; + int _fd = -1; + int32_t list_offset = 0; + ssize_t size = 0; + size_t remaining_size = 0; + char *value = NULL; + char *list = NULL; + dict_t *dict = NULL; + int ret = -1; + char key[4096] = { + 0, + }; + int key_len; + char *value_buf = NULL; + gf_boolean_t have_val = _gf_false; + struct iatt buf = { + 0, + }; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + /* Get the total size */ + dict = dict_new(); + if (!dict) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + if (name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) { + ret = 
dict_set_uint32(dict, (char *)name, 1); + if (ret < 0) { + op_ret = -1; + size = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value for %s", + name); + goto out; + } + goto done; + } + + if (name && strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, + SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) { + op_ret = posix_fdget_objectsignature(_fd, dict); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "posix_fdget_objectsignature failed"); + op_errno = -op_ret; + op_ret = -1; + size = -1; + goto out; + } + + goto done; + } + + /* here allocate value_buf of 8192 bytes to avoid one extra getxattr + call,If buffer size is small to hold the xattr result then it will + allocate a new buffer value of required size and call getxattr again + */ + value_buf = alloca(XATTR_VAL_BUF_SIZE); + + if (name) { + key_len = snprintf(key, sizeof(key), "%s", name); +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + char *newkey = NULL; + gf_add_prefix(XATTR_USER_PREFIX, key, &newkey); + key_len = snprintf(key, sizeof(key), "%s", newkey); + GF_FREE(newkey); + } +#endif + size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed due to overflow of" + "buffer on %s ", + key); + size = sys_fgetxattr(_fd, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if (errno == ENODATA || errno == ENOATTR) { + gf_msg_debug(this->name, 0, + "fgetxattr" + " failed on key %s (%s)", + key, strerror(op_errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr" + " failed on key %s", + key); + } + goto done; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } 
+ if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_fgetxattr(_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr" + " failed on fd %p for the key %s ", + fd, key); + GF_FREE(value); + goto out; + } + } + + value[size] = '\0'; + op_ret = dict_set_dynptr(dict, key, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on key %s failed", + key); + GF_FREE(value); + goto out; + } + + goto done; + } + size = sys_flistxattr(_fd, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of buffer" + " on %p ", + fd); + size = sys_flistxattr(_fd, NULL, 0); + } + if (size == -1) { + op_ret = -1; + op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting " + "brick with 'user_xattr' flag)"); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "listxattr failed " + "on %p:", + fd); + } + goto out; + } + if (size == 0) + goto done; + } + list = alloca(size + 1); + if (!list) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) + memcpy(list, value_buf, size); + else + size = sys_flistxattr(_fd, list, size); + + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + if (*(list + list_offset) == '\0') + break; + + key_len = snprintf(key, sizeof(key), "%s", list + list_offset); + have_val = _gf_false; + size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, 
GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed due to overflow of buffer" + " on fd %p: for the key %s ", + fd, key); + size = sys_fgetxattr(_fd, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed " + "on fd %p for the key %s ", + fd, key); + break; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = errno; + goto out; + } + if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_fgetxattr(_fd, key, value, size); + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed o" + "n the fd %p for the key %s ", + fd, key); + GF_FREE(value); + break; + } + } + value[size] = '\0'; + + op_ret = dict_set_dynptr(dict, key, value, size); + if (op_ret) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED, + "dict set operation " + "failed on key %s", + key); + GF_FREE(value); + goto out; + } + remaining_size -= key_len + 1; + list_offset += key_len + 1; + + } /* while (remaining_size > 0) */ + +done: + op_ret = size; + + if (xdata && (op_ret >= 0)) { + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, + &buf); + } + + if (dict) { + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (dict) + dict_unref(dict); + + return 0; +} + +static int +_handle_fsetxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_fhandle_pair(filler->frame, filler->this, filler->fdnum, k, v, + filler->flags, filler->stbuf, filler->fd); +} + +int32_t +posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t 
*dict, + int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd *pfd = NULL; + int _fd = -1; + int ret = -1; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + dict_t *xattr = NULL; + posix_xattr_filler_t filler = { + 0, + }; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + VALIDATE_OR_GOTO(dict, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + + ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED, + "fsetxattr (fstat)" + "failed on fd=%p", + fd); + goto out; + } + + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + + filler.fdnum = _fd; + filler.this = this; + filler.frame = frame; + filler.stbuf = &preop; + filler.fd = fd; +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else + filler.flags = flags; +#endif + op_ret = dict_foreach(dict, _handle_fsetxattr_keyvalue_pair, &filler); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + } + + if (!ret && xdata && dict_get(xdata, GLUSTERFS_DURABLE_OP)) { + op_ret = sys_fsync(_fd); + if (op_ret < 0) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, + P_MSG_DURABILITY_REQ_NOT_SATISFIED, + "could not satisfy durability request: " + "reason "); + } + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, &postop); + if (ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "fsetxattr (fstat)" + "failed on fd=%p", + fd); + goto out; + 
} + xattr = dict_new(); + if (!xattr) + goto out; + + ret = posix_set_iatt_in_dict(xattr, &preop, &postop); + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xattr); + + if (xattr) + dict_unref(xattr); + + return 0; +} + +int +_posix_remove_xattr(dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *)data; + this = filler->this; +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = (struct posix_private *)this->private; + char *newkey = NULL; + if (priv->xattr_user_namespace == XATTR_STRIP) { + gf_remove_prefix(XATTR_USER_PREFIX, key, &newkey); + gf_msg_debug("remove_xattr", 0, "key %s => %s", key, newkey); + key = newkey; + } +#endif + /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may + * have special behavior. Ex: removexattr("posix.system_acl_access"), + * removes more than one xattr on the file that could be present in the + * bulk-removal request. Removexattr of these deleted xattrs will fail + * with either ENODATA/ENOATTR. Since all this fop cares is removal of the + * xattrs in bulk-remove request and if they are already deleted, it can be + * treated as success. + */ + + if (filler->real_path) + op_ret = sys_lremovexattr(filler->real_path, key); + else + op_ret = sys_fremovexattr(filler->fdnum, key); + + if (op_ret == -1) { + if (errno == ENODATA || errno == ENOATTR) + op_ret = 0; + } + + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != ENODATA && errno != EPERM) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "removexattr failed on " + "file/dir %s with gfid: %s (for %s)", + filler->real_path ? 
filler->real_path : "", + uuid_utoa(filler->inode->gfid), key); + } + } +#ifdef GF_DARWIN_HOST_OS + GF_FREE(newkey); +#endif + return op_ret; +} + +int +posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata, int *op_errno, + dict_t **xdata_rsp) +{ + gf_boolean_t bulk_removexattr = _gf_false; + gf_boolean_t disallow = _gf_false; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + int op_ret = 0; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + int ret = 0; + int _fd = -1; + xlator_t *this = frame->this; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0}; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + if (loc) { + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + *op_errno = ESTALE; + goto out; + } + inode = loc->inode; + } else { + op_ret = posix_fd_ctx_get(fd, this, &pfd, op_errno); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + inode = fd->inode; + } + + if (posix_is_gfid2path_xattr(name)) { + op_ret = -1; + *op_errno = ENOATTR; + goto out; + } + + if (loc) { + ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } + } else { + ret = posix_fdstat(this, inode, _fd, &preop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path ? real_path : ""); + } + } + + if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, + "Remove xattr called on %s for file/dir %s with gfid: " + "%s", + name, real_path ? 
real_path : "", uuid_utoa(inode->gfid)); + op_ret = -1; + *op_errno = EPERM; + goto out; + } else if (posix_is_bulk_removexattr((char *)name, xdata)) { + bulk_removexattr = _gf_true; + (void)dict_has_key_from_array(xdata, disallow_removexattrs, &disallow); + if (disallow) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, + "Bulk removexattr has keys that shouldn't be " + "removed for file/dir %s with gfid: %s", + real_path ? real_path : "", uuid_utoa(inode->gfid)); + op_ret = -1; + *op_errno = EPERM; + goto out; + } + } + + if (bulk_removexattr) { + filler.real_path = real_path; + filler.this = this; + filler.fdnum = _fd; + filler.inode = inode; + op_ret = dict_foreach(xdata, _posix_remove_xattr, &filler); + if (op_ret) { + *op_errno = filler.op_errno; + goto out; + } + } else { + if (loc) + op_ret = sys_lremovexattr(real_path, name); + else + op_ret = sys_fremovexattr(_fd, name); + if (op_ret == -1) { + *op_errno = errno; + if (*op_errno != ENOATTR && *op_errno != ENODATA && + *op_errno != EPERM) { + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "removexattr on %s with gfid %s " + "(for %s)", + real_path, uuid_utoa(inode->gfid), name); + } + goto out; + } + } + + if (loc) { + posix_set_ctime(frame, this, real_path, -1, inode, NULL); + ret = posix_pstat(this, inode, loc->gfid, real_path, &postop, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } + } else { + posix_set_ctime(frame, this, NULL, _fd, inode, NULL); + ret = posix_fdstat(this, inode, _fd, &postop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path); + } + } + if (ret) + goto out; + *xdata_rsp = dict_new(); + if (!*xdata_rsp) + goto out; + + ret = posix_set_iatt_in_dict(*xdata_rsp, &preop, &postop); + + op_ret = 0; +out: + SET_TO_OLD_FS_ID(); + return op_ret; +} + +int32_t +posix_removexattr(call_frame_t 
*frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int op_ret = -1; + int op_errno = EINVAL; + dict_t *xdata_rsp = NULL; + + VALIDATE_OR_GOTO(loc, out); + + op_ret = posix_common_removexattr(frame, loc, NULL, name, xdata, &op_errno, + &xdata_rsp); +out: + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + + return 0; +} + +int32_t +posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + dict_t *xdata_rsp = NULL; + + VALIDATE_OR_GOTO(fd, out); + + op_ret = posix_common_removexattr(frame, NULL, fd, name, xdata, &op_errno, + &xdata_rsp); +out: + STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + + return 0; +} + +int32_t +posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + struct posix_fd *pfd = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, NULL); + + return 0; +} + +void +posix_print_xattr(dict_t *this, char *key, data_t *value, void *data) +{ + gf_msg_debug("posix", 0, "(key/val) = (%s/%d)", key, data_to_int32(value)); +} + +/** + * add_array - add two arrays of 32-bit numbers (stored in network byte order) + * dest = dest + src + * @count: number of 32-bit numbers + * FIXME: handle overflow + */ + +static void +__add_array(int32_t *dest, int32_t *src, int count) +{ + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32(dest[i]); + dest[i] = hton32(destval + 
/*
 * Element-wise dest = dest + src for @count 64-bit values; each element is
 * converted with ntoh64() before the addition and stored back with hton64().
 */
static void
__add_long_array(int64_t *dest, int64_t *src, int count)
{
    int idx = 0;

    while (idx < count) {
        dest[idx] = hton64(ntoh64(dest[idx]) + ntoh64(src[idx]));
        idx++;
    }
}
/*
 * 32-bit variant of the "add with default" xattrop: @src holds @count new
 * values followed by @count default values. A dest element that is zero gets
 * new + default; any other element gets dest + new. Values are converted with
 * ntoh32()/hton32() around the arithmetic.
 */
static void
__add_array_with_default(int32_t *dest, int32_t *src, int count)
{
    int pos = 0;
    int32_t current = 0;

    while (pos < count) {
        current = ntoh32(dest[pos]);
        if (current != 0) {
            dest[pos] = hton32(current + ntoh32(src[pos]));
        } else {
            dest[pos] = hton32(ntoh32(src[pos]) + ntoh32(src[count + pos]));
        }
        pos++;
    }
}
(op_errno != ENODATA) && (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr(marker_xattrs, k)) { + if (filler->real_path) + gf_msg(this->name, fop_log_level(GF_FOP_XATTROP, op_errno), + op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s while " + "doing xattrop: Key:%s ", + filler->real_path, k); + else + gf_msg( + this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "fgetxattr failed on gfid=%s " + "while doing xattrop: " + "Key:%s (%s)", + uuid_utoa(filler->inode->gfid), k, strerror(op_errno)); + } + + op_ret = -1; + goto unlock; + } + + if (size == -1 && optype == GF_XATTROP_GET_AND_SET) { + GF_FREE(array); + array = NULL; + } + + /* We only write back the xattr if it has been really modified + * (i.e. v->data is not all 0's). Otherwise we return its value + * but we don't update anything. + * + * If the xattr does not exist, a value of all 0's is returned + * without creating it. */ + size = count; + if (optype != GF_XATTROP_GET_AND_SET && + mem_0filled(v->data, v->len) == 0) + goto unlock; + + dst_data = array; + switch (optype) { + case GF_XATTROP_ADD_ARRAY: + __add_array((int32_t *)array, (int32_t *)v->data, count / 4); + break; + + case GF_XATTROP_ADD_ARRAY64: + __add_long_array((int64_t *)array, (int64_t *)v->data, + count / 8); + break; + + case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT: + __add_array_with_default((int32_t *)array, (int32_t *)v->data, + count / 4); + break; + + case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT: + __add_long_array_with_default((int64_t *)array, + (int64_t *)v->data, count / 8); + break; + + case GF_XATTROP_GET_AND_SET: + dst_data = v->data; + break; + + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_UNKNOWN_OP, + "Unknown xattrop type (%d)" + " on %s. 
Please send a bug report to " + "gluster-devel@gluster.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr(filler->real_path, k, dst_data, count, 0); + } else { + size = sys_fsetxattr(filler->fdnum, k, (char *)dst_data, count, 0); + } + op_errno = errno; + } +unlock: + pthread_mutex_unlock(&ctx->xattrop_lock); + + if (op_ret == -1) + goto out; + + if (size == -1) { + if (filler->real_path) + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "setxattr failed on %s " + "while doing xattrop: key=%s", + filler->real_path, k); + else + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "fsetxattr failed on gfid=%s while doing " + "xattrop: key=%s (%s)", + uuid_utoa(filler->inode->gfid), k, strerror(op_errno)); + op_ret = -1; + goto out; + } else if (array) { + op_ret = dict_set_bin(filler->xattr, k, array, count); + if (op_ret) { + if (filler->real_path) + gf_msg_debug(this->name, 0, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", + filler->real_path, k, strerror(-size)); + else + gf_msg_debug(this->name, 0, + "dict_set_bin failed (gfid=%s): " + "key=%s (%s)", + uuid_utoa(filler->inode->gfid), k, + strerror(-size)); + + op_ret = -1; + op_errno = EINVAL; + GF_FREE(array); + array = NULL; + goto out; + } + array = NULL; + } + +out: + if (op_ret < 0) + filler->op_errno = op_errno; + + if (array) + GF_FREE(array); + + return op_ret; +} + +/** + * xattrop - xattr operations - for internal use by GlusterFS + * @optype: ADD_ARRAY: + * dict should contain: + * "key" ==> array of 32-bit numbers + */ + +int +do_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = { + 0, + }; + dict_t *xattr_rsp = NULL; + dict_t 
*xdata_rsp = NULL; + struct iatt stbuf = {0}; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(xattr, out); + VALIDATE_OR_GOTO(this, out); + + if (fd) { + op_ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, + fop_log_level(GF_FOP_FXATTROP, op_errno), + P_MSG_PFD_GET_FAILED, + "failed to get pfd from" + " fd=%p", + fd); + goto out; + } + _fd = pfd->fd; + } + + if (loc && !gf_uuid_is_null(loc->gfid)) { + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + } + + if (real_path) { + inode = loc->inode; + } else if (fd) { + inode = fd->inode; + } + + xattr_rsp = dict_new(); + if (xattr_rsp == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + filler.this = this; + filler.fdnum = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; + filler.xattr = xattr_rsp; + + op_ret = dict_foreach(xattr, _posix_handle_xattr_keyvalue_pair, &filler); + op_errno = filler.op_errno; + if (op_ret < 0) + goto out; + + if (!xdata) + goto out; + + if (fd) { + op_ret = posix_fdstat(this, inode, _fd, &stbuf); + } else { + op_ret = posix_pstat(this, inode, inode->gfid, real_path, &stbuf, + _gf_false); + } + if (op_ret < 0) { + op_errno = errno; + goto out; + } + xdata_rsp = posix_xattr_fill(this, real_path, loc, fd, _fd, xdata, &stbuf); + if (!xdata_rsp) { + op_ret = -1; + op_errno = ENOMEM; + } + posix_set_mode_in_dict(xdata, xdata_rsp, &stbuf); +out: + + STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, xattr_rsp, xdata_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + return 0; +} + +int +posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop(frame, this, loc, NULL, optype, xattr, xdata); + return 0; +} + +int +posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + 
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop(frame, this, NULL, fd, optype, xattr, xdata); + return 0; +} + +int +posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = errno; + goto out; + } + + op_ret = sys_access(real_path, mask & 07); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED, + "access failed on %s", real_path); + goto out; + } + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (xdata) { + op_ret = posix_cs_maintenance(this, fd, NULL, 
&_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_ftruncate(_fd, offset); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, + "ftruncate failed on fd=%p (%" PRId64 "", fd, offset); + goto out; + } + + op_ret = posix_fdstat(this, fd->inode, _fd, &postop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, &preop, &postop, + NULL); + + return 0; +} + +int32_t +posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int _fd = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt buf = { + 0, + }; + struct posix_fd *pfd = NULL; + dict_t *xattr_rsp = NULL; + int ret = -1; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + if (!xdata) + gf_msg_trace(this->name, 0, "null xdata passed, fd %p", fd); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat(this, fd->inode, _fd, &buf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%p", fd); + goto out; + } + + if (xdata) { + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, 
&buf); + + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata, + &xattr_rsp, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + } + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); + } + + posix_update_iatt_buf(&buf, _fd, NULL, xdata); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + return 0; +} + +int32_t +posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct gf_lease *lease, dict_t *xdata) +{ + struct gf_lease nullease = { + 0, + }; + + gf_msg(this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED, + "\"features/leases\" translator is not loaded. You need" + "to use it for proper functioning of your application"); + + STACK_UNWIND_STRICT(lease, frame, -1, ENOSYS, &nullease, NULL); + return 0; +} + +static int gf_posix_lk_log; + +int32_t +posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + struct gf_flock nullock = { + 0, + }; + + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(lk, frame, -1, ENOSYS, &nullock, NULL); + return 0; +} + +int32_t +posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. 
You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(inodelk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(finodelk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(entrylk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. 
You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOSYS, NULL); + return 0; +} + +int +posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size, + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) +{ + off_t in_case = -1; + off_t last_off = 0; + size_t filled = 0; + int count = 0; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + struct posix_fd *pfd = NULL; + struct stat stbuf = { + 0, + }; + char *hpath = NULL; + int len = 0; + int ret = 0; + int op_errno = 0; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + count = -1; + errno = op_errno; + goto out; + } + + if (skip_dirs) { + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); + if (len <= 0) { + errno = ESTALE; + count = -1; + goto out; + } + len = strlen(hpath); + hpath[len] = '/'; + } + + if (!off) { + rewinddir(dir); + } else { + seekdir(dir, off); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != off && off != pfd->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + off, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + } + + while (filled <= size) { + in_case = (u_long)telldir(dir); + + if (in_case == -1) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED, + "telldir failed on dir=%p", dir); + goto out; + } + + errno = 0; + + entry = sys_readdir(dir, scratch); + + if (!entry || errno != 0) { + if (errno == EBADF) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, + P_MSG_DIR_OPERATION_FAILED, "readdir failed on dir=%p", + dir); + goto out; + } + break; + } + +#ifdef __NetBSD__ 
+ /* + * NetBSD with UFS1 backend uses backing files for + * extended attributes. They can be found in a + * .attribute file located at the root of the filesystem + * We hide it to glusterfs clients, since chaos will occur + * when the cluster/dht xlator decides to distribute + * exended attribute backing file across storage servers. + */ + if (__is_root_gfid(fd->inode->gfid) == 0 && + (!strcmp(entry->d_name, ".attribute"))) + continue; +#endif /* __NetBSD__ */ + + if (__is_root_gfid(fd->inode->gfid) && + (!strcmp(GF_HIDDEN_PATH, entry->d_name))) { + continue; + } + + if (skip_dirs) { + if (DT_ISDIR(entry->d_type)) { + continue; + } else if (hpath) { + strcpy(&hpath[len + 1], entry->d_name); + ret = sys_lstat(hpath, &stbuf); + if (!ret && S_ISDIR(stbuf.st_mode)) + continue; + } + } + + this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + + strlen(entry->d_name) + 1; + + if (this_size + filled > size) { + seekdir(dir, in_case); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != in_case && in_case != pfd->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + P_MSG_DIR_OPERATION_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + in_case, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + break; + } + + this_entry = gf_dirent_for_name(entry->d_name); + + if (!this_entry) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + P_MSG_GF_DIRENT_CREATE_FAILED, + "could not create " + "gf_dirent for entry %s", + entry->d_name); + goto out; + } + /* + * we store the offset of next entry here, which is + * probably not intended, but code using syncop_readdir() + * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it + * for directory read resumption. 
+ */ + last_off = (u_long)telldir(dir); + this_entry->d_off = last_off; + this_entry->d_ino = entry->d_ino; + this_entry->d_type = entry->d_type; + + list_add_tail(&this_entry->list, &entries->list); + + filled += this_size; + count++; + } + + if ((!sys_readdir(dir, scratch) && (errno == 0))) { + /* Indicate EOF */ + errno = ENOENT; + /* Remember EOF offset for later detection */ + pfd->dir_eof = (u_long)last_off; + } +out: + return count; +} + +dict_t * +posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd, + char *entry_path, dict_t *dict, struct iatt *stbuf) +{ + loc_t tmp_loc = { + 0, + }; + + /* if we don't send the 'loc', open-fd-count be a problem. */ + tmp_loc.inode = inode; + + return posix_xattr_fill(this, entry_path, &tmp_loc, NULL, -1, dict, stbuf); +} + +int +posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, + dict_t *dict) +{ + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = { + 0, + }; + uuid_t gfid; + int ret = -1; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); + if (len <= 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, + "Failed to create handle path, fd=%p, gfid=%s", fd, + uuid_utoa(fd->inode->gfid)); + return -1; + } + len = strlen(hpath); + hpath[len] = '/'; + + list_for_each_entry(entry, &entries->list, list) + { + inode = inode_grep(fd->inode->table, fd->inode, entry->d_name); + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + bzero(gfid, 16); + + strcpy(&hpath[len + 1], entry->d_name); + + ret = posix_pstat(this, inode, gfid, hpath, &stbuf, _gf_false); + + if (ret == -1) { + if (inode) + inode_unref(inode); + continue; + } + + posix_update_iatt_buf(&stbuf, -1, hpath, dict); + + if (!inode) + inode = inode_find(itable, stbuf.ia_gfid); + + if (!inode) + inode = 
inode_new(itable); + + entry->inode = inode; + + if (dict) { + entry->dict = posix_entry_xattr_fill(this, entry->inode, fd, hpath, + dict, &stbuf); + } + + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + + if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) { + /* The platform supports d_type but the underlying + filesystem doesn't. We set d_type to the correct + value from ia_type */ + entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type); + } + + inode = NULL; + } + + return 0; +} + +int32_t +posix_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, int whichop, dict_t *dict) +{ + struct posix_fd *pfd = NULL; + DIR *dir = NULL; + int ret = -1; + int count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + gf_dirent_t entries; + int32_t skip_dirs = 0; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + INIT_LIST_HEAD(&entries.list); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + dir = pfd->dir; + + if (!dir) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; + goto out; + } + + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32(dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK(&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. 
+ */ + count = posix_fill_readdir(fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK(&fd->lock); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + + if (whichop != GF_FOP_READDIRP) + goto out; + + posix_readdirp_fill(this, fd, &entries, dict); + +out: + if (whichop == GF_FOP_READDIR) + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL); + else + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL); + + gf_dirent_free(&entries); + + return 0; +} + +int32_t +posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIR, xdata); + return 0; +} + +int32_t +posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) +{ + gf_dirent_t entries; + int32_t op_ret = -1, op_errno = 0; + gf_dirent_t *entry = NULL; + + if ((dict != NULL) && (dict_get(dict, GET_ANCESTRY_DENTRY_KEY))) { + INIT_LIST_HEAD(&entries.list); + + op_ret = posix_get_ancestry(this, fd->inode, &entries, NULL, + POSIX_ANCESTRY_DENTRY, &op_errno, dict); + if (op_ret >= 0) { + op_ret = 0; + + list_for_each_entry(entry, &entries.list, list) { op_ret++; } + } + + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL); + + gf_dirent_free(&entries); + return 0; + } + + posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIRP, dict); + return 0; +} + +int32_t +posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int32_t len, dict_t *xdata) +{ + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + ssize_t bytes_read = 0; + int32_t weak_checksum = 0; + int32_t zerofillcheck = 0; + /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH, + so this is used. 
*/ + unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0}; + unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; + unsigned char *checksum = NULL; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + gf_boolean_t buf_has_zeroes = _gf_false; + struct iatt preop = { + 0, + }; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + + alloc_buf = _page_aligned_alloc(len, &buf); + if (!alloc_buf) { + op_errno = ENOMEM; + goto out; + } + + rsp_xdata = dict_new(); + if (!rsp_xdata) { + op_errno = ENOMEM; + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + LOCK(&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect(fd, pfd, 0, offset, len); + + bytes_read = sys_pread(_fd, buf, len, offset); + if (bytes_read < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PREAD_FAILED, + "pread of %d bytes returned %zd", len, bytes_read); + + op_errno = errno; + } + } + UNLOCK(&fd->lock); + + if (bytes_read < 0) + goto out; + + if (xdata && + dict_get_int32(xdata, "check-zero-filled", &zerofillcheck) == 0) { + buf_has_zeroes = (mem_0filled(buf, bytes_read)) ? 
_gf_false : _gf_true; + ret = dict_set_uint32(rsp_xdata, "buf-has-zeroes", buf_has_zeroes); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for key: %s", + uuid_utoa(fd->inode->gfid), "buf-has-zeroes"); + op_errno = -ret; + goto out; + } + } + weak_checksum = gf_rsync_weak_checksum((unsigned char *)buf, (size_t)ret); + + if (priv->fips_mode_rchecksum) { + ret = dict_set_int32(rsp_xdata, "fips-mode-rchecksum", 1); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for key: %s", + uuid_utoa(fd->inode->gfid), "fips-mode-rchecksum"); + goto out; + } + checksum = strong_checksum; + gf_rsync_strong_checksum((unsigned char *)buf, (size_t)bytes_read, + (unsigned char *)checksum); + } else { + checksum = md5_checksum; + gf_rsync_md5_checksum((unsigned char *)buf, (size_t)bytes_read, + (unsigned char *)checksum); + } + op_ret = 0; + + posix_set_ctime(frame, this, NULL, _fd, fd->inode, NULL); + +out: + STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum, + checksum, rsp_xdata); + if (rsp_xdata) + dict_unref(rsp_xdata); + GF_FREE(alloc_buf); + + return 0; +} + +int +posix_forget(xlator_t *this, inode_t *inode) +{ + int ret = 0; + char *unlink_path = NULL; + uint64_t ctx_uint1 = 0; + uint64_t ctx_uint2 = 0; + posix_inode_ctx_t *ctx = NULL; + posix_mdata_t *mdata = NULL; + struct posix_private *priv_posix = NULL; + + priv_posix = (struct posix_private *)this->private; + if (!priv_posix) + return 0; + + ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2); + if (!ctx_uint1) + goto check_ctx2; + + ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1; + + if (ctx->unlink_flag == GF_UNLINK_TRUE) { + POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid, + unlink_path); + if (!unlink_path) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, + "Failed to remove gfid :%s", uuid_utoa(inode->gfid)); + ret 
= -1; + goto ctx_free; + } + ret = sys_unlink(unlink_path); + } +ctx_free: + pthread_mutex_destroy(&ctx->xattrop_lock); + pthread_mutex_destroy(&ctx->write_atomic_lock); + pthread_mutex_destroy(&ctx->pgfid_lock); + GF_FREE(ctx); + +check_ctx2: + if (ctx_uint2) { + mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2; + } + + GF_FREE(mdata); + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-handle.h b/xlators/storage/posix/src/posix-inode-handle.h new file mode 100644 index 00000000000..36c47f2bebc --- /dev/null +++ b/xlators/storage/posix/src/posix-inode-handle.h @@ -0,0 +1,118 @@ +/* + Copyright (c) 2011-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _POSIX_INODE_HANDLE_H +#define _POSIX_INODE_HANDLE_H + +#include <limits.h> +#include <sys/types.h> +#include <glusterfs/gf-dirent.h> +#include "posix.h" + +/* From Open Group Base Specifications Issue 6 */ +#ifndef _XOPEN_PATH_MAX +#define _XOPEN_PATH_MAX 1024 +#endif + +#define TRASH_DIR "landfill" + +#define UUID0_STR "00000000-0000-0000-0000-000000000000" +#define SLEN(str) (sizeof(str) - 1) + +#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/')) +#define LOC_IS_DIR(loc) \ + (loc && (loc->inode) && (loc->inode->ia_type == IA_IFDIR)) +#define MAKE_REAL_PATH(var, this, path) \ + do { \ + size_t path_len = strlen(path); \ + size_t var_len = path_len + POSIX_BASE_PATH_LEN(this) + 1; \ + if (POSIX_PATH_MAX(this) != -1 && var_len >= POSIX_PATH_MAX(this)) { \ + var = alloca(path_len + 1); \ + strcpy(var, (path[0] == '/') ? 
/*
 * MAKE_INODE_HANDLE(rpath, this, loc, iatt_p)
 *
 * Resolve the backend path for @loc into @rpath and stat it into @iatt_p.
 *
 * The result is reported through `op_ret`, a variable that must exist in
 * the caller's scope; it is NOT a macro parameter.
 * TODO: it is not a good idea to change a variable which is not passed to
 * the macro.. Fix it later
 *
 * Flow:
 *  - this->private is NULL, or loc->gfid is null: op_ret = -1, an error is
 *    logged and @rpath is left untouched.
 *  - @loc is a directory with an absolute path: @rpath is built with
 *    MAKE_REAL_PATH and op_ret is the result of posix_pstat().
 *  - otherwise: op_ret is the result of posix_istat(). Unless that left
 *    errno == ELOOP, @rpath is built with MAKE_HANDLE_PATH; if that yields
 *    NULL, op_ret is forced to -1 and an error is logged. With ELOOP,
 *    op_ret is forced to -1 and @rpath is left untouched.
 *
 * MAKE_REAL_PATH and MAKE_HANDLE_PATH allocate with alloca(), so @rpath
 * points into the caller's stack frame: never free it and never let it
 * outlive the calling function.
 */
#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) \
    do { \
        if (!this->private) { \
            op_ret = -1; \
            gf_msg("make_inode_handle", GF_LOG_ERROR, 0, \
                   P_MSG_INODE_HANDLE_CREATE, \
                   "private is NULL, fini is already called"); \
            break; \
        } \
        if (gf_uuid_is_null(loc->gfid)) { \
            op_ret = -1; \
            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_INODE_HANDLE_CREATE, \
                   "null gfid for path %s", (loc)->path); \
            break; \
        } \
        if (LOC_IS_DIR(loc) && LOC_HAS_ABSPATH(loc)) { \
            MAKE_REAL_PATH(rpath, this, (loc)->path); \
            op_ret = posix_pstat(this, (loc)->inode, (loc)->gfid, rpath, \
                                 iatt_p, _gf_false); \
            break; \
        } \
        errno = 0; \
        op_ret = posix_istat(this, loc->inode, loc->gfid, NULL, iatt_p); \
        if (errno != ELOOP) { \
            MAKE_HANDLE_PATH(rpath, this, (loc)->gfid, NULL); \
            if (!rpath) { \
                op_ret = -1; \
                gf_msg(this->name, GF_LOG_ERROR, errno, \
                       P_MSG_INODE_HANDLE_CREATE, \
                       "Failed to create inode handle " \
                       "for path %s", \
                       (loc)->path); \
            } \
            break; \
        } /* posix_istat left errno == ELOOP */ \
        else { \
            op_ret = -1; \
        } \
    } while (0)
*table, + inode_t **parent, dict_t *xdata, int32_t *op_errno); + +int +posix_handle_init(xlator_t *this); + +int +posix_handle_trash_init(xlator_t *this); + +#endif /* !_POSIX_INODE_HANDLE_H */ diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h index 81752c17e78..2253f381ac5 100644 --- a/xlators/storage/posix/src/posix-mem-types.h +++ b/xlators/storage/posix/src/posix-mem-types.h @@ -10,18 +10,16 @@ #ifndef __POSIX_MEM_TYPES_H__ #define __POSIX_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_posix_mem_types_ { - gf_posix_mt_dir_entry_t = gf_common_mt_end + 1, - gf_posix_mt_posix_fd, - gf_posix_mt_char, - gf_posix_mt_posix_private, - gf_posix_mt_int32_t, - gf_posix_mt_posix_dev_t, - gf_posix_mt_trash_path, - gf_posix_mt_paiocb, - gf_posix_mt_end + gf_posix_mt_posix_fd = gf_common_mt_end + 1, + gf_posix_mt_char, + gf_posix_mt_posix_private, + gf_posix_mt_trash_path, + gf_posix_mt_paiocb, + gf_posix_mt_inode_ctx_t, + gf_posix_mt_mdata_attr, + gf_posix_mt_end }; #endif - diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h new file mode 100644 index 00000000000..f5bede266da --- /dev/null +++ b/xlators/storage/posix/src/posix-messages.h @@ -0,0 +1,74 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _POSIX_MESSAGES_H_ +#define _POSIX_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. 
This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED, + P_MSG_READV_FAILED, P_MSG_FSTAT_FAILED, P_MSG_PFD_NULL, + P_MSG_INVALID_ARGUMENT, P_MSG_IO_SUBMIT_FAILED, P_MSG_WRITEV_FAILED, + P_MSG_IO_GETEVENTS_FAILED, P_MSG_UNKNOWN_OP, P_MSG_AIO_UNAVAILABLE, + P_MSG_IO_SETUP_FAILED, P_MSG_ZEROFILL_FAILED, P_MSG_OPENDIR_FAILED, + P_MSG_DIRFD_FAILED, P_MSG_FD_PATH_SETTING_FAILED, P_MSG_LSTAT_FAILED, + P_MSG_READYLINK_FAILED, P_MSG_GFID_FAILED, P_MSG_CREATE_FAILED, + P_MSG_MKNOD_FAILED, P_MSG_LCHOWN_FAILED, P_MSG_ACL_FAILED, + P_MSG_MKDIR_NOT_PERMITTED, P_MSG_DIR_OF_SAME_ID, P_MSG_MKDIR_FAILED, + P_MSG_CHOWN_FAILED, P_MSG_UNLINK_FAILED, P_MSG_KEY_STATUS_INFO, + P_MSG_XATTR_STATUS, P_MSG_RMDIR_NOT_PERMITTED, P_MSG_RMDIR_FAILED, + P_MSG_DIR_OPERATION_FAILED, P_MSG_SYMLINK_FAILED, P_MSG_DIR_FOUND, + P_MSG_LINK_FAILED, P_MSG_TRUNCATE_FAILED, P_MSG_FILE_OP_FAILED, + P_MSG_READ_FAILED, P_MSG_DICT_SET_FAILED, P_MSG_STATVFS_FAILED, + P_MSG_DIR_NOT_NULL, P_MSG_FSYNC_FAILED, P_MSG_CLOSE_FAILED, + P_MSG_GETTING_FILENAME_FAILED, P_MSG_INODE_PATH_GET_FAILED, + P_MSG_GET_KEY_VALUE_FAILED, P_MSG_CHMOD_FAILED, P_MSG_FCHMOD_FAILED, + P_MSG_FCHOWN_FAILED, P_MSG_UTIMES_FAILED, P_MSG_FUTIMES_FAILED, + P_MSG_XATTR_NOT_REMOVED, P_MSG_PFD_GET_FAILED, P_MSG_ACCESS_FAILED, + P_MSG_PREAD_FAILED, P_MSG_UUID_NULL, P_MSG_EXPORT_DIR_MISSING, + P_MSG_SUBVOLUME_ERROR, P_MSG_VOLUME_DANGLING, P_MSG_INVALID_OPTION, + P_MSG_INVALID_VOLUME_ID, P_MSG_VOLUME_ID_ABSENT, + P_MSG_HOSTNAME_MISSING, P_MSG_SET_ULIMIT_FAILED, + P_MSG_SET_FILE_MAX_FAILED, P_MSG_MAX_FILE_OPEN, P_MSG_OPEN_FAILED, + P_MSG_LOOKUP_NOT_PERMITTED, P_MSG_RENAME_FAILED, P_MSG_WRITE_FAILED, + P_MSG_FILE_FAILED, P_MSG_THREAD_FAILED, P_MSG_HEALTHCHECK_FAILED, + P_MSG_GET_FDCTX_FAILED, P_MSG_HANDLEPATH_FAILED, + P_MSG_IPC_NOT_HANDLE, P_MSG_SET_XDATA_FAIL, + 
P_MSG_DURABILITY_REQ_NOT_SATISFIED, P_MSG_XATTR_NOTSUP, + P_MSG_GFID_SET_FAILED, P_MSG_ACL_NOTSUP, P_MSG_BASEPATH_CHDIR_FAILED, + P_MSG_INVALID_OPTION_VAL, P_MSG_INVALID_NODE_UUID, + P_MSG_FSYNCER_THREAD_CREATE_FAILED, P_MSG_GF_DIRENT_CREATE_FAILED, + P_MSG_VOLUME_ID_FETCH_FAILED, P_MSG_UNKNOWN_ARGUMENT, + P_MSG_INODE_HANDLE_CREATE, P_MSG_ENTRY_HANDLE_CREATE, P_MSG_PGFID_OP, + P_MSG_POSIX_AIO, P_MSG_HANDLE_CREATE_TRASH, P_MSG_HANDLE_CREATE, + P_MSG_HANDLE_PATH_CREATE, P_MSG_SET_FILE_CONTENTS, + P_MSG_XDATA_GETXATTR, P_MSG_STALE_HANDLE_REMOVE_FAILED, + P_MSG_HANDLE_PATH_CREATE_FAILED, P_MSG_HANDLE_TRASH_CREATE, + P_MSG_HANDLE_DELETE, P_MSG_READLINK_FAILED, P_MSG_BUFFER_OVERFLOW, + P_MSG_SEEK_UNKOWN, P_MSG_SEEK_FAILED, P_MSG_INODE_RESOLVE_FAILED, + P_MSG_PREOP_CHECK_FAILED, P_MSG_LEASE_DISABLED, + P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED, + P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED, + P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED, + P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED, + P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED, P_MSG_NOMEM, + P_MSG_PSTAT_FAILED, P_MSG_FDSTAT_FAILED); + +#endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/storage/posix/src/posix-metadata-disk.h b/xlators/storage/posix/src/posix-metadata-disk.h new file mode 100644 index 00000000000..8833fbb5428 --- /dev/null +++ b/xlators/storage/posix/src/posix-metadata-disk.h @@ -0,0 +1,31 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
#ifndef _POSIX_METADATA_DISK_H
#define _POSIX_METADATA_DISK_H

/*
 * One timestamp as laid out inside the on-disk metadata record: seconds and
 * nanoseconds, each a fixed 64-bit field.
 */
typedef struct gf_timespec_disk {
    uint64_t tv_sec;
    uint64_t tv_nsec;
} gf_timespec_disk_t;

/* posix_mdata_t on disk structure */

/*
 * The struct is declared packed, so there is no padding between the one-byte
 * version and the 64-bit fields that follow it. The 64-bit fields are
 * converted with htobe64()/be64toh() by posix_mdata_to_disk() and
 * posix_mdata_from_disk() in posix-metadata.c.
 */
typedef struct __attribute__((__packed__)) posix_mdata_disk {
    /* version of structure, bumped up if any new member is added */
    uint8_t version;
    /* flags indicates valid fields in the structure */
    uint64_t flags;
    gf_timespec_disk_t ctime;
    gf_timespec_disk_t mtime;
    gf_timespec_disk_t atime;
} posix_mdata_disk_t;

#endif /* _POSIX_METADATA_DISK_H */
+*/ + +#include <glusterfs/xlator.h> +#include "posix-metadata.h" +#include "posix-metadata-disk.h" +#include "posix-handle.h" +#include "posix-messages.h" +#include <glusterfs/syscall.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> + +static int gf_posix_xattr_enotsup_log; + +/* posix_mdata_to_disk converts posix_mdata_t into network byte order to + * save it on disk in machine independent format + */ +static inline void +posix_mdata_to_disk(posix_mdata_disk_t *out, posix_mdata_t *in) +{ + out->version = in->version; + out->flags = htobe64(in->flags); + + out->ctime.tv_sec = htobe64(in->ctime.tv_sec); + out->ctime.tv_nsec = htobe64(in->ctime.tv_nsec); + + out->mtime.tv_sec = htobe64(in->mtime.tv_sec); + out->mtime.tv_nsec = htobe64(in->mtime.tv_nsec); + + out->atime.tv_sec = htobe64(in->atime.tv_sec); + out->atime.tv_nsec = htobe64(in->atime.tv_nsec); +} + +/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order + */ +static inline void +posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) +{ + out->version = in->version; + out->flags = be64toh(in->flags); + + out->ctime.tv_sec = be64toh(in->ctime.tv_sec); + out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec); + + out->mtime.tv_sec = be64toh(in->mtime.tv_sec); + out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec); + + out->atime.tv_sec = be64toh(in->atime.tv_sec); + out->atime.tv_nsec = be64toh(in->atime.tv_nsec); +} + +void +posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in) +{ + out->ia_ctime = be64toh(in->ctime.tv_sec); + out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec); + + out->ia_mtime = be64toh(in->mtime.tv_sec); + out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec); + + out->ia_atime = be64toh(in->atime.tv_sec); + out->ia_atime_nsec = be64toh(in->atime.tv_nsec); +} + +/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ +static int +posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + inode_t *inode, 
posix_mdata_t *metadata, int *op_errno) +{ + size_t size = 256; + int op_ret = -1; + char *value = NULL; + gf_boolean_t fd_based_fop = _gf_false; + char gfid_str[64] = {0}; + char *real_path = NULL; + + if (!metadata) { + goto out; + } + + if (_fd != -1) { + fd_based_fop = _gf_true; + } + if (!(fd_based_fop || real_path_arg)) { + GF_VALIDATE_OR_GOTO(this->name, inode, out); + MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); + if (!real_path) { + *op_errno = errno; + uuid_utoa_r(inode->gfid, gfid_str); + gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_LSTAT_FAILED, + "lstat on gfid %s failed", gfid_str); + goto out; + } + } + + value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char); + if (!value) { + *op_errno = ENOMEM; + goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, value, size); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, value, size); + } + + if (size == -1) { + *op_errno = errno; + if (value) { + GF_FREE(value); + value = NULL; + } + if ((*op_errno == ENOTSUP) || (*op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not supported" + " (try remounting brick with 'user xattr' " + "flag)"); + } else if (*op_errno == ENOATTR || *op_errno == ENODATA) { + gf_msg_debug(this->name, 0, + "No such attribute:%s for file %s gfid: %s", + GF_XATTR_MDATA_KEY, + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + inode ? 
uuid_utoa(inode->gfid) : "null"); + goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, NULL, 0); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, NULL, 0); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, NULL, 0); + } + + if (size == -1) { /* give up now and exist with an error */ + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); + goto out; + } + + value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char); + if (!value) { + *op_errno = ENOMEM; + goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, value, + size); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, value, size); + } + if (size == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + inode ? 
uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); + goto out; + } + } + posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); + + op_ret = 0; +out: + if (value) + GF_FREE(value); + return op_ret; +} + +/* posix_store_mdata_xattr stores the posix_mdata_t on disk */ +static int +posix_store_mdata_xattr(xlator_t *this, const char *real_path_arg, int fd, + inode_t *inode, posix_mdata_t *metadata) +{ + char *real_path = NULL; + int op_ret = 0; + gf_boolean_t fd_based_fop = _gf_false; + char *key = GF_XATTR_MDATA_KEY; + char gfid_str[64] = {0}; + posix_mdata_disk_t disk_metadata; + + if (!metadata) { + op_ret = -1; + goto out; + } + + if (fd != -1) { + fd_based_fop = _gf_true; + } + if (!(fd_based_fop || real_path_arg)) { + MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); + if (!real_path) { + uuid_utoa_r(inode->gfid, gfid_str); + gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_LSTAT_FAILED, + "lstat on gfid %s failed", gfid_str); + op_ret = -1; + goto out; + } + } + + /* Set default version as 1 */ + posix_mdata_to_disk(&disk_metadata, metadata); + + if (fd_based_fop) { + op_ret = sys_fsetxattr(fd, key, (void *)&disk_metadata, + sizeof(posix_mdata_disk_t), 0); + } else if (real_path_arg) { + op_ret = sys_lsetxattr(real_path_arg, key, (void *)&disk_metadata, + sizeof(posix_mdata_disk_t), 0); + } else if (real_path) { + op_ret = sys_lsetxattr(real_path, key, (void *)&disk_metadata, + sizeof(posix_mdata_disk_t), 0); + } + +#ifdef GF_DARWIN_HOST_OS + if (real_path_arg) { + posix_dump_buffer(this, real_path_arg, key, value, 0); + } else if (real_path) { + posix_dump_buffer(this, real_path, key, value, 0); + } +#endif +out: + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "file: %s: gfid: %s key:%s ", + real_path ? real_path : (real_path_arg ? real_path_arg : "null"), + uuid_utoa(inode->gfid), key); + } + return op_ret; +} + +/* _posix_get_mdata_xattr gets posix_mdata_t from inode context. 
If it fails + * to get it from inode context, gets it from disk. This is with out inode lock. + */ +int +__posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf) +{ + uint64_t ctx; + posix_mdata_t *mdata = NULL; + int ret = -1; + int op_errno = 0; + + /* Handle readdirp: inode might be null, time attributes should be served + * from xattr not from backend's file attributes */ + if (inode) { + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + } + } else { + ret = -1; + } + + if (ret == -1 || !mdata) { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. file: %s: gfid: %s", + real_path ? real_path : "null", + inode ? uuid_utoa(inode->gfid) : "null"); + ret = -1; + goto out; + } + + ret = posix_fetch_mdata_xattr(this, real_path, _fd, inode, mdata, + &op_errno); + + if (ret == 0) { + /* Got mdata from disk, set it in inode ctx. This case + * is hit when in-memory status is lost due to brick + * down scenario + */ + if (inode) { + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } + } else { + /* Failed to get mdata from disk, xattr missing. + * This happens when the file is created before + * ctime is enabled. + */ + if (stbuf && op_errno != ENOENT) { + ret = 0; + GF_FREE(mdata); + goto out; + } else { + /* This case should not be hit. If it hits, + * don't fail, log warning, free mdata and move + * on + */ + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ", + real_path ? real_path : "null", + inode ? 
/*
 * posix_compare_timespec: three-way comparison of two timestamps.
 *
 * Returns a positive value when @first is later than @second, a negative
 * value when it is earlier, and 0 when both are equal.
 *
 * The fields are compared rather than subtracted: tv_sec and tv_nsec are
 * wider than the int result, so a subtraction could be truncated to the
 * wrong sign, or to 0 for unequal times.
 */
static int
posix_compare_timespec(struct timespec *first, struct timespec *second)
{
    if (first->tv_sec != second->tv_sec)
        return (first->tv_sec > second->tv_sec) ? 1 : -1;

    if (first->tv_nsec != second->tv_nsec)
        return (first->tv_nsec > second->tv_nsec) ? 1 : -1;

    return 0;
}
gfid: %s", + uuid_utoa(inode->gfid)); + ret = -1; + *op_errno = ENOMEM; + goto unlock; + } + + ret = posix_fetch_mdata_xattr(this, realpath, -1, inode, + (void *)mdata, op_errno); + if (ret == 0) { + /* Got mdata from disk. This is a race, another client + * has healed the xattr during lookup. So set it in inode + * ctx */ + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + mdata_already_set = _gf_true; + } else { + *op_errno = 0; + mdata->version = 1; + mdata->flags = 0; + mdata->ctime.tv_sec = mdata_iatt->ia_ctime; + mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; + mdata->atime.tv_sec = mdata_iatt->ia_atime; + mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec; + mdata->mtime.tv_sec = mdata_iatt->ia_mtime; + mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } + } + + if (mdata_already_set) { + /* Compare and update the larger time */ + imdata.ctime.tv_sec = mdata_iatt->ia_ctime; + imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; + imdata.atime.tv_sec = mdata_iatt->ia_atime; + imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec; + imdata.mtime.tv_sec = mdata_iatt->ia_mtime; + imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + + if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) { + mdata->ctime = imdata.ctime; + } + if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) { + mdata->mtime = imdata.mtime; + } + if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) { + mdata->atime = imdata.atime; + } + } + + ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, + "gfid: %s key:%s ", uuid_utoa(inode->gfid), + GF_XATTR_MDATA_KEY); + *op_errno = errno; + goto unlock; + } + } +unlock: + UNLOCK(&inode->lock); +out: + return ret; +} + +/* posix_set_mdata_xattr updates the posix_mdata_t based on the flag + * in inode context and stores it on disk + */ 
+static int +posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *time, + struct timespec *u_atime, struct timespec *u_mtime, + struct iatt *stbuf, posix_mdata_flag_t *flag, + gf_boolean_t update_utime) +{ + uint64_t ctx; + posix_mdata_t *mdata = NULL; + int ret = -1; + int op_errno = 0; + + GF_VALIDATE_OR_GOTO("posix", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, time, out); + + if (update_utime && (flag->atime && !u_atime) && + (flag->mtime && !u_mtime)) { + goto out; + } + + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + } + if (ret == -1 || !mdata) { + /* + * Do we need to fetch the data from xattr + * If we does we can compare the value and store + * the largest data in inode ctx. + */ + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. file: %s: gfid: %s", + real_path ? real_path : "null", uuid_utoa(inode->gfid)); + ret = -1; + goto unlock; + } + + ret = posix_fetch_mdata_xattr(this, real_path, fd, inode, + (void *)mdata, &op_errno); + if (ret == 0) { + /* Got mdata from disk, set it in inode ctx. This case + * is hit when in-memory status is lost due to brick + * down scenario + */ + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } else { + /* + * This is the first time creating the time attr. This happens + * when you activate this feature. On this code path, only new + * files will create mdata xattr. The legacy files (files + * created before ctime enabled) will not have any xattr set. + * The xattr on legacy file will be set via lookup. + */ + + /* Don't create xattr with utimes/utimensat, only update if + * present. This otherwise causes issues during inservice + * upgrade. 
It causes inconsistent xattr values with in replica + * set. The scenario happens during upgrade where clients are + * older versions (without the ctime feature) and the server is + * upgraded to the new version (with the ctime feature which + * is enabled by default). + */ + + if (update_utime) { + UNLOCK(&inode->lock); + GF_FREE(mdata); + return 0; + } + + mdata->version = 1; + mdata->flags = 0; + mdata->ctime.tv_sec = time->tv_sec; + mdata->ctime.tv_nsec = time->tv_nsec; + mdata->atime.tv_sec = time->tv_sec; + mdata->atime.tv_nsec = time->tv_nsec; + mdata->mtime.tv_sec = time->tv_sec; + mdata->mtime.tv_nsec = time->tv_nsec; + + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } + } + + /* In distributed systems, there could be races with fops + * updating mtime/atime which could result in different + * mtime/atime for same file. So this makes sure, only the + * highest time is retained. If the mtime/atime update comes + * from the explicit utime syscall, it is allowed to set to + * previous or future time but the ctime is always set to + * current time. + */ + if (update_utime) { + if (flag->ctime && + posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } + if (flag->mtime) { + mdata->mtime = *u_mtime; + } + if (flag->atime) { + mdata->atime = *u_atime; + } + } else { + if (flag->ctime && + posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } + if (flag->mtime && + posix_compare_timespec(time, &mdata->mtime) > 0) { + mdata->mtime = *time; + } + if (flag->atime && + posix_compare_timespec(time, &mdata->atime) > 0) { + mdata->atime = *time; + } + } + + if (inode->ia_type == IA_INVAL) { + /* + * TODO: This is non-linked inode. So we have to sync the + * data into backend. Because inode_link may return + * a different inode. 
+ */ + /* ret = posix_store_mdata_xattr (this, loc, fd, + * mdata); */ + } + /* + * With this patch set, we are setting the xattr for each update + * We should evaluate the performance, and based on that we can + * decide on asynchronous updation. + */ + ret = posix_store_mdata_xattr(this, real_path, fd, inode, mdata); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, + "file: %s: gfid: %s key:%s ", real_path ? real_path : "null", + uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY); + goto unlock; + } + } +unlock: + UNLOCK(&inode->lock); +out: + if (ret == 0 && stbuf) { + stbuf->ia_ctime = mdata->ctime.tv_sec; + stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec; + stbuf->ia_mtime = mdata->mtime.tv_sec; + stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec; + stbuf->ia_atime = mdata->atime.tv_sec; + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } + + return ret; +} + +/* posix_update_utime_in_mdata updates the posix_mdata_t when mtime/atime + * is modified using syscall + */ +void +posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid) +{ + int32_t ret = 0; +#if defined(HAVE_UTIMENSAT) + struct timespec tv_atime = { + 0, + }; + struct timespec tv_mtime = { + 0, + }; +#else + struct timeval tv_atime = { + 0, + }; + struct timeval tv_mtime = { + 0, + }; +#endif + posix_mdata_flag_t flag = { + 0, + }; + + struct posix_private *priv = NULL; + + priv = this->private; + + /* NOTE: + * This routine (utimes) is intentionally allowed for all internal and + * external clients even if ctime is not set. 
This is because AFR and + * WORM uses time attributes for it's internal operations + */ + if (inode && priv->ctime) { + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv_atime.tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec); + + flag.ctime = 1; + flag.atime = 1; + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv_mtime.tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec); + + flag.ctime = 1; + flag.mtime = 1; + } + + if (flag.mtime || flag.atime) { + ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime, + &tv_atime, &tv_mtime, NULL, &flag, + _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } + } + } + return; +} + +/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs + * to be modified + */ +void +posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid) +{ + int32_t ret = 0; +#if defined(HAVE_UTIMENSAT) + struct timespec tv_ctime = { + 0, + }; +#else + struct timeval tv_ctime = { + 0, + }; +#endif + posix_mdata_flag_t flag = { + 0, + }; + + struct posix_private *priv = NULL; + priv = this->private; + + if (inode && priv->ctime) { + tv_ctime.tv_sec = stbuf->ia_ctime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec); + flag.ctime = 1; + + ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL, + NULL, NULL, &flag, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } + } + return; +} + +static void +posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) +{ + if (!flag) + return; + + flag->ctime = 0; + flag->atime = 
0; + flag->mtime = 0; + + if (flags & MDATA_CTIME) + flag->ctime = 1; + if (flags & MDATA_MTIME) + flag->mtime = 1; + if (flags & MDATA_ATIME) + flag->atime = 1; +} + +static void +posix_get_parent_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) +{ + if (!flag) + return; + + flag->ctime = 0; + flag->atime = 0; + flag->mtime = 0; + + if (flags & MDATA_PAR_CTIME) + flag->ctime = 1; + if (flags & MDATA_PAR_MTIME) + flag->mtime = 1; + if (flags & MDATA_PAR_ATIME) + flag->atime = 1; +} + +void +posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + int fd, inode_t *inode, struct iatt *stbuf) +{ + posix_mdata_flag_t flag = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); + if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, NULL, NULL, stbuf, + &flag, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, + inode ? 
uuid_utoa(inode->gfid) : "No inode"); + } + } +out: + return; +} + +void +posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + const char *real_path, int fd, inode_t *inode, + struct iatt *stbuf) +{ + posix_mdata_flag_t flag = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (inode && priv->ctime) { + (void)posix_get_parent_mdata_flag(frame->root->flags, &flag); + if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, NULL, NULL, stbuf, + &flag, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, + uuid_utoa(inode->gfid)); + } + } +out: + return; +} + +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *real_path_out, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out) +{ + posix_mdata_flag_t flag = { + 0, + }; + posix_mdata_flag_t flag_dup = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; + + priv = this->private; + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); + if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } + + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : in: %s gfid_in:%s " + "out: %s gfid_out:%s", + real_path_in, + (inode_in ? uuid_utoa_r(inode_in->gfid, in_uuid_str) + : "No inode"), + real_path_out, + (inode_out ? uuid_utoa_r(inode_out->gfid, out_uuid_str) + : "No inode")); + goto out; + } + + flag_dup = flag; + + /* + * For the destination file, no need to update atime. + * It got modified. 
Hence the things that need to be + * changed are mtime and ctime (provided the utime + * xlator from the client has set those flags, which + * are just copied to flag_dup). + */ + if (flag.atime) + flag_dup.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, + &frame->root->ctime, NULL, NULL, stbuf_out, + &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_out, + inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + } + + /* + * For the source file, no need to change the mtime and ctime. + * For source file, it is only read operation. So, if at all + * anything needs to be updated, it is only the atime. + */ + if (flag.atime) + flag_dup.atime = flag.atime; + flag_dup.mtime = 0; + flag_dup.ctime = 0; + + ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out, + &frame->root->ctime, NULL, NULL, stbuf_out, + &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_in, + inode_in ? uuid_utoa(inode_in->gfid) : "No inode"); + } + } +out: + return; +} diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h new file mode 100644 index 00000000000..d37014af93e --- /dev/null +++ b/xlators/storage/posix/src/posix-metadata.h @@ -0,0 +1,71 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _POSIX_METADATA_H +#define _POSIX_METADATA_H + +#include "posix-metadata-disk.h" + +/* In memory representation posix metadata xattr */ +typedef struct { + /* flags indicates valid fields in the structure */ + uint64_t flags; + struct timespec ctime; + struct timespec mtime; + struct timespec atime; + /* version of structure, bumped up if any new member is added */ + uint8_t version; + + char _pad[7]; /* manual padding */ +} posix_mdata_t; + +typedef struct { + unsigned short ctime : 1; + unsigned short mtime : 1; + unsigned short atime : 1; +} posix_mdata_flag_t; + +/* With inode lock*/ +int +posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf); +/* With out inode lock*/ +int +__posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf); +void +posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); +void +posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); +void +posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + int fd, inode_t *inode, struct iatt *stbuf); +void +posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + const char *real_path, int fd, inode_t *inode, + struct iatt *stbuf); +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *read_path_put, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out); +int +posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + const char *realpath, + struct mdata_iatt *mdata_iatt, + int *op_errno); +void +posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in); + +#endif /* _POSIX_METADATA_H */ diff --git a/xlators/storage/posix/src/posix.c 
b/xlators/storage/posix/src/posix.c index afc11fa813c..42b965434b9 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -7,6407 +7,95 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #define __XOPEN_SOURCE 500 -#include <openssl/md5.h> -#include <stdint.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <errno.h> -#include <libgen.h> -#include <pthread.h> -#include <ftw.h> -#include <sys/stat.h> -#include <signal.h> -#include <sys/uio.h> - -#ifndef GF_BSD_HOST_OS -#include <alloca.h> -#endif /* GF_BSD_HOST_OS */ - -#ifdef HAVE_LINKAT -#include <fcntl.h> -#endif /* HAVE_LINKAT */ - -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" -#include "posix.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" -#include "glusterfs3-xdr.h" -#include "hashfn.h" -#include "posix-aio.h" -#include "glusterfs-acl.h" - -extern char *marker_xattrs[]; -#define ALIGN_SIZE 4096 - -#undef HAVE_SET_FSID -#ifdef HAVE_SET_FSID - -#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; - -#define SET_FS_ID(uid, gid) do { \ - old_fsuid = setfsuid (uid); \ - old_fsgid = setfsgid (gid); \ - } while (0) - -#define SET_TO_OLD_FS_ID() do { \ - setfsuid (old_fsuid); \ - setfsgid (old_fsgid); \ - } while (0) - -#else - -#define DECLARE_OLD_FS_ID_VAR -#define SET_FS_ID(uid, gid) -#define SET_TO_OLD_FS_ID() - -#endif -int -posix_forget (xlator_t 
*this, inode_t *inode) -{ - uint64_t tmp_cache = 0; - if (!inode_ctx_del (inode, this, &tmp_cache)) - dict_destroy ((dict_t *)(long)tmp_cache); - - return 0; -} - -/* Regular fops */ - -int32_t -posix_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xdata) -{ - struct iatt buf = {0, }; - int32_t op_ret = -1; - int32_t entry_ret = 0; - int32_t op_errno = 0; - dict_t * xattr = NULL; - char * real_path = NULL; - char * par_path = NULL; - struct iatt postparent = {0,}; - int32_t gfidless = 0; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - struct posix_private *priv = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - - /* The Hidden directory should be for housekeeping purpose and it - should not get any gfid on it */ - if (__is_root_gfid (loc->pargfid) && loc->name - && (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { - gf_log (this->name, GF_LOG_WARNING, - "Lookup issued on %s, which is not permitted", - GF_HIDDEN_PATH); - op_errno = EPERM; - op_ret = -1; - goto out; - } - - op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless); - op_ret = -1; - if (gf_uuid_is_null (loc->pargfid) || (loc->name == NULL)) { - /* nameless lookup */ - MAKE_INODE_HANDLE (real_path, this, loc, &buf); - } else { - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); - - if (gf_uuid_is_null (loc->inode->gfid)) { - posix_gfid_heal (this, real_path, loc, xdata); - MAKE_ENTRY_HANDLE (real_path, par_path, this, - loc, &buf); - } - } - - op_errno = errno; - - if (op_ret == -1) { - if (op_errno != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - real_path ? 
real_path : "null", - strerror (op_errno)); - } - - entry_ret = -1; - goto parent; - } - - if (xdata && (op_ret == 0)) { - xattr = posix_xattr_fill (this, real_path, loc, NULL, -1, xdata, - &buf); - } - - if (priv->update_pgfid_nlinks) { - if (!gf_uuid_is_null (loc->pargfid) && !IA_ISDIR (buf.ia_type)) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, - PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - - LOCK (&loc->inode->lock); - { - SET_PGFID_XATTR_IF_ABSENT (real_path, - pgfid_xattr_key, - nlink_samepgfid, - XATTR_CREATE, op_ret, - this, unlock); - } -unlock: - UNLOCK (&loc->inode->lock); - } - } - -parent: - if (par_path) { - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - if (op_errno == ENOENT) - /* If parent directory is missing in a lookup, - errno should be ESTALE (bad handle) and not - ENOENT (missing entry) - */ - op_errno = ESTALE; - goto out; - } - } - - op_ret = entry_ret; -out: - if (!op_ret && !gfidless && gf_uuid_is_null (buf.ia_gfid)) { - gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for " - "%s", (real_path) ? 
real_path: ""); - op_ret = -1; - op_errno = ENODATA; - } - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &buf, xattr, &postparent); - - if (xattr) - dict_unref (xattr); - - return 0; -} - - -int32_t -posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - struct iatt buf = {0,}; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_private *priv = NULL; - char *real_path = NULL; - dict_t *xattr_rsp = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - MAKE_INODE_HANDLE (real_path, this, loc, &buf); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, (op_errno == ENOENT)? - GF_LOG_DEBUG:GF_LOG_ERROR, - "lstat on %s failed: %s", - real_path ? real_path : "<null>", - strerror (op_errno)); - goto out; - } - if (xdata) - xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1, - xdata, &buf); - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, xattr_rsp); - if (xattr_rsp) - dict_unref (xattr_rsp); - - return 0; -} - -static int -posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf) -{ - int32_t ret = -1; - mode_t mode = 0; - struct stat stat; - int is_symlink = 0; - - ret = sys_lstat (path, &stat); - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, - "lstat failed: %s (%s)", path, strerror (errno)); - goto out; - } - - if (S_ISLNK (stat.st_mode)) - is_symlink = 1; - - mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - ret = lchmod (path, mode); - if ((ret == -1) && (errno == ENOSYS)) { - /* in Linux symlinks are always in mode 0777 and no - such call as lchmod exists. 
- */ - gf_log (this->name, GF_LOG_DEBUG, - "%s (%s)", path, strerror (errno)); - if (is_symlink) { - ret = 0; - goto out; - } - - ret = chmod (path, mode); - } -out: - return ret; -} - -static int -posix_do_chown (xlator_t *this, - const char *path, - struct iatt *stbuf, - int32_t valid) -{ - int32_t ret = -1; - uid_t uid = -1; - gid_t gid = -1; - - if (valid & GF_SET_ATTR_UID) - uid = stbuf->ia_uid; - - if (valid & GF_SET_ATTR_GID) - gid = stbuf->ia_gid; - - ret = lchown (path, uid, gid); - - return ret; -} - -static int -posix_do_utimes (xlator_t *this, - const char *path, - struct iatt *stbuf) -{ - int32_t ret = -1; - struct timeval tv[2] = {{0,},{0,}}; - struct stat stat; - int is_symlink = 0; - - ret = sys_lstat (path, &stat); - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, - "%s (%s)", path, strerror (errno)); - goto out; - } - - if (S_ISLNK (stat.st_mode)) - is_symlink = 1; - - tv[0].tv_sec = stbuf->ia_atime; - tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; - tv[1].tv_sec = stbuf->ia_mtime; - tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; - - ret = lutimes (path, tv); - if ((ret == -1) && (errno == ENOSYS)) { - gf_log (this->name, GF_LOG_DEBUG, - "%s (%s)", path, strerror (errno)); - if (is_symlink) { - ret = 0; - goto out; - } - - ret = utimes (path, tv); - } - -out: - return ret; -} - -int -posix_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_path, this, loc, &statpre); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (lstat) on %s failed: %s", - real_path ? 
real_path : "<null>", strerror (op_errno)); - goto out; - } - - if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){ - op_ret = posix_do_chown (this, real_path, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (chown) on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - } - - if (valid & GF_SET_ATTR_MODE) { - op_ret = posix_do_chmod (this, real_path, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (chmod) on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - } - - if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { - op_ret = posix_do_utimes (this, real_path, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (utimes) on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - } - - if (!valid) { - op_ret = lchown (real_path, -1, -1); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lchown (%s, -1, -1) failed => (%s)", - real_path, strerror (op_errno)); - - goto out; - } - } - - op_ret = posix_pstat (this, loc->gfid, real_path, &statpost); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "setattr (lstat) on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, - &statpre, &statpost, NULL); - - return 0; -} - -int32_t -posix_do_fchown (xlator_t *this, - int fd, - struct iatt *stbuf, - int32_t valid) -{ - int ret = -1; - uid_t uid = -1; - gid_t gid = -1; - - if (valid & GF_SET_ATTR_UID) - uid = stbuf->ia_uid; - - if (valid & GF_SET_ATTR_GID) - gid = stbuf->ia_gid; - - ret = fchown (fd, uid, gid); - - return ret; -} - - -int32_t -posix_do_fchmod (xlator_t *this, - int fd, struct iatt *stbuf) -{ - mode_t mode = 0; - - mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - return fchmod (fd, 
mode); -} - -static int -posix_do_futimes (xlator_t *this, - int fd, - struct iatt *stbuf) -{ - gf_log (this->name, GF_LOG_WARNING, "function not implemented fd(%d)", fd); - - errno = ENOSYS; - return -1; -} - -int -posix_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - struct posix_fd *pfd = NULL; - int32_t ret = -1; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); - goto out; - } - - op_ret = posix_fdstat (this, pfd->fd, &statpre); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fsetattr (fstat) failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { - op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fsetattr (fchown) failed on fd=%p: %s", - fd, strerror (op_errno)); - goto out; - } - - } - - if (valid & GF_SET_ATTR_MODE) { - op_ret = posix_do_fchmod (this, pfd->fd, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fsetattr (fchmod) failed on fd=%p: %s", - fd, strerror (op_errno)); - goto out; - } - } - - if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { - op_ret = posix_do_futimes (this, pfd->fd, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fsetattr (futimes) on failed fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - } - - if (!valid) { - op_ret = fchown (pfd->fd, -1, -1); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, 
GF_LOG_ERROR, - "fchown (%d, -1, -1) failed => (%s)", - pfd->fd, strerror (op_errno)); - - goto out; - } - } - - op_ret = posix_fdstat (this, pfd->fd, &statpost); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fsetattr (fstat) failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, - &statpre, &statpost, NULL); - - return 0; -} - -static int32_t -posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - off_t offset, size_t len, struct iatt *statpre, - struct iatt *statpost) -{ - struct posix_fd *pfd = NULL; - int32_t ret = -1; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); - goto out; - } - - ret = posix_fdstat (this, pfd->fd, statpre); - if (ret == -1) { - ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "fallocate (fstat) failed on fd=%p: %s", fd, - strerror (errno)); - goto out; - } - - ret = sys_fallocate(pfd->fd, flags, offset, len); - if (ret == -1) { - ret = -errno; - goto out; - } - - ret = posix_fdstat (this, pfd->fd, statpost); - if (ret == -1) { - ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "fallocate (fstat) failed on fd=%p: %s", fd, - strerror (errno)); - goto out; - } - -out: - SET_TO_OLD_FS_ID (); - - return ret; -} - -char* -_page_aligned_alloc (size_t size, char **aligned_buf) -{ - char *alloc_buf = NULL; - char *buf = NULL; - - alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); - if (!alloc_buf) - goto out; - /* page aligned buffer */ - buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); - *aligned_buf = buf; -out: - return alloc_buf; -} - -static int32_t -_posix_do_zerofill(int fd, off_t offset, 
off_t len, int o_direct) -{ - off_t num_vect = 0; - off_t num_loop = 1; - off_t idx = 0; - int32_t op_ret = -1; - int32_t vect_size = VECTOR_SIZE; - off_t remain = 0; - off_t extra = 0; - struct iovec *vector = NULL; - char *iov_base = NULL; - char *alloc_buf = NULL; - - if (len == 0) - return 0; - if (len < VECTOR_SIZE) - vect_size = len; - - num_vect = len / (vect_size); - remain = len % vect_size ; - if (num_vect > MAX_NO_VECT) { - extra = num_vect % MAX_NO_VECT; - num_loop = num_vect / MAX_NO_VECT; - num_vect = MAX_NO_VECT; - } - - vector = GF_CALLOC (num_vect, sizeof(struct iovec), - gf_common_mt_iovec); - if (!vector) - return -1; - if (o_direct) { - alloc_buf = _page_aligned_alloc(vect_size, &iov_base); - if (!alloc_buf) { - gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, - "memory alloc failed, vect_size %d: %s", - vect_size, strerror(errno)); - GF_FREE(vector); - return -1; - } - } else { - iov_base = GF_CALLOC (vect_size, sizeof(char), - gf_common_mt_char); - if (!iov_base) { - GF_FREE(vector); - return -1; - } - } - - for (idx = 0; idx < num_vect; idx++) { - vector[idx].iov_base = iov_base; - vector[idx].iov_len = vect_size; - } - if (lseek(fd, offset, SEEK_SET) < 0) { - op_ret = -1; - goto err; - } - - for (idx = 0; idx < num_loop; idx++) { - op_ret = writev(fd, vector, num_vect); - if (op_ret < 0) - goto err; - } - if (extra) { - op_ret = writev(fd, vector, extra); - if (op_ret < 0) - goto err; - } - if (remain) { - vector[0].iov_len = remain; - op_ret = writev(fd, vector , 1); - if (op_ret < 0) - goto err; - } -err: - if (o_direct) - GF_FREE(alloc_buf); - else - GF_FREE(iov_base); - GF_FREE(vector); - return op_ret; -} - -static int32_t -posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, off_t len, struct iatt *statpre, - struct iatt *statpost) -{ - struct posix_fd *pfd = NULL; - int32_t ret = -1; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - 
VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); - goto out; - } - - ret = posix_fdstat (this, pfd->fd, statpre); - if (ret == -1) { - ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation fstat failed on fd = %p: %s", fd, - strerror (errno)); - goto out; - } - ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); - if (ret < 0) { - ret = -errno; - gf_log(this->name, GF_LOG_ERROR, - "zerofill failed on fd %d length %" PRId64 " %s", - pfd->fd, len, strerror(errno)); - goto out; - } - if (pfd->flags & (O_SYNC|O_DSYNC)) { - ret = fsync (pfd->fd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "fsync() in writev on fd %d failed: %s", - pfd->fd, strerror (errno)); - ret = -errno; - goto out; - } - } - - ret = posix_fdstat (this, pfd->fd, statpost); - if (ret == -1) { - ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "post operation fstat failed on fd=%p: %s", fd, - strerror (errno)); - goto out; - } - -out: - SET_TO_OLD_FS_ID (); - - return ret; -} - -static int32_t -_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, - off_t offset, size_t len, dict_t *xdata) -{ - int32_t ret; - int32_t flags = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - -#ifdef FALLOC_FL_KEEP_SIZE - if (keep_size) - flags = FALLOC_FL_KEEP_SIZE; -#endif /* FALLOC_FL_KEEP_SIZE */ - - ret = posix_do_fallocate(frame, this, fd, flags, offset, len, - &statpre, &statpost); - if (ret < 0) - goto err; - - STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -err: - STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); - return 0; -} - -static int32_t -posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) -{ - int32_t ret; -#ifndef FALLOC_FL_KEEP_SIZE - ret = EOPNOTSUPP; - -#else /* 
FALLOC_FL_KEEP_SIZE */ - int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - - ret = posix_do_fallocate(frame, this, fd, flags, offset, len, - &statpre, &statpost); - if (ret < 0) - goto err; - - STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -err: -#endif /* FALLOC_FL_KEEP_SIZE */ - STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); - return 0; -} - -static int32_t -posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) -{ - int32_t ret = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - - ret = posix_do_zerofill(frame, this, fd, offset, len, - &statpre, &statpost); - if (ret < 0) - goto err; - - STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); - return 0; - -} - -static int32_t -posix_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - /* - * IPC is for inter-translator communication. If one gets here, it - * means somebody sent one that nobody else recognized, which is an - * error much like an uncaught exception. 
- */ - gf_log (this->name, GF_LOG_ERROR, "GF_LOG_IPC(%d) not handled", op); - STACK_UNWIND_STRICT (ipc, frame, -1, -EOPNOTSUPP, NULL); - return 0; - -} - -int32_t -posix_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, dict_t *xdata) -{ - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - DIR * dir = NULL; - struct posix_fd * pfd = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (fd, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_errno = ESTALE; - goto out; - } - - op_ret = -1; - dir = opendir (real_path); - - if (dir == NULL) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "opendir failed on %s: %s", - real_path, strerror (op_errno)); - goto out; - } - - op_ret = dirfd (dir); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "dirfd() failed on %s: %s", - real_path, strerror (op_errno)); - goto out; - } - - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - op_errno = errno; - goto out; - } - - pfd->dir = dir; - pfd->dir_eof = -1; - pfd->fd = op_ret; - - op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); - if (op_ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to set the fd context path=%s fd=%p", - real_path, fd); - - op_ret = 0; - -out: - if (op_ret == -1) { - if (dir) { - closedir (dir); - dir = NULL; - } - if (pfd) { - GF_FREE (pfd); - pfd = NULL; - } - } - - SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); - return 0; -} - -int32_t -posix_releasedir (xlator_t *this, - fd_t *fd) -{ - struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; - int ret = 0; - - struct posix_private *priv = NULL; - - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log 
(this->name, GF_LOG_DEBUG, - "pfd from fd=%p is NULL", fd); - goto out; - } - - pfd = (struct posix_fd *)(long)tmp_pfd; - if (!pfd->dir) { - gf_log (this->name, GF_LOG_WARNING, - "pfd->dir is NULL for fd=%p", fd); - goto out; - } - - priv = this->private; - - pthread_mutex_lock (&priv->janitor_lock); - { - INIT_LIST_HEAD (&pfd->list); - list_add_tail (&pfd->list, &priv->janitor_fds); - pthread_cond_signal (&priv->janitor_cond); - } - pthread_mutex_unlock (&priv->janitor_lock); - -out: - return 0; -} - - -int32_t -posix_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size, dict_t *xdata) -{ - char * dest = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - struct iatt stbuf = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - dest = alloca (size + 1); - - MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - loc->path ? 
loc->path : "<null>", - strerror (op_errno)); - goto out; - } - - op_ret = sys_readlink (real_path, dest, size); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "readlink on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - - dest[op_ret] = 0; -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); - - return 0; -} - - -int -posix_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) -{ - int tmp_fd = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = 0; - char *par_path = 0; - struct iatt stbuf = { 0, }; - struct posix_private *priv = NULL; - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - void * uuid_req = NULL; - int32_t nlink_samepgfid = 0; - char *pgfid_xattr_key = NULL; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - gf_boolean_t linked = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); - - gid = frame->root->gid; - - SET_FS_ID (frame->root->uid, gid); - - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - } - - /* Check if the 'gfid' already exists, because this mknod may be an - internal call from distribute for creating 'linkfile', and that - linkfile may be for a hardlinked file */ - if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { - dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); - 
op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (op_ret) { - gf_log (this->name, GF_LOG_DEBUG, - "failed to get the gfid from dict for %s", - loc->path); - goto real_op; - } - op_ret = posix_create_link_if_gfid_exists (this, uuid_req, - real_path); - if (!op_ret) { - linked = _gf_true; - goto post_op; - } - } - -real_op: -#ifdef __NetBSD__ - if (S_ISFIFO(mode)) - op_ret = mkfifo (real_path, mode); - else -#endif /* __NetBSD__ */ - op_ret = mknod (real_path, mode, dev); - - if (op_ret == -1) { - op_errno = errno; - if ((op_errno == EINVAL) && S_ISREG (mode)) { - /* Over Darwin, mknod with (S_IFREG|mode) - doesn't work */ - tmp_fd = creat (real_path, mode); - if (tmp_fd == -1) { - gf_log (this->name, GF_LOG_ERROR, - "create failed on %s: %s", - real_path, strerror (errno)); - goto out; - } - close (tmp_fd); - } else { - - gf_log (this->name, GF_LOG_ERROR, - "mknod on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - } - - entry_created = _gf_true; - -#ifndef HAVE_SET_FSID - op_ret = lchown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lchown on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } -#endif - -post_op: - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting ACLs on %s failed (%s)", real_path, - strerror (errno)); - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - nlink_samepgfid = 1; - - SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, - XATTR_CREATE, op_ret, this, ignore); - } - -ignore: - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting xattrs on %s failed (%s)", real_path, - strerror (errno)); - } - - if (!linked) { - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_log (this->name, 
GF_LOG_ERROR, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "mknod on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) { - if (S_ISREG (mode)) - sys_unlink (real_path); - else - sys_rmdir (real_path); - } - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - -int -posix_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL, *gfid_path = NULL; - char *par_path = NULL; - struct iatt stbuf = {0, }; - struct posix_private *priv = NULL; - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - void *uuid_req = NULL; - ssize_t size = 0; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - /* The Hidden directory should be for housekeeping purpose and it - should not get created from a user request */ - if (__is_root_gfid (loc->pargfid) && - (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { - gf_log (this->name, GF_LOG_WARNING, - "mkdir issued on %s, which is not permitted", - GF_HIDDEN_PATH); - op_errno = EPERM; - op_ret = -1; - goto out; - } - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - MAKE_ENTRY_HANDLE 
(real_path, par_path, this, loc, NULL); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - gid = frame->root->gid; - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - - SET_FS_ID (frame->root->uid, gid); - - op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (uuid_req && !gf_uuid_is_null (uuid_req)) { - op_ret = posix_istat (this, uuid_req, NULL, &stbuf); - if ((op_ret == 0) && IA_ISDIR (stbuf.ia_type)) { - size = posix_handle_path (this, uuid_req, NULL, NULL, - 0); - if (size > 0) - gfid_path = alloca (size); - - if (gfid_path) - posix_handle_path (this, uuid_req, NULL, - gfid_path, size); - - gf_log (this->name, GF_LOG_WARNING, - "mkdir (%s): gfid (%s) is already associated " - "with directory (%s). Hence, both directories " - "will share same gfid and this can lead to " - "inconsistencies.", loc->path, - uuid_utoa (uuid_req), gfid_path ? gfid_path - : "<NULL>"); - } - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - mode |= S_ISGID; - } - - op_ret = mkdir (real_path, mode); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "mkdir of %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - - entry_created = _gf_true; - -#ifndef HAVE_SET_FSID - op_ret = chown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chown on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } -#endif - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting ACLs on %s failed (%s)", real_path, - strerror (errno)); - } - - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) 
{ - gf_log (this->name, GF_LOG_ERROR, - "setting xattrs on %s failed (%s)", real_path, - strerror (errno)); - } - - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) - sys_rmdir (real_path); - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - -int32_t -posix_unlink_gfid_handle_and_entry (xlator_t *this, const char *real_path, - struct iatt *stbuf, int32_t *op_errno) -{ - int32_t ret = 0; - - /* Unlink the gfid_handle_first */ - - if (stbuf && stbuf->ia_nlink == 1) { - ret = posix_handle_unset (this, stbuf->ia_gfid, NULL); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "unlink of gfid handle failed for path:%s with" - " gfid %s with errno:%s", real_path, - uuid_utoa (stbuf->ia_gfid), strerror (errno)); - } - } - - /* Unlink the actual file */ - ret = sys_unlink (real_path); - if (ret == -1) { - if (op_errno) - *op_errno = errno; - - gf_log (this->name, GF_LOG_ERROR, - "unlink of %s failed: %s", real_path, - strerror (errno)); - goto err; - } - - return 0; - -err: - return -1; -} - -int32_t -posix_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflag, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char 
*real_path = NULL; - char *par_path = NULL; - int32_t fd = -1; - struct iatt stbuf = {0,}; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - int32_t unlink_if_linkto = 0; - int32_t check_open_fd = 0; - int32_t skip_unlink = 0; - int32_t ctr_link_req = 0; - ssize_t xattr_size = -1; - int32_t is_dht_linkto_file = 0; - dict_t *unwind_dict = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - priv = this->private; - - op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK, - &check_open_fd); - - if (!op_ret && check_open_fd) { - - LOCK (&loc->inode->lock); - - if (loc->inode->fd_count) { - skip_unlink = 1; - } - - UNLOCK (&loc->inode->lock); - - gf_log (this->name, GF_LOG_INFO, "open-fd-key-status: " - "%"PRIu32" for %s", skip_unlink, real_path); - - if (skip_unlink) { - op_ret = -1; - op_errno = EBUSY; - goto out; - } - } - - - op_ret = dict_get_int32 (xdata, DHT_SKIP_NON_LINKTO_UNLINK, - &unlink_if_linkto); - - if (!op_ret && unlink_if_linkto) { - - LOCK (&loc->inode->lock); - - xattr_size = sys_lgetxattr (real_path, LINKTO, NULL, 0); - - if (xattr_size <= 0) { - skip_unlink = 1; - } else { - is_dht_linkto_file = IS_DHT_LINKFILE_MODE (&stbuf); - if (!is_dht_linkto_file) - skip_unlink = 1; - } - - UNLOCK (&loc->inode->lock); - - gf_log (this->name, GF_LOG_INFO, "linkto_xattr status: " - "%"PRIu32" for %s", skip_unlink, real_path); - - 
if (skip_unlink) { - op_ret = -1; - op_errno = EBUSY; - goto out; - } - } - - - if (priv->background_unlink) { - if (IA_ISREG (loc->inode->ia_type)) { - fd = open (real_path, O_RDONLY); - if (fd == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "open of %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - } - } - - if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - LOCK (&loc->inode->lock); - { - UNLINK_MODIFY_PGFID_XATTR (real_path, pgfid_xattr_key, - nlink_samepgfid, 0, op_ret, - this, unlock); - } - unlock: - UNLOCK (&loc->inode->lock); - - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "modification of " - "parent gfid xattr failed (path:%s gfid:%s)", - real_path, uuid_utoa (loc->inode->gfid)); - if (op_errno != ENOATTR) - /* Allow unlink if pgfid xattr is not set. */ - goto out; - } - } - - op_ret = posix_unlink_gfid_handle_and_entry (this, real_path, &stbuf, - &op_errno); - if (op_ret == -1) { - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - /* - * - * Check if there is a CTR_REQUEST_LINK_COUNT_XDATA from CTR Xlator - * - * */ - op_ret = dict_get_int32 (xdata, CTR_REQUEST_LINK_COUNT_XDATA, - &ctr_link_req); - if (op_ret) { - /*Since no request no response*/ - op_ret = 0; - goto out; - } - - /* Sending back inode link count to ctr_unlink(changetimerecoder xlator) - * via "CTR_RESPONSE_LINK_COUNT_XDATA" key using unwind_dict. - * CTR Xlator will clear all the records if the link count has become 1 - * i.e this was the last hard link. 
- * */ - unwind_dict = dict_new (); - /* Even if unwind_dict fails to alloc memory we will not mark the FOP - * unsuccessful - * because this dict is only used by CTR Xlator to clear - * all records if link count == 0*/ - if (!unwind_dict) { - op_ret = 0; - gf_log (this->name, GF_LOG_WARNING, - "Memory allocation failure while " - "creating unwind_dict"); - goto out; - } - /* Even if unwind_dict fails to set CTR_RESPONSE_LINK_COUNT_XDATA we - * will not mark the FOP unsuccessful - * because this dict is only used by CTR Xlator to clear - * all records if link count == 0*/ - op_ret = dict_set_uint32 (unwind_dict, CTR_RESPONSE_LINK_COUNT_XDATA, - stbuf.ia_nlink); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to set CTR_RESPONSE_LINK_COUNT_XDATA"); - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, - &preparent, &postparent, unwind_dict); - - if (fd != -1) { - close (fd); - } - - /* unref unwind_dict*/ - if (unwind_dict) { - dict_unref (unwind_dict); - } - - return 0; -} - - -int -posix_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - char * par_path = NULL; - char * gfid_str = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - struct iatt stbuf; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - /* The Hidden directory should be for housekeeping purpose and it - should not get deleted from inside process */ - if (__is_root_gfid (loc->pargfid) && - (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { - gf_log (this->name, GF_LOG_WARNING, - "rmdir issued on %s, which is not permitted", - GF_HIDDEN_PATH); - op_errno = EPERM; - op_ret = -1; - goto out; - } - - priv = this->private; - - 
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - if (flags) { - gfid_str = uuid_utoa (stbuf.ia_gfid); - char *tmp_path = alloca (strlen (priv->trash_path) + - strlen ("/") + - strlen (gfid_str) + 1); - - op_ret = mkdir (priv->trash_path, 0755); - if (errno != EEXIST && op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "mkdir of %s failed: %s", priv->trash_path, - strerror (errno)); - } else { - sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str); - op_ret = rename (real_path, tmp_path); - } - } else { - op_ret = rmdir (real_path); - } - op_errno = errno; - - if (op_ret == 0) { - posix_handle_unset (this, stbuf.ia_gfid, NULL); - } - - if (op_errno == EEXIST) - /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ - op_errno = ENOTEMPTY; - - /* No need to log a common error as ENOTEMPTY */ - if (op_ret == -1 && op_errno != ENOTEMPTY) { - gf_log (this->name, GF_LOG_ERROR, - "rmdir of %s failed: %s", real_path, - strerror (op_errno)); - } - - if (op_ret == -1) { - gf_log (this->name, - (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR, - "%s on %s failed", (flags) ? 
"rename" : "rmdir", - real_path); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - &preparent, &postparent, NULL); - - return 0; -} - - -int -posix_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - char * par_path = 0; - struct iatt stbuf = { 0, }; - struct posix_private *priv = NULL; - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (linkname, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - - gid = frame->root->gid; - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - SET_FS_ID (frame->root->uid, gid); - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - } - - op_ret = symlink (linkname, real_path); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "symlink of %s --> %s failed: %s", - real_path, linkname, strerror (op_errno)); - goto out; - } - - entry_created = _gf_true; - -#ifndef HAVE_SET_FSID - op_ret = lchown (real_path, 
frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lchown failed on %s: %s", - real_path, strerror (op_errno)); - goto out; - } -#endif - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting ACLs on %s failed (%s)", real_path, - strerror (errno)); - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - nlink_samepgfid = 1; - SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, - XATTR_CREATE, op_ret, this, ignore); - } -ignore: - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting xattrs on %s failed (%s)", real_path, - strerror (errno)); - } - - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat failed on %s: %s", - real_path, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) - sys_unlink (real_path); - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - - -int -posix_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_oldpath = NULL; - char *real_newpath 
= NULL; - char *par_oldpath = NULL; - char *par_newpath = NULL; - struct iatt stbuf = {0, }; - struct posix_private *priv = NULL; - char was_present = 1; - struct iatt preoldparent = {0, }; - struct iatt postoldparent = {0, }; - struct iatt prenewparent = {0, }; - struct iatt postnewparent = {0, }; - char olddirid[64]; - char newdirid[64]; - uuid_t victim = {0}; - int was_dir = 0; - int nlink = 0; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (oldloc, out); - VALIDATE_OR_GOTO (newloc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); - if (!real_oldpath || !par_oldpath) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - if (!real_newpath || !par_newpath) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent %s failed: %s", - par_oldpath, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - par_newpath, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)){ - was_present = 0; - } else { - gf_uuid_copy (victim, stbuf.ia_gfid); - if (IA_ISDIR (stbuf.ia_type)) - was_dir = 1; - nlink = stbuf.ia_nlink; - } - - if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { - gf_log (this->name, GF_LOG_WARNING, - "found directory at %s while expecting ENOENT", - 
real_newpath); - op_ret = -1; - op_errno = EEXIST; - goto out; - } - - if (was_present && IA_ISDIR(stbuf.ia_type) && - gf_uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) { - gf_log (this->name, GF_LOG_WARNING, - "found directory %s at %s while renaming %s", - uuid_utoa_r (newloc->inode->gfid, olddirid), - real_newpath, - uuid_utoa_r (stbuf.ia_gfid, newdirid)); - op_ret = -1; - op_errno = EEXIST; - goto out; - } - - if (IA_ISDIR (oldloc->inode->ia_type)) - posix_handle_unset (this, oldloc->inode->gfid, NULL); - - LOCK (&oldloc->inode->lock); - { - if (!IA_ISDIR (oldloc->inode->ia_type) - && priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, - PGFID_XATTR_KEY_PREFIX, - oldloc->pargfid); - UNLINK_MODIFY_PGFID_XATTR (real_oldpath, - pgfid_xattr_key, - nlink_samepgfid, 0, - op_ret, - this, unlock); - } - - op_ret = sys_rename (real_oldpath, real_newpath); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, - (op_errno == ENOTEMPTY ? GF_LOG_DEBUG - : GF_LOG_ERROR), - "rename of %s to %s failed: %s", - real_oldpath, real_newpath, - strerror (op_errno)); - - if (priv->update_pgfid_nlinks - && !IA_ISDIR (oldloc->inode->ia_type)) { - LINK_MODIFY_PGFID_XATTR (real_oldpath, - pgfid_xattr_key, - nlink_samepgfid, 0, - op_ret, - this, unlock); - } - - goto unlock; - } - - if (!IA_ISDIR (oldloc->inode->ia_type) - && priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, - PGFID_XATTR_KEY_PREFIX, - newloc->pargfid); - LINK_MODIFY_PGFID_XATTR (real_newpath, - pgfid_xattr_key, - nlink_samepgfid, 0, - op_ret, - this, unlock); - } - } -unlock: - UNLOCK (&oldloc->inode->lock); - - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "modification of " - "parent gfid xattr failed (gfid:%s)", - uuid_utoa (oldloc->inode->gfid)); - goto out; - } - - if (was_dir) - posix_handle_unset (this, victim, NULL); - - if (was_present && !was_dir && nlink == 1) - posix_handle_unset (this, victim, NULL); - - if (IA_ISDIR (oldloc->inode->ia_type)) { - 
posix_handle_soft (this, real_newpath, newloc, - oldloc->inode->gfid, NULL); - } - - op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - real_newpath, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_oldpath, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_newpath, strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, - &preoldparent, &postoldparent, - &prenewparent, &postnewparent, NULL); - - return 0; -} - - -int -posix_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_oldpath = 0; - char *real_newpath = 0; - char *par_newpath = 0; - struct iatt stbuf = {0, }; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - int32_t nlink_samepgfid = 0; - char *pgfid_xattr_key = NULL; - gf_boolean_t entry_created = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (oldloc, out); - VALIDATE_OR_GOTO (newloc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); - if (!real_oldpath) { - op_errno = errno; - goto out; - } - - MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - if (!real_newpath || !par_newpath) { - 
op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", - par_newpath, strerror (op_errno)); - goto out; - } - - - op_ret = sys_link (real_oldpath, real_newpath); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "link %s to %s failed: %s", - real_oldpath, real_newpath, strerror (op_errno)); - goto out; - } - - entry_created = _gf_true; - - op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - real_newpath, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", - par_newpath, strerror (op_errno)); - goto out; - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - newloc->pargfid); - - LOCK (&newloc->inode->lock); - { - LINK_MODIFY_PGFID_XATTR (real_newpath, pgfid_xattr_key, - nlink_samepgfid, 0, op_ret, - this, unlock); - } - unlock: - UNLOCK (&newloc->inode->lock); - - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "modification of " - "parent gfid xattr failed (path:%s gfid:%s)", - real_newpath, uuid_utoa (newloc->inode->gfid)); - goto out; - } - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, - (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) - sys_unlink (real_newpath); - } - - return 0; -} - - -int32_t -posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = 0; - struct posix_private *priv = NULL; - struct 
iatt prebuf = {0,}; - struct iatt postbuf = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - MAKE_INODE_HANDLE (real_path, this, loc, &prebuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on %s failed: %s", - real_path ? real_path : "<null>", strerror (op_errno)); - goto out; - } - - op_ret = truncate (real_path, offset); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "truncate on %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - - op_ret = 0; -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - &prebuf, &postbuf, NULL); - - return 0; -} - - -int -posix_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t _fd = -1; - int _flags = 0; - char * real_path = NULL; - char * par_path = NULL; - struct iatt stbuf = {0, }; - struct posix_fd * pfd = NULL; - struct posix_private * priv = NULL; - char was_present = 1; - - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - - int nlink_samepgfid = 0; - char * pgfid_xattr_key = NULL; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - 
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - - gid = frame->root->gid; - - SET_FS_ID (frame->root->uid, gid); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - } - - if (!flags) { - _flags = O_CREAT | O_RDWR | O_EXCL; - } - else { - _flags = flags | O_CREAT; - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)) { - was_present = 0; - } - - if (priv->o_direct) - _flags |= O_DIRECT; - - _fd = open (real_path, _flags, mode); - - if (_fd == -1) { - op_errno = errno; - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "open on %s failed: %s", real_path, - strerror (op_errno)); - goto out; - } - - if ((_flags & O_CREAT) && (_flags & O_EXCL)) { - entry_created = _gf_true; - } - - - if (was_present) - goto fill_stat; - -#ifndef HAVE_SET_FSID - op_ret = chown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chown on %s failed: %s", - real_path, strerror (op_errno)); - } +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE #endif - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting ACLs on %s failed (%s)", real_path, - strerror (errno)); - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - nlink_samepgfid = 1; - SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, - XATTR_CREATE, op_ret, this, ignore); - } -ignore: - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - 
"setting xattrs on %s failed (%s)", real_path, - strerror (errno)); - } - -fill_stat: - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - - op_ret = posix_fdstat (this, _fd, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fstat on %d failed: %s", _fd, strerror (op_errno)); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent %s failed: %s", - par_path, strerror (op_errno)); - goto out; - } - - op_ret = -1; - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - op_errno = errno; - goto out; - } - - pfd->flags = flags; - pfd->fd = _fd; - - op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); - if (op_ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to set the fd context path=%s fd=%p", - real_path, fd); - - LOCK (&priv->lock); - { - priv->nr_files++; - } - UNLOCK (&priv->lock); - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - if ((-1 == op_ret) && (_fd != -1)) { - close (_fd); - } - - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, - fd, (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, xdata); - - if (op_ret < 0) { - if (entry_created) - sys_unlink (real_path); - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - -int32_t -posix_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - int32_t _fd = -1; - struct posix_fd *pfd = NULL; - struct posix_private *priv = NULL; - struct iatt stbuf = {0, }; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - 
VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - if (IA_ISLNK (stbuf.ia_type)) { - op_ret = -1; - op_errno = ELOOP; - goto out; - } - - op_ret = -1; - SET_FS_ID (frame->root->uid, frame->root->gid); - - if (priv->o_direct) - flags |= O_DIRECT; - - _fd = open (real_path, flags, 0); - if (_fd == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "open on %s, flags: %d: %s", - real_path, flags, strerror (op_errno)); - goto out; - } - - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - op_errno = errno; - goto out; - } - - pfd->flags = flags; - pfd->fd = _fd; - - op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); - if (op_ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to set the fd context path=%s fd=%p", - real_path, fd); - - LOCK (&priv->lock); - { - priv->nr_files++; - } - UNLOCK (&priv->lock); - - op_ret = 0; - -out: - if (op_ret == -1) { - if (_fd != -1) { - close (_fd); - } - } - - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); - - return 0; -} - -int -posix_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct posix_private * priv = NULL; - struct iobuf * iobuf = NULL; - struct iobref * iobref = NULL; - struct iovec vec = {0,}; - struct posix_fd * pfd = NULL; - struct iatt stbuf = {0,}; - int ret = -1; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto out; - } - - if 
(!size) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); - goto out; - } - - iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); - if (!iobuf) { - op_errno = ENOMEM; - goto out; - } - - _fd = pfd->fd; - op_ret = pread (_fd, iobuf->ptr, size, offset); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "read failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - LOCK (&priv->lock); - { - priv->read_value += op_ret; - } - UNLOCK (&priv->lock); - - vec.iov_base = iobuf->ptr; - vec.iov_len = op_ret; - - iobref = iobref_new (); - - iobref_add (iobref, iobuf); - - /* - * readv successful, and we need to get the stat of the file - * we read from - */ - - op_ret = posix_fdstat (this, _fd, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - /* Hack to notify higher layers of EOF. */ - if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) - op_errno = ENOENT; - - op_ret = vec.iov_len; -out: - - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - &vec, 1, &stbuf, iobref, NULL); - - if (iobref) - iobref_unref (iobref); - if (iobuf) - iobuf_unref (iobuf); - - return 0; -} - - -int32_t -__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset) -{ - int32_t op_ret = 0; - int idx = 0; - int retval = 0; - off_t internal_off = 0; - - if (!vector) - return -EFAULT; - - internal_off = offset; - for (idx = 0; idx < count; idx++) { - retval = pwrite (fd, vector[idx].iov_base, vector[idx].iov_len, - internal_off); - if (retval == -1) { - op_ret = -errno; - goto err; - } - op_ret += retval; - internal_off += retval; - } - -err: - return op_ret; -} - -int32_t -__posix_writev (int fd, struct iovec *vector, int count, off_t startoff, - int odirect) -{ - int32_t op_ret = 0; - int idx = 0; - int max_buf_size = 0; - int retval = 0; - char *buf = NULL; - char *alloc_buf = NULL; 
- off_t internal_off = 0; - - /* Check for the O_DIRECT flag during open() */ - if (!odirect) - return __posix_pwritev (fd, vector, count, startoff); - - for (idx = 0; idx < count; idx++) { - if (max_buf_size < vector[idx].iov_len) - max_buf_size = vector[idx].iov_len; - } - - alloc_buf = _page_aligned_alloc (max_buf_size, &buf); - if (!alloc_buf) { - op_ret = -errno; - goto err; - } - - internal_off = startoff; - for (idx = 0; idx < count; idx++) { - memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); - - /* not sure whether writev works on O_DIRECT'd fd */ - retval = pwrite (fd, buf, vector[idx].iov_len, internal_off); - if (retval == -1) { - op_ret = -errno; - goto err; - } - - op_ret += retval; - internal_off += retval; - } - -err: - GF_FREE (alloc_buf); - - return op_ret; -} - -dict_t* -_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) -{ - dict_t *rsp_xdata = NULL; - int32_t ret = 0; - inode_t *inode = NULL; - - if (fd) - inode = fd->inode; - - if (!fd || !fd->inode || gf_uuid_is_null (fd->inode->gfid)) { - gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: " - "fd: %p inode: %p gfid:%s", fd, inode?inode:0, - inode?uuid_utoa(inode->gfid):"N/A"); - goto out; - } - - if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) - goto out; - - rsp_xdata = dict_new(); - if (!rsp_xdata) - goto out; - - ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, - fd->inode->fd_count); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " - "dictionary value for %s", uuid_utoa (fd->inode->gfid), - GLUSTERFS_OPEN_FD_COUNT); - } - - ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, - is_append); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " - "dictionary value for %s", uuid_utoa (fd->inode->gfid), - GLUSTERFS_WRITE_IS_APPEND); - } -out: - return rsp_xdata; -} - -int32_t -posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, 
off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct posix_private * priv = NULL; - struct posix_fd * pfd = NULL; - struct iatt preop = {0,}; - struct iatt postop = {0,}; - int ret = -1; - dict_t *rsp_xdata = NULL; - int is_append = 0; - gf_boolean_t locked = _gf_false; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (vector, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - op_errno = -ret; - goto out; - } - - _fd = pfd->fd; - - if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { - /* The write_is_append check and write must happen - atomically. Else another write can overtake this - write after the check and get written earlier. - - So lock before preop-stat and unlock after write. 
- */ - locked = _gf_true; - LOCK(&fd->inode->lock); - } - - op_ret = posix_fdstat (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - if (locked) { - if (preop.ia_size == offset || (fd->flags & O_APPEND)) - is_append = 1; - } - - op_ret = __posix_writev (_fd, vector, count, offset, - (pfd->flags & O_DIRECT)); - - if (locked) { - UNLOCK (&fd->inode->lock); - locked = _gf_false; - } - - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64 - ", %s", offset, strerror (op_errno)); - goto out; - } - - LOCK (&priv->lock); - { - priv->write_value += op_ret; - } - UNLOCK (&priv->lock); - - if (op_ret >= 0) { - rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); - /* wiretv successful, we also need to get the stat of - * the file we wrote to - */ - - if (flags & (O_SYNC|O_DSYNC)) { - ret = fsync (_fd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "fsync() in writev on fd %d failed: %s", - _fd, strerror (errno)); - op_ret = -1; - op_errno = errno; - goto out; - } - } - - ret = posix_fdstat (this, _fd, &postop); - if (ret == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation fstat failed on fd=%p: %s", - fd, strerror (op_errno)); - goto out; - } - } - -out: - - if (locked) { - UNLOCK (&fd->inode->lock); - locked = _gf_false; - } - - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, - rsp_xdata); - - if (rsp_xdata) - dict_unref (rsp_xdata); - return 0; -} - - -int32_t -posix_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xdata) -{ - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct statvfs buf = {0, }; - struct posix_private * priv = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - 
VALIDATE_OR_GOTO (this->private, out); - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - priv = this->private; - - op_ret = statvfs (real_path, &buf); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "statvfs failed on %s: %s", - real_path, strerror (op_errno)); - goto out; - } - - if (!priv->export_statfs) { - buf.f_blocks = 0; - buf.f_bfree = 0; - buf.f_bavail = 0; - buf.f_files = 0; - buf.f_ffree = 0; - buf.f_favail = 0; - } - - op_ret = 0; - -out: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); - return 0; -} - - -int32_t -posix_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - struct posix_fd *pfd = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL on fd=%p", fd); - goto out; - } - - op_ret = 0; - -out: - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); - - return 0; -} - - -int32_t -posix_release (xlator_t *this, fd_t *fd) -{ - struct posix_private * priv = NULL; - struct posix_fd * pfd = NULL; - int ret = -1; - uint64_t tmp_pfd = 0; - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; - - if (pfd->dir) { - gf_log (this->name, GF_LOG_WARNING, - "pfd->dir is %p (not NULL) for file fd=%p", - pfd->dir, fd); - } - - pthread_mutex_lock (&priv->janitor_lock); - { - INIT_LIST_HEAD (&pfd->list); - list_add_tail (&pfd->list, &priv->janitor_fds); - pthread_cond_signal (&priv->janitor_cond); - } - pthread_mutex_unlock 
(&priv->janitor_lock); - - LOCK (&priv->lock); - { - priv->nr_files--; - } - UNLOCK (&priv->lock); - -out: - return 0; -} - - -int -posix_batch_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync, dict_t *xdata) -{ - call_stub_t *stub = NULL; - struct posix_private *priv = NULL; - - priv = this->private; - - stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); - return 0; - } - - pthread_mutex_lock (&priv->fsync_mutex); - { - list_add_tail (&stub->list, &priv->fsyncs); - priv->fsync_queue_count++; - pthread_cond_signal (&priv->fsync_cond); - } - pthread_mutex_unlock (&priv->fsync_mutex); - - return 0; -} - - -int32_t -posix_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct posix_fd * pfd = NULL; - int ret = -1; - struct iatt preop = {0,}; - struct iatt postop = {0,}; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - -#ifdef GF_DARWIN_HOST_OS - /* Always return success in case of fsync in MAC OS X */ - op_ret = 0; - goto out; -#endif - - priv = this->private; - if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { - posix_batch_fsync (frame, this, fd, datasync, xdata); - return 0; - } - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd not found in fd's ctx"); - goto out; - } - - _fd = pfd->fd; - - op_ret = posix_fdstat (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "pre-operation fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - if (datasync) { - op_ret = sys_fdatasync (_fd); - if (op_ret == -1) { - op_errno = 
errno; - gf_log (this->name, GF_LOG_ERROR, - "fdatasync on fd=%p failed: %s", - fd, strerror (errno)); - goto out; - } - } else { - op_ret = sys_fsync (_fd); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "fsync on fd=%p failed: %s", - fd, strerror (op_errno)); - goto out; - } - } - - op_ret = posix_fdstat (this, _fd, &postop); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "post-operation fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, - NULL); - - return 0; -} - -static int gf_posix_xattr_enotsup_log; -static int -_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) -{ - posix_xattr_filler_t *filler = NULL; - - filler = tmp; - - return posix_handle_pair (filler->this, filler->real_path, k, v, - filler->flags); -} - -#ifdef GF_DARWIN_HOST_OS -static inline int -map_xattr_flags(int flags) -{ - /* DARWIN has different defines on XATTR_ flags. - There do not seem to be a POSIX standard - Parse any other flags over. 
- */ - int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); - if (GF_XATTR_CREATE & flags) - darwinflags |= XATTR_CREATE; - if (GF_XATTR_REPLACE & flags) - darwinflags |= XATTR_REPLACE; - return darwinflags; -} -#endif - -int32_t -posix_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - - posix_xattr_filler_t filler = {0,}; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (dict, out); - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = -1; - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - - filler.real_path = real_path; - filler.this = this; -#ifdef GF_DARWIN_HOST_OS - filler.flags = map_xattr_flags(flags); -#else - filler.flags = flags; -#endif - op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, - &filler); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); - - return 0; -} - - -int -posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *key, dict_t *dict, dict_t *xdata) -{ - char *real_path = NULL; - struct dirent *dirent = NULL; - DIR *fd = NULL; - const char *fname = NULL; - char *found = NULL; - int ret = -1; - int op_ret = -1; - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - return -ESTALE; - } - - fd = opendir (real_path); - if (!fd) - return -errno; - - fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); - - while ((dirent = readdir (fd))) { - if (strcasecmp (dirent->d_name, fname) == 0) { - found = gf_strdup (dirent->d_name); - if (!found) { - closedir (fd); - return 
-ENOMEM; - } - break; - } - } - - closedir (fd); - - if (!found) - return -ENOENT; - - ret = dict_set_dynstr (dict, (char *)key, found); - if (ret) { - GF_FREE (found); - return -ENOMEM; - } - ret = strlen (found) + 1; - - return ret; -} - -int -posix_get_ancestry_directory (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, - int32_t *op_errno, dict_t *xdata) -{ - ssize_t handle_size = 0; - struct posix_private *priv = NULL; - char dirpath[PATH_MAX+1] = {0,}; - inode_t *inode = NULL; - int ret = -1; - - priv = this->private; - - handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - - ret = posix_make_ancestryfromgfid (this, dirpath, PATH_MAX + 1, head, - type | POSIX_ANCESTRY_PATH, - leaf_inode->gfid, - handle_size, priv->base_path, - leaf_inode->table, &inode, xdata); - if (ret < 0) - goto out; - - - /* there is already a reference in loc->inode */ - inode_unref (inode); - - if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) { - if (strcmp (dirpath, "/")) - dirpath[strlen (dirpath) - 1] = '\0'; - - *path = gf_strdup (dirpath); - } - -out: - return ret; -} - -int32_t -posix_links_in_same_directory (char *dirpath, int count, inode_t *leaf_inode, - inode_t *parent, uint64_t ino, - gf_dirent_t *head, char **path, - int type, dict_t *xdata, int32_t *op_errno) -{ - DIR *dirp = NULL; - int op_ret = -1; - struct dirent *entry = NULL; - struct dirent *result = NULL; - inode_t *linked_inode = NULL; - gf_dirent_t *gf_entry = NULL; - char temppath[PATH_MAX+1] = {0,}; - xlator_t *this = NULL; - struct posix_private *priv = NULL; - char *tempv = NULL; - - this = THIS; - - priv = this->private; - - dirp = opendir (dirpath); - if (!dirp) { - *op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "could not opendir %s: %s", dirpath, - strerror (*op_errno)); - goto out; - } - - entry = alloca (offsetof(struct dirent, d_name) + NAME_MAX + 1); - if (entry == NULL) - goto out; - - while (count > 0) { - *op_errno = readdir_r (dirp, entry, 
&result); - if ((result == NULL) || *op_errno) - break; - - if (entry->d_ino != ino) - continue; - - linked_inode = inode_link (leaf_inode, parent, - entry->d_name, NULL); - - GF_ASSERT (linked_inode == leaf_inode); - inode_unref (linked_inode); - - if (type & POSIX_ANCESTRY_DENTRY) { - loc_t loc = {0, }; - - loc.inode = inode_ref (leaf_inode); - gf_uuid_copy (loc.gfid, leaf_inode->gfid); - - strcpy (temppath, dirpath); - strcat (temppath, "/"); - strcat (temppath, entry->d_name); - - gf_entry = gf_dirent_for_name (entry->d_name); - gf_entry->inode = inode_ref (leaf_inode); - gf_entry->dict - = posix_xattr_fill (this, temppath, &loc, NULL, - -1, xdata, NULL); - list_add_tail (&gf_entry->list, &head->list); - loc_wipe (&loc); - } - - if (type & POSIX_ANCESTRY_PATH) { - strcpy (temppath, - &dirpath[priv->base_path_length]); - strcat (temppath, "/"); - strcat (temppath, entry->d_name); - if (!*path) { - *path = gf_strdup (temppath); - } else { - /* creating a colon separated */ - /* list of hard links */ - tempv = GF_REALLOC (*path, strlen (*path) - + 1 // ':' - + strlen (temppath) + 1 ); - if (!tempv) { - gf_log (this->name, GF_LOG_WARNING, - "realloc failed on path"); - GF_FREE (*path); - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - *path = tempv; - strcat (*path, ":"); - strcat (*path, temppath); - } - } - - count--; - } - -out: - if (dirp) { - op_ret = closedir (dirp); - if (op_ret == -1) { - *op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "closedir failed: %s", - strerror (*op_errno)); - } - } - - return op_ret; -} - -int -posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, - int32_t *op_errno, dict_t *xdata) -{ - size_t remaining_size = 0; - char dirpath[PATH_MAX+1] = {0,}, *leaf_path = NULL; - int op_ret = -1, pathlen = -1; - ssize_t handle_size = 0; - char pgfidstr[UUID_CANONICAL_FORM_LEN+1] = {0,}; - uuid_t pgfid = {0, }; - int nlink_samepgfid = 0; - struct stat stbuf = {0,}; 
- char *list = NULL; - int32_t list_offset = 0; - char key[4096] = {0,}; - struct posix_private *priv = NULL; - ssize_t size = 0; - inode_t *parent = NULL; - loc_t *loc = NULL; - - priv = this->private; - - loc = GF_CALLOC (1, sizeof (*loc), gf_posix_mt_char); - if (loc == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - gf_uuid_copy (loc->gfid, leaf_inode->gfid); - - MAKE_INODE_HANDLE (leaf_path, this, loc, NULL); - if (!leaf_path) { - GF_FREE (loc); - *op_errno = ESTALE; - goto out; - } - GF_FREE (loc); - - size = sys_llistxattr (leaf_path, NULL, 0); - if (size == -1) { - *op_errno = errno; - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting brick" - " with 'user_xattr' flag)"); - - } else { - gf_log (this->name, GF_LOG_WARNING, - "listxattr failed on %s: %s", - leaf_path, strerror (*op_errno)); - - } - - goto out; - } - - if (size == 0) { - op_ret = 0; - goto out; - } - - list = alloca (size); - if (!list) { - *op_errno = errno; - goto out; - } - - size = sys_llistxattr (leaf_path, list, size); - if (size < 0) { - op_ret = -1; - *op_errno = errno; - goto out; - } - remaining_size = size; - list_offset = 0; - - op_ret = sys_lstat (leaf_path, &stbuf); - if (op_ret == -1) { - *op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "lstat failed" - " on %s: %s", leaf_path, - strerror (*op_errno)); - goto out; - } - - while (remaining_size > 0) { - strcpy (key, list + list_offset); - if (strncmp (key, PGFID_XATTR_KEY_PREFIX, - strlen (PGFID_XATTR_KEY_PREFIX)) != 0) - goto next; - - op_ret = sys_lgetxattr (leaf_path, key, - &nlink_samepgfid, - sizeof(nlink_samepgfid)); - if (op_ret == -1) { - *op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on " - "%s: key = %s (%s)", - leaf_path, - key, - strerror (*op_errno)); - goto out; - } - - nlink_samepgfid = ntoh32 (nlink_samepgfid); - - strcpy 
(pgfidstr, key + strlen(PGFID_XATTR_KEY_PREFIX)); - gf_uuid_parse (pgfidstr, pgfid); - - handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - - /* constructing the absolute real path of parent dir */ - strcpy (dirpath, priv->base_path); - pathlen = PATH_MAX + 1 - priv->base_path_length; - - op_ret = posix_make_ancestryfromgfid (this, - dirpath + priv->base_path_length, - pathlen, - head, - type | POSIX_ANCESTRY_PATH, - pgfid, - handle_size, - priv->base_path, - leaf_inode->table, - &parent, xdata); - if (op_ret < 0) { - goto next; - } - - dirpath[strlen (dirpath) - 1] = '\0'; - - posix_links_in_same_directory (dirpath, nlink_samepgfid, - leaf_inode, - parent, stbuf.st_ino, head, - path, type, xdata, op_errno); - - if (parent != NULL) { - inode_unref (parent); - parent = NULL; - } - - next: - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; - } /* while (remaining_size > 0) */ - - op_ret = 0; - -out: - return op_ret; -} - -int -posix_get_ancestry (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, int32_t *op_errno, - dict_t *xdata) -{ - int ret = -1; - struct posix_private *priv = NULL; - - priv = this->private; - - if (!priv->update_pgfid_nlinks) - goto out; - - if (IA_ISDIR (leaf_inode->ia_type)) { - ret = posix_get_ancestry_directory (this, leaf_inode, - head, path, type, op_errno, - xdata); - } else { - ret = posix_get_ancestry_non_directory (this, leaf_inode, - head, path, type, - op_errno, xdata); - } - -out: - return ret; -} - -/** - * posix_getxattr - this function returns a dictionary with all the - * key:value pair present as xattr. used for - * both 'listxattr' and 'getxattr'. 
- */ -int32_t -posix_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - struct posix_private *priv = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - char host_buf[1024] = {0,}; - char *value = NULL; - char *real_path = NULL; - dict_t *dict = NULL; - char *file_contents = NULL; - int ret = -1; - char *path = NULL; - char *rpath = NULL; - char *dyn_rpath = NULL; - ssize_t size = 0; - char *list = NULL; - int32_t list_offset = 0; - size_t remaining_size = 0; - char keybuffer[4096] = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - - op_ret = -1; - priv = this->private; - - if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && - ZR_FILE_CONTENT_REQUEST(name)) { - ret = posix_get_file_contents (this, loc->gfid, &name[15], - &file_contents); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, - "getting file contents failed: %s", - strerror (op_errno)); - goto out; - } - } - - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; - goto out; - } - - if (loc->inode && name && GF_POSIX_ACL_REQUEST (name)) { - ret = posix_pacl_get (real_path, name, &value); - if (ret || !value) { - gf_log (this->name, GF_LOG_WARNING, - "could not get acl (%s) for %s: %s", name, - real_path, strerror (errno)); - op_ret = -1; - op_errno = errno; - goto out; - } - - ret = dict_set_dynstr (dict, (char *)name, value); - if (ret < 0) { - GF_FREE (value); - gf_log (this->name, GF_LOG_WARNING, - "could not set acl (%s) for %s in dictionary: " - "(%s)", name, real_path, strerror (errno)); - op_ret = -1; - op_errno = errno; - goto out; - } - - size = ret; - goto done; - } - - if (loc->inode && name && - (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, - strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { - ret = 
posix_xattr_get_real_filename (frame, this, loc, - name, dict, xdata); - if (ret < 0) { - op_ret = -1; - op_errno = -ret; - gf_log (this->name, (op_errno == ENOENT) ? - GF_LOG_DEBUG : GF_LOG_WARNING, - "Failed to get real filename (%s, %s): %s", - loc->path, name, strerror (op_errno)); - goto out; - } - - size = ret; - goto done; - } - - if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { - if (!list_empty (&loc->inode->fd_list)) { - ret = dict_set_uint32 (dict, (char *)name, 1); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", - name); - } else { - ret = dict_set_uint32 (dict, (char *)name, 0); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", - name); - } - goto done; - } - if (loc->inode && name && (XATTR_IS_PATHINFO (name))) { - if (LOC_HAS_ABSPATH (loc)) - MAKE_REAL_PATH (rpath, this, loc->path); - else - rpath = real_path; - - (void) snprintf (host_buf, 1024, - "<POSIX(%s):%s:%s>", priv->base_path, - ((priv->node_uuid_pathinfo - && !gf_uuid_is_null(priv->glusterd_uuid)) - ? 
uuid_utoa (priv->glusterd_uuid) - : priv->hostname), - rpath); - - dyn_rpath = gf_strdup (host_buf); - if (!dyn_rpath) { - ret = -1; - goto done; - } - size = strlen (dyn_rpath) + 1; - ret = dict_set_dynstr (dict, (char *)name, dyn_rpath); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "could not set value (%s) in dictionary", - dyn_rpath); - GF_FREE (dyn_rpath); - } - - goto done; - } - - if (loc->inode && name && - (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0) - && !gf_uuid_is_null (priv->glusterd_uuid)) { - (void) snprintf (host_buf, 1024, "%s", - uuid_utoa (priv->glusterd_uuid)); - - dyn_rpath = gf_strdup (host_buf); - if (!dyn_rpath) { - op_errno = ENOMEM; - goto out; - } - - size = strlen (dyn_rpath) + 1; - ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY, - dyn_rpath); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "could not set value (%s) in dictionary", - dyn_rpath); - GF_FREE (dyn_rpath); - op_errno = -ret; - goto out; - } - goto done; - } - - if (loc->inode && name && - (strcmp (name, GFID_TO_PATH_KEY) == 0)) { - ret = inode_path (loc->inode, NULL, &path); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, "%s: could not get " - "inode path", uuid_utoa (loc->inode->gfid)); - goto out; - } - - size = ret; - ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path); - if (ret < 0) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "could not set value (%s) in dictionary", - host_buf); - GF_FREE (path); - goto out; - } - goto done; - } - - if (loc->inode && name - && (strcmp (name, GET_ANCESTRY_PATH_KEY) == 0)) { - int type = POSIX_ANCESTRY_PATH; - - op_ret = posix_get_ancestry (this, loc->inode, NULL, - &path, type, &op_errno, - xdata); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENODATA; - goto out; - } - - op_ret = dict_set_dynstr (dict, GET_ANCESTRY_PATH_KEY, path); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "could not get " - "value for key (%s)", GET_ANCESTRY_PATH_KEY); - GF_FREE 
(path); - op_errno = -op_ret; - op_ret = -1; - } - - goto done; - } - - if (loc->inode && name - && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, - strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { - op_ret = posix_get_objectsignature (real_path, dict); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - } - - goto done; - } - - if (name) { - strcpy (keybuffer, name); - char *key = keybuffer; -#if defined(GF_DARWIN_HOST_OS_DISABLED) - if (priv->xattr_user_namespace == XATTR_STRIP) { - if (strncmp(key, "user.",5) == 0) { - key += 5; - gf_log (this->name, - GF_LOG_DEBUG, - "getxattr for file %s" - " stripping user key: %s -> %s", - real_path, keybuffer, key); - } - } -#endif - size = sys_lgetxattr (real_path, key, NULL, 0); - if (size == -1) { - op_errno = errno; - if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " - "flag)"); - } else if (op_errno == ENOATTR || - op_errno == ENODATA) { - gf_log (this->name, GF_LOG_DEBUG, - "No such attribute:%s for file %s", - key, real_path); - } else { - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on %s: %s (%s)", - real_path, key, strerror (op_errno)); - } - - goto done; - } - value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - size = sys_lgetxattr (real_path, key, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " - "%s: key = %s (%s)", real_path, key, - strerror (op_errno)); - GF_FREE (value); - goto out; - } - value [size] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, size); - if (op_ret < 0) { - op_errno = -op_ret; - gf_log (this->name, GF_LOG_ERROR, "dict set operation " - "on %s for the key %s failed.", real_path, key); - GF_FREE (value); - goto out; - } - - goto done; - } - - 
size = sys_llistxattr (real_path, NULL, 0); - if (size == -1) { - op_errno = errno; - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " - "flag)"); - } - else { - gf_log (this->name, GF_LOG_ERROR, - "listxattr failed on %s: %s", - real_path, strerror (op_errno)); - } - goto out; - } - - if (size == 0) - goto done; - - list = alloca (size); - if (!list) { - op_errno = errno; - goto out; - } - - size = sys_llistxattr (real_path, list, size); - if (size < 0) { - op_ret = -1; - op_errno = errno; - goto out; - } - - remaining_size = size; - list_offset = 0; - while (remaining_size > 0) { - strcpy (keybuffer, list + list_offset); - size = sys_lgetxattr (real_path, keybuffer, NULL, 0); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " - "%s: key = %s (%s)", real_path, keybuffer, - strerror (op_errno)); - break; - } - - value = GF_CALLOC (size + 1, sizeof(char), - gf_posix_mt_char); - if (!value) { - op_errno = errno; - goto out; - } - - size = sys_lgetxattr (real_path, keybuffer, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " - "%s: key = %s (%s)", real_path, keybuffer, - strerror (op_errno)); - GF_FREE (value); - break; - } - - value [size] = '\0'; -#ifdef GF_DARWIN_HOST_OS - /* The protocol expect namespace for now */ - char *newkey = NULL; - gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey); - strcpy (keybuffer, newkey); - GF_FREE (newkey); -#endif - op_ret = dict_set_dynptr (dict, keybuffer, value, size); - if (op_ret < 0) { - op_errno = -op_ret; - gf_log (this->name, GF_LOG_ERROR, "dict set operation " - "on %s for the key %s failed.", real_path, - keybuffer); - GF_FREE (value); - goto out; - } - - remaining_size -= strlen (keybuffer) + 1; - list_offset += 
strlen (keybuffer) + 1; - - } /* while (remaining_size > 0) */ - -done: - op_ret = size; - - if (dict) { - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); - - if (dict) { - dict_unref (dict); - } - - return 0; -} - - -int32_t -posix_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = ENOENT; - struct posix_fd * pfd = NULL; - int _fd = -1; - int32_t list_offset = 0; - ssize_t size = 0; - size_t remaining_size = 0; - char key[4096] = {0,}; - char * value = NULL; - char * list = NULL; - dict_t * dict = NULL; - int ret = -1; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto out; - } - - _fd = pfd->fd; - - /* Get the total size */ - dict = get_new_dict (); - if (!dict) { - goto out; - } - - if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { - ret = dict_set_uint32 (dict, (char *)name, 1); - if (ret < 0) - gf_log (this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", - name); - goto done; - } - - if (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, - strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) { - op_ret = posix_fdget_objectsignature (_fd, dict); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - } - - goto done; - } - - if (name) { - strcpy (key, name); -#ifdef GF_DARWIN_HOST_OS - struct posix_private *priv = NULL; - priv = this->private; - if (priv->xattr_user_namespace == XATTR_STRIP) { - char *newkey = NULL; - gf_add_prefix (XATTR_USER_PREFIX, key, &newkey); - strcpy (key, newkey); - GF_FREE (newkey); - } -#endif - size = sys_fgetxattr (_fd, 
key, NULL, 0); - if (size == -1) { - op_errno = errno; - gf_log (this->name, - ((errno == ENODATA || errno == ENOATTR) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "fgetxattr failed on key %s (%s)", key, - strerror (op_errno)); - goto done; - } - - value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - goto out; - } - size = sys_fgetxattr (_fd, key, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " - "fd %p for the key %s (%s)", fd, key, - strerror (op_errno)); - GF_FREE (value); - goto out; - } - value [size] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, size); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "dict set operation " - "on key %s failed", key); - GF_FREE (value); - goto out; - } - goto done; - } - - size = sys_flistxattr (_fd, NULL, 0); - if (size == -1) { - op_errno = errno; - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting " - "brick with 'user_xattr' flag)"); - } - else { - gf_log (this->name, GF_LOG_ERROR, - "listxattr failed on %p: %s", - fd, strerror (op_errno)); - } - goto out; - } - - if (size == 0) - goto done; - - list = alloca (size + 1); - if (!list) { - op_errno = errno; - goto out; - } - - size = sys_flistxattr (_fd, list, size); - - remaining_size = size; - list_offset = 0; - while (remaining_size > 0) { - if(*(list + list_offset) == '\0') - break; - - strcpy (key, list + list_offset); - size = sys_fgetxattr (_fd, key, NULL, 0); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " - "fd %p for the key %s (%s)", fd, key, - strerror (op_errno)); - break; - } - - value = GF_CALLOC (size + 1, sizeof(char), - gf_posix_mt_char); - if (!value) { - op_ret = -1; - op_errno = errno; - goto out; - } - - size = sys_fgetxattr (_fd, 
key, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " - "the fd %p for the key %s (%s)", fd, key, - strerror (op_errno)); - GF_FREE (value); - break; - } - - value [size] = '\0'; - - op_ret = dict_set_dynptr (dict, key, value, size); - if (op_ret) { - gf_log (this->name, GF_LOG_ERROR, "dict set operation " - "failed on key %s", key); - GF_FREE (value); - goto out; - } - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; - - } /* while (remaining_size > 0) */ - -done: - op_ret = size; - - if (dict) { - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - dict_ref (dict); - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); - - if (dict) - dict_unref (dict); - - return 0; -} - -static int -_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) -{ - posix_xattr_filler_t *filler = NULL; - - filler = tmp; - - return posix_fhandle_pair (filler->this, filler->fdnum, k, v, - filler->flags); -} - -int32_t -posix_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_fd * pfd = NULL; - int _fd = -1; - int ret = -1; - - posix_xattr_filler_t filler = {0,}; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (dict, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto out; - } - _fd = pfd->fd; - - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - - filler.fdnum = _fd; - filler.this = this; -#ifdef GF_DARWIN_HOST_OS - filler.flags = map_xattr_flags(flags); -#else - filler.flags = flags; -#endif - op_ret 
= dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, - &filler); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - } - - if (!ret && xdata && dict_get (xdata, GLUSTERFS_DURABLE_OP)) { - op_ret = fsync (_fd); - if (op_ret < 0) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "could not satisfy durability request: " - "reason (%s)", strerror (errno)); - } - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); - - return 0; -} - -int -_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) -{ - int32_t op_ret = 0; - xlator_t *this = NULL; - posix_xattr_filler_t *filler = NULL; - - filler = (posix_xattr_filler_t *) data; - this = filler->this; -#ifdef GF_DARWIN_HOST_OS - struct posix_private *priv = NULL; - priv = (struct posix_private *) this->private; - char *newkey = NULL; - if (priv->xattr_user_namespace == XATTR_STRIP) { - gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey); - gf_log("remove_xattr", GF_LOG_DEBUG, "key %s => %s" , key, - newkey); - key = newkey; - } -#endif - /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may - * have special behavior. Ex: removexattr("posix.system_acl_access"), - * removes more than one xattr on the file that could be present in the - * bulk-removal request. Removexattr of these deleted xattrs will fail - * with either ENODATA/ENOATTR. Since all this fop cares is removal of the - * xattrs in bulk-remove request and if they are already deleted, it can be - * treated as success. 
- */ - - op_ret = sys_lremovexattr (filler->real_path, key); - if (op_ret == -1) { - if (errno == ENODATA || errno == ENOATTR) - op_ret = 0; - } - - if (op_ret == -1) { - filler->op_errno = errno; - if (errno != ENOATTR && errno != ENODATA && errno != EPERM) - gf_log (this->name, GF_LOG_ERROR, - "removexattr failed on %s (for %s): %s", - filler->real_path, key, strerror (errno)); - } -#ifdef GF_DARWIN_HOST_OS - GF_FREE(newkey); -#endif - return op_ret; -} - - -int32_t -posix_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - posix_xattr_filler_t filler = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - - if (!strcmp (GFID_XATTR_KEY, name)) { - gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" - " on gfid for file %s", real_path); - op_ret = -1; - goto out; - } - if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) { - gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" - " on volume-id for file %s", real_path); - op_ret = -1; - goto out; - } - - - SET_FS_ID (frame->root->uid, frame->root->gid); - - /** - * sending an empty key name with xdata containing the - * list of key(s) to be removed implies "bulk remove request" - * for removexattr. 
- */ - if (name && (strcmp (name, "") == 0) && xdata) { - filler.real_path = real_path; - filler.this = this; - op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); - if (op_ret) { - op_errno = filler.op_errno; - } - - goto out; - } - - op_ret = sys_lremovexattr (real_path, name); - if (op_ret == -1) { - op_errno = errno; - if (op_errno != ENOATTR && op_errno != ENODATA && - op_errno != EPERM) - gf_log (this->name, GF_LOG_ERROR, - "removexattr on %s (for %s): %s", real_path, - name, strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); - return 0; -} - -int32_t -posix_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_fd * pfd = NULL; - int _fd = -1; - int ret = -1; - - DECLARE_OLD_FS_ID_VAR; - - if (!strcmp (GFID_XATTR_KEY, name)) { - gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" - " on gfid for file"); - goto out; - } - if (!strcmp (GF_XATTR_VOL_ID_KEY, name)) { - gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" - " on volume-id for file"); - goto out; - } - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL from fd=%p", fd); - goto out; - } - _fd = pfd->fd; - - - - SET_FS_ID (frame->root->uid, frame->root->gid); - - op_ret = sys_fremovexattr (_fd, name); - if (op_ret == -1) { - op_errno = errno; - if (op_errno != ENOATTR && op_errno != ENODATA && - op_errno != EPERM) - gf_log (this->name, GF_LOG_ERROR, - "fremovexattr (for %s): %s", - name, strerror (op_errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); - return 0; -} - - -int32_t -posix_fsyncdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync, dict_t *xdata) -{ - int32_t op_ret = -1; - 
int32_t op_errno = 0; - int ret = -1; - struct posix_fd *pfd = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - goto out; - } - - op_ret = 0; - -out: - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); - - return 0; -} - - -void -posix_print_xattr (dict_t *this, - char *key, - data_t *value, - void *data) -{ - gf_log ("posix", GF_LOG_DEBUG, - "(key/val) = (%s/%d)", key, data_to_int32 (value)); -} - - -/** - * add_array - add two arrays of 32-bit numbers (stored in network byte order) - * dest = dest + src - * @count: number of 32-bit numbers - * FIXME: handle overflow - */ - -static void -__add_array (int32_t *dest, int32_t *src, int count) -{ - int i = 0; - int32_t destval = 0; - for (i = 0; i < count; i++) { - destval = ntoh32 (dest[i]); - dest[i] = hton32 (destval + ntoh32 (src[i])); - } -} - -static void -__add_long_array (int64_t *dest, int64_t *src, int count) -{ - int i = 0; - for (i = 0; i < count; i++) { - dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i])); - } -} - -static int -_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) -{ - int size = 0; - int count = 0; - int op_ret = 0; - int op_errno = 0; - gf_xattrop_flags_t optype = 0; - char *array = NULL; - inode_t *inode = NULL; - xlator_t *this = NULL; - posix_xattr_filler_t *filler = NULL; - - filler = tmp; - - optype = (gf_xattrop_flags_t)(filler->flags); - this = filler->this; - inode = filler->inode; - count = v->len; - array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); - -#ifdef GF_DARWIN_HOST_OS - struct posix_private *priv = NULL; - priv = this->private; - if (priv->xattr_user_namespace == XATTR_STRIP) { - if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { - k += XATTR_USER_PREFIX_LEN; - } - } -#endif - - LOCK (&inode->lock); - { - 
if (filler->real_path) { - size = sys_lgetxattr (filler->real_path, k, - (char *)array, v->len); - } else { - size = sys_fgetxattr (filler->fdnum, k, (char *)array, - v->len); - } - - op_errno = errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported by filesystem"); - } else if (op_errno != ENOENT || - !posix_special_xattr (marker_xattrs, - k)) { - if (filler->real_path) - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on %s while doing " - "xattrop: Key:%s (%s)", - filler->real_path, - k, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fgetxattr failed on gfid=%s " - "while doing xattrop: " - "Key:%s (%s)", - uuid_utoa (filler->inode->gfid), - k, strerror (op_errno)); - } - - op_ret = -1; - goto unlock; - } - - switch (optype) { - - case GF_XATTROP_ADD_ARRAY: - __add_array ((int32_t *) array, (int32_t *) v->data, - v->len / 4); - break; - - case GF_XATTROP_ADD_ARRAY64: - __add_long_array ((int64_t *) array, (int64_t *) v->data, - v->len / 8); - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unknown xattrop type (%d) on %s. 
Please send " - "a bug report to gluster-devel@gluster.org", - optype, filler->real_path); - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - - if (filler->real_path) { - size = sys_lsetxattr (filler->real_path, k, array, - v->len, 0); - } else { - size = sys_fsetxattr (filler->fdnum, k, (char *)array, - v->len, 0); - } - } -unlock: - UNLOCK (&inode->lock); - - if (op_ret == -1) - goto out; - - op_errno = errno; - if (size == -1) { - if (filler->real_path) - gf_log (this->name, GF_LOG_ERROR, - "setxattr failed on %s while doing xattrop: " - "key=%s (%s)", filler->real_path, - k, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr failed on gfid=%s while doing xattrop: " - "key=%s (%s)", - uuid_utoa (filler->inode->gfid), - k, strerror (op_errno)); - - op_ret = -1; - goto out; - } else { - size = dict_set_bin (d, k, array, v->len); - - if (size != 0) { - if (filler->real_path) - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (path=%s): " - "key=%s (%s)", filler->real_path, - k, strerror (-size)); - else - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (gfid=%s): " - "key=%s (%s)", - uuid_utoa (filler->inode->gfid), - k, strerror (-size)); - - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } - - array = NULL; - -out: - if (op_ret < 0) - filler->op_errno = op_errno; - return op_ret; -} - -/** - * xattrop - xattr operations - for internal use by GlusterFS - * @optype: ADD_ARRAY: - * dict should contain: - * "key" ==> array of 32-bit numbers - */ - -int -do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr) -{ - int op_ret = 0; - int op_errno = 0; - int _fd = -1; - char *real_path = NULL; - struct posix_fd *pfd = NULL; - inode_t *inode = NULL; - posix_xattr_filler_t filler = {0,}; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (xattr, out); - VALIDATE_OR_GOTO (this, out); - - if (fd) { - op_ret = posix_fd_ctx_get (fd, this, &pfd); 
- if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to get pfd from fd=%p", - fd); - op_errno = EBADFD; - goto out; - } - _fd = pfd->fd; - } - - if (loc && !gf_uuid_is_null (loc->gfid)) - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - - if (real_path) { - inode = loc->inode; - } else if (fd) { - inode = fd->inode; - } - - filler.this = this; - filler.fdnum = _fd; - filler.real_path = real_path; - filler.flags = (int)optype; - filler.inode = inode; - - op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, - &filler); - op_errno = filler.op_errno; - -out: - - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL); - return 0; -} - - -int -posix_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) -{ - do_xattrop (frame, this, loc, NULL, optype, xattr); - return 0; -} - - -int -posix_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) -{ - do_xattrop (frame, this, NULL, fd, optype, xattr); - return 0; -} - - -int -posix_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = errno; - goto out; - } - - op_ret = access (real_path, mask & 07); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s", - real_path, strerror (op_errno)); - goto out; - } - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); - return 0; -} - - -int32_t -posix_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, dict_t *xdata) 
-{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct iatt preop = {0,}; - struct iatt postop = {0,}; - struct posix_fd *pfd = NULL; - int ret = -1; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto out; - } - - _fd = pfd->fd; - - op_ret = posix_fdstat (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } - - op_ret = ftruncate (_fd, offset); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "ftruncate failed on fd=%p (%"PRId64": %s", - fd, offset, strerror (errno)); - goto out; - } - - op_ret = posix_fdstat (this, _fd, &postop); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "post-operation fstat failed on fd=%p: %s", - fd, strerror (errno)); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, - &postop, NULL); - - return 0; -} - - -int32_t -posix_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) -{ - int _fd = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct iatt buf = {0,}; - struct posix_fd *pfd = NULL; - dict_t *xattr_rsp = NULL; - int ret = -1; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) 
{ - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto out; - } - - _fd = pfd->fd; - - op_ret = posix_fdstat (this, _fd, &buf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s", - fd, strerror (op_errno)); - goto out; - } - - if (xdata) - xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, _fd, xdata, - &buf); - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, xattr_rsp); - if (xattr_rsp) - dict_unref (xattr_rsp); - return 0; -} - -static int gf_posix_lk_log; - -int32_t -posix_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) -{ - struct gf_flock nullock = {0, }; - - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL); - return 0; -} - -int32_t -posix_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL); - return 0; -} - -int32_t -posix_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. 
You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL); - return 0; -} - - -int32_t -posix_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); - return 0; -} - -int32_t -posix_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); - return 0; -} - - -int -posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, - gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) -{ - off_t in_case = -1; - off_t last_off = 0; - size_t filled = 0; - int count = 0; - char entrybuf[sizeof(struct dirent) + 256 + 8]; - struct dirent *entry = NULL; - int32_t this_size = -1; - gf_dirent_t *this_entry = NULL; - struct posix_fd *pfd = NULL; - uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; - struct stat stbuf = {0,}; - char *hpath = NULL; - int len = 0; - int ret = 0; - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - count = -1; - errno = -ret; - goto out; - } - - if (skip_dirs) { - len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); - if (len <= 0) { - errno = ESTALE; - count = -1; - goto out; - } - hpath = alloca (len + 256); /* NAME_MAX */ - - if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, - 
len) <= 0) { - errno = ESTALE; - count = -1; - goto out; - } - - len = strlen (hpath); - hpath[len] = '/'; - } - - if (!off) { - rewinddir (dir); - } else { - seekdir (dir, off); -#ifndef GF_LINUX_HOST_OS - if ((u_long)telldir(dir) != off && off != pfd->dir_eof) { - gf_log (THIS->name, GF_LOG_ERROR, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", off, dir); - errno = EINVAL; - count = -1; - goto out; - } -#endif /* GF_LINUX_HOST_OS */ - } - - while (filled <= size) { - in_case = (u_long)telldir (dir); - - if (in_case == -1) { - gf_log (THIS->name, GF_LOG_ERROR, - "telldir failed on dir=%p: %s", - dir, strerror (errno)); - goto out; - } - - errno = 0; - entry = NULL; - readdir_r (dir, (struct dirent *)entrybuf, &entry); - - if (!entry) { - if (errno == EBADF) { - gf_log (THIS->name, GF_LOG_WARNING, - "readdir failed on dir=%p: %s", - dir, strerror (errno)); - goto out; - } - break; - } - -#ifdef __NetBSD__ - /* - * NetBSD with UFS1 backend uses backing files for - * extended attributes. They can be found in a - * .attribute file located at the root of the filesystem - * We hide it to glusterfs clients, since chaos will occur - * when the cluster/dht xlator decides to distribute - * exended attribute backing file across storage servers. 
- */ - if ((gf_uuid_compare (fd->inode->gfid, rootgfid) == 0) - && (!strcmp(entry->d_name, ".attribute"))) - continue; -#endif /* __NetBSD__ */ - - if ((gf_uuid_compare (fd->inode->gfid, rootgfid) == 0) - && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) { - continue; - } - - if (skip_dirs) { - if (DT_ISDIR (entry->d_type)) { - continue; - } else if (hpath) { - strcpy (&hpath[len+1],entry->d_name); - ret = lstat (hpath, &stbuf); - if (!ret && S_ISDIR (stbuf.st_mode)) - continue; - } - } - - this_size = max (sizeof (gf_dirent_t), - sizeof (gfs3_dirplist)) - + strlen (entry->d_name) + 1; - - if (this_size + filled > size) { - seekdir (dir, in_case); -#ifndef GF_LINUX_HOST_OS - if ((u_long)telldir(dir) != in_case && - in_case != pfd->dir_eof) { - gf_log (THIS->name, GF_LOG_ERROR, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", - in_case, dir); - errno = EINVAL; - count = -1; - goto out; - } -#endif /* GF_LINUX_HOST_OS */ - break; - } - - this_entry = gf_dirent_for_name (entry->d_name); - - if (!this_entry) { - gf_log (THIS->name, GF_LOG_ERROR, - "could not create gf_dirent for entry %s: (%s)", - entry->d_name, strerror (errno)); - goto out; - } - /* - * we store the offset of next entry here, which is - * probably not intended, but code using syncop_readdir() - * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it - * for directory read resumption. 
- */ - last_off = (u_long)telldir(dir); - this_entry->d_off = last_off; - this_entry->d_ino = entry->d_ino; - this_entry->d_type = entry->d_type; - - list_add_tail (&this_entry->list, &entries->list); - - filled += this_size; - count ++; - } - - if ((!readdir (dir) && (errno == 0))) { - /* Indicate EOF */ - errno = ENOENT; - /* Remember EOF offset for later detection */ - pfd->dir_eof = (u_long)last_off; - } -out: - return count; -} - -dict_t * -posix_entry_xattr_fill (xlator_t *this, inode_t *inode, - fd_t *fd, char *name, dict_t *dict, - struct iatt *stbuf) -{ - loc_t tmp_loc = {0,}; - char *entry_path = NULL; - - /* if we don't send the 'loc', open-fd-count be a problem. */ - tmp_loc.inode = inode; - - MAKE_HANDLE_PATH (entry_path, this, fd->inode->gfid, name); - if (!entry_path) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to create handle path for %s/%s", - uuid_utoa (fd->inode->gfid), name); - - return NULL; - } - return posix_xattr_fill (this, entry_path, &tmp_loc, NULL, -1, dict, - stbuf); - -} - - -int -posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) -{ - gf_dirent_t *entry = NULL; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - char *hpath = NULL; - int len = 0; - struct iatt stbuf = {0, }; - uuid_t gfid; - int ret = -1; - if (list_empty(&entries->list)) - return 0; - - itable = fd->inode->table; - - len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); - if (len <= 0) - return -1; - hpath = alloca (len + 256); /* NAME_MAX */ - if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, len) <= 0) - return -1; - len = strlen (hpath); - hpath[len] = '/'; - - list_for_each_entry (entry, &entries->list, list) { - memset (gfid, 0, 16); - inode = inode_grep (fd->inode->table, fd->inode, - entry->d_name); - if (inode) - gf_uuid_copy (gfid, inode->gfid); - - strcpy (&hpath[len+1], entry->d_name); - - ret = posix_pstat (this, gfid, hpath, &stbuf); - - if (ret == -1) - continue; - - if (!inode) - inode = 
inode_find (itable, stbuf.ia_gfid); - - if (!inode) - inode = inode_new (itable); - - entry->inode = inode; - - if (dict) { - entry->dict = - posix_entry_xattr_fill (this, entry->inode, - fd, entry->d_name, - dict, &stbuf); - } - - entry->d_stat = stbuf; - if (stbuf.ia_ino) - entry->d_ino = stbuf.ia_ino; - inode = NULL; - } - - return 0; -} - - -int32_t -posix_do_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) -{ - struct posix_fd *pfd = NULL; - DIR *dir = NULL; - int ret = -1; - int count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - gf_dirent_t entries; - int32_t skip_dirs = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - INIT_LIST_HEAD (&entries.list); - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto out; - } - - dir = pfd->dir; - - if (!dir) { - gf_log (this->name, GF_LOG_WARNING, - "dir is NULL for fd=%p", fd); - op_errno = EINVAL; - goto out; - } - - /* When READDIR_FILTER option is set to on, we can filter out - * directory's entry from the entry->list. - */ - ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); - - LOCK (&fd->lock); - { - /* posix_fill_readdir performs multiple separate individual - readdir() calls to fill up the buffer. - - In case of NFS where the same anonymous FD is shared between - different applications, reading a common directory can - result in the anonymous fd getting re-used unsafely between - the two readdir requests (in two different io-threads). - - It would also help, in the future, to replace the loop - around readdir() with a single large getdents() call. 
- */ - count = posix_fill_readdir (fd, dir, off, size, &entries, this, - skip_dirs); - } - UNLOCK (&fd->lock); - - /* pick ENOENT to indicate EOF */ - op_errno = errno; - op_ret = count; - - if (whichop != GF_FOP_READDIRP) - goto out; - - posix_readdirp_fill (this, fd, &entries, dict); - -out: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); - - gf_dirent_free (&entries); - - return 0; -} - - -int32_t -posix_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *xdata) -{ - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); - return 0; -} - - -int32_t -posix_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *dict) -{ - gf_dirent_t entries; - int32_t op_ret = -1, op_errno = 0; - gf_dirent_t *entry = NULL; - - - if ((dict != NULL) && (dict_get (dict, GET_ANCESTRY_DENTRY_KEY))) { - INIT_LIST_HEAD (&entries.list); - - op_ret = posix_get_ancestry (this, fd->inode, &entries, NULL, - POSIX_ANCESTRY_DENTRY, - &op_errno, dict); - if (op_ret >= 0) { - op_ret = 0; - - list_for_each_entry (entry, &entries.list, list) { - op_ret++; - } - } - - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, - NULL); - - gf_dirent_free (&entries); - return 0; - } - - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict); - return 0; -} - -int32_t -posix_priv (xlator_t *this) -{ - struct posix_private *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, - this->name); - gf_proc_dump_add_section(key_prefix); - - if (!this) - return 0; - - priv = this->private; - - if (!priv) - return 0; - - gf_proc_dump_write("base_path","%s", priv->base_path); - gf_proc_dump_write("base_path_length","%d", priv->base_path_length); - gf_proc_dump_write("max_read","%d", priv->read_value); - gf_proc_dump_write("max_write","%d", priv->write_value); - gf_proc_dump_write("nr_files","%ld", priv->nr_files); 
- - return 0; -} - -int32_t -posix_inode (xlator_t *this) -{ - return 0; -} - - -int32_t -posix_rchecksum (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, int32_t len, dict_t *xdata) -{ - char *alloc_buf = NULL; - char *buf = NULL; - int _fd = -1; - struct posix_fd *pfd = NULL; - int op_ret = -1; - int op_errno = 0; - int ret = 0; - int32_t weak_checksum = 0; - unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; - struct posix_private *priv = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - - alloc_buf = _page_aligned_alloc (len, &buf); - if (!alloc_buf) { - op_errno = ENOMEM; - goto out; - } - - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto out; - } - - _fd = pfd->fd; - - LOCK (&fd->lock); - { - if (priv->aio_capable && priv->aio_init_done) - __posix_fd_set_odirect (fd, pfd, 0, offset, len); - - ret = pread (_fd, buf, len, offset); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pread of %d bytes returned %d (%s)", - len, ret, strerror (errno)); - - op_errno = errno; - } - - } - UNLOCK (&fd->lock); - - if (ret < 0) - goto out; - - weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) ret); - gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) ret, (unsigned char *) strong_checksum); - - op_ret = 0; -out: - STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, - weak_checksum, strong_checksum, NULL); - - GF_FREE (alloc_buf); - - return 0; -} - - -/** - * notify - when parent sends PARENT_UP, send CHILD_UP event from here - */ -int32_t -notify (xlator_t *this, - int32_t event, - void *data, - ...) 
-{ - switch (event) - { - case GF_EVENT_PARENT_UP: - { - /* Tell the parent that posix xlator is up */ - default_notify (this, GF_EVENT_CHILD_UP, data); - } - break; - default: - /* */ - break; - } - return 0; -} +#include <glusterfs/xlator.h> +#include "posix.h" int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } - - return ret; -} - -static int -posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) -{ - struct posix_private *priv = NULL; - int ret = -1; - struct stat st = {0,}; - - priv = this->private; - - ret = sys_lstat (priv->base_path, &st); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to stat " - "brick path %s (%s)", - priv->base_path, strerror (errno)); - return ret; - } - - if ((uid == -1 || st.st_uid == uid) && - (gid == -1 || st.st_gid == gid)) - return 0; - - ret = sys_chown (priv->base_path, uid, gid); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Failed to set " - "uid/gid for brick path %s, %s", - priv->base_path, strerror (errno)); - - return ret; -} - - -static int -set_batch_fsync_mode (struct posix_private *priv, const char *str) -{ - if (strcmp (str, "none") == 0) - priv->batch_fsync_mode = BATCH_NONE; - else if (strcmp (str, "syncfs") == 0) - priv->batch_fsync_mode = BATCH_SYNCFS; - else if (strcmp (str, "syncfs-single-fsync") == 0) - priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; - else if (strcmp (str, "syncfs-reverse-fsync") == 0) - priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; - else if (strcmp (str, "reverse-fsync") == 0) - priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; - else - return -1; - - return 0; -} - -#ifdef GF_DARWIN_HOST_OS -static int -set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) -{ - if (strcmp (str, "none") == 0) - priv->xattr_user_namespace = XATTR_NONE; - else if 
(strcmp (str, "strip") == 0) - priv->xattr_user_namespace = XATTR_STRIP; - else if (strcmp (str, "append") == 0) - priv->xattr_user_namespace = XATTR_APPEND; - else if (strcmp (str, "both") == 0) - priv->xattr_user_namespace = XATTR_BOTH; - else - return -1; - return 0; -} -#endif - -int -reconfigure (xlator_t *this, dict_t *options) -{ - int ret = -1; - struct posix_private *priv = NULL; - int32_t uid = -1; - int32_t gid = -1; - char *batch_fsync_mode_str = NULL; - - priv = this->private; - - GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); - GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); - if (uid != -1 || gid != -1) - posix_set_owner (this, uid, gid); - - GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, - options, uint32, out); - - GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, - options, str, out); - - if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { - gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", - batch_fsync_mode_str); - goto out; - } +mem_acct_init(xlator_t *this); -#ifdef GF_DARWIN_HOST_OS - - char *xattr_user_namespace_mode_str = NULL; - - GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, - options, str, out); - - if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { - gf_log (this->name, GF_LOG_ERROR, "Unknown xattr user namespace mode string: %s", - xattr_user_namespace_mode_str); - goto out; - } - -#endif - - GF_OPTION_RECONF ("linux-aio", priv->aio_configured, - options, bool, out); - - if (priv->aio_configured) - posix_aio_on (this); - else - posix_aio_off (this); - - GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks, - options, bool, out); - - GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, - options, bool, out); - - if (priv->node_uuid_pathinfo && - (gf_uuid_is_null (priv->glusterd_uuid))) { - gf_log (this->name, GF_LOG_INFO, - "glusterd uuid is NULL, pathinfo xattr 
would" - " fallback to <hostname>:<export>"); - } - - GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, - options, uint32, out); - posix_spawn_health_check_thread (this); - - ret = 0; -out: - return ret; -} - - -/** - * init - - */ -int -init (xlator_t *this) -{ - struct posix_private *_private = NULL; - data_t *dir_data = NULL; - data_t *tmp_data = NULL; - struct stat buf = {0,}; - gf_boolean_t tmp_bool = 0; - int dict_ret = 0; - int ret = 0; - int op_ret = -1; - ssize_t size = -1; - int32_t janitor_sleep = 0; - uuid_t old_uuid = {0,}; - uuid_t dict_uuid = {0,}; - uuid_t gfid = {0,}; - uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; - char *guuid = NULL; - int32_t uid = -1; - int32_t gid = -1; - char *batch_fsync_mode_str; - - dir_data = dict_get (this->options, "directory"); - - if (this->children) { - gf_log (this->name, GF_LOG_CRITICAL, - "FATAL: storage/posix cannot have subvolumes"); - ret = -1; - goto out; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling. Please check the volume file."); - } - - if (!dir_data) { - gf_log (this->name, GF_LOG_CRITICAL, - "Export directory not specified in volume file."); - ret = -1; - goto out; - } - - umask (000); // umask `masking' is done at the client side - - /* Check whether the specified directory exists, if not log it. 
*/ - op_ret = stat (dir_data->data, &buf); - if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { - gf_log (this->name, GF_LOG_ERROR, - "Directory '%s' doesn't exist, exiting.", - dir_data->data); - ret = -1; - goto out; - } - - /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr (dir_data->data, - "trusted.glusterfs.test", "working", 8, 0); - if (op_ret != -1) { - sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); - } else { - tmp_data = dict_get (this->options, - "mandate-attribute"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &tmp_bool) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for key " - "\"mandate-attribute\""); - ret = -1; - goto out; - } - if (!tmp_bool) { - gf_log (this->name, GF_LOG_WARNING, - "Extended attribute not supported, " - "starting as per option"); - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Extended attribute not supported, " - "exiting."); - ret = -1; - goto out; - } - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Extended attribute not supported, exiting."); - ret = -1; - goto out; - } - } - - tmp_data = dict_get (this->options, "volume-id"); - if (tmp_data) { - op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "wrong volume-id (%s) set in volume file", - tmp_data->data); - ret = -1; - goto out; - } - size = sys_lgetxattr (dir_data->data, - "trusted.glusterfs.volume-id", old_uuid, 16); - if (size == 16) { - if (gf_uuid_compare (old_uuid, dict_uuid)) { - gf_log (this->name, GF_LOG_ERROR, - "mismatching volume-id (%s) received. " - "already is a part of volume %s ", - tmp_data->data, uuid_utoa (old_uuid)); - ret = -1; - goto out; - } - } else if ((size == -1) && - (errno == ENODATA || errno == ENOATTR)) { - gf_log (this->name, GF_LOG_ERROR, - "Extended attribute trusted.glusterfs." 
- "volume-id is absent"); - ret = -1; - goto out; - - } else if ((size == -1) && (errno != ENODATA) && - (errno != ENOATTR)) { - /* Wrong 'volume-id' is set, it should be error */ - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to fetch volume-id (%s)", - dir_data->data, strerror (errno)); - ret = -1; - goto out; - } else { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "failed to fetch proper volume id from export"); - goto out; - } - } - - /* Now check if the export directory has some other 'gfid', - other than that of root '/' */ - size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); - if (size == 16) { - if (!__is_root_gfid (gfid)) { - gf_log (this->name, GF_LOG_WARNING, - "%s: gfid (%s) is not that of glusterfs '/' ", - dir_data->data, uuid_utoa (gfid)); - ret = -1; - goto out; - } - } else if (size != -1) { - /* Wrong 'gfid' is set, it should be error */ - gf_log (this->name, GF_LOG_WARNING, - "%s: wrong value set as gfid", - dir_data->data); - ret = -1; - goto out; - } else if ((size == -1) && (errno != ENODATA) && - (errno != ENOATTR)) { - /* Wrong 'gfid' is set, it should be error */ - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to fetch gfid (%s)", - dir_data->data, strerror (errno)); - ret = -1; - goto out; - } else { - /* First time volume, set the GFID */ - size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, - 16, XATTR_CREATE); - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to set gfid (%s)", - dir_data->data, strerror (errno)); - ret = -1; - goto out; - } - } - - size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, - NULL, 0); - if ((size < 0) && (errno == ENOTSUP)) - gf_log (this->name, GF_LOG_WARNING, - "Posix access control list is not supported."); - - ret = 0; - _private = GF_CALLOC (1, sizeof (*_private), - gf_posix_mt_posix_private); - if (!_private) { - ret = -1; - goto out; - } - - _private->base_path = gf_strdup (dir_data->data); - _private->base_path_length = strlen 
(_private->base_path); - - /* - * _XOPEN_PATH_MAX is the longest file path len we MUST - * support according to POSIX standard. When prepended - * by the brick base path it may exceed backed filesystem - * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If - * this is the case, chdir() to the brick base path and - * use relative paths when they are too long. See also - * MAKE_REAL_PATH in posix-handle.h - */ - _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); - if (_private->path_max != -1 && - _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { - ret = chdir(_private->base_path); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "chdir() to \"%s\" failed", - _private->base_path); - goto out; - } -#ifdef __NetBSD__ - /* - * At least on NetBSD, the chdir() above uncovers a - * race condition which cause file lookup to fail - * with ENODATA for a few seconds. The volume quickly - * reaches a sane state, but regression tests are fast - * enough to choke on it. The reason is obscure (as - * often with race conditions), but sleeping here for - * a second seems to workaround the problem. 
- */ - sleep(1); -#endif - } - - - LOCK_INIT (&_private->lock); - - ret = dict_get_str (this->options, "hostname", &_private->hostname); - if (ret) { - _private->hostname = GF_CALLOC (256, sizeof (char), - gf_common_mt_char); - if (!_private->hostname) { - goto out; - } - ret = gethostname (_private->hostname, 256); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "could not find hostname (%s)", strerror (errno)); - } - } - - _private->export_statfs = 1; - tmp_data = dict_get (this->options, "export-statfs-size"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->export_statfs) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "'export-statfs-size' takes only boolean " - "options"); - goto out; - } - if (!_private->export_statfs) - gf_log (this->name, GF_LOG_DEBUG, - "'statfs()' returns dummy size"); - } - - _private->background_unlink = 0; - tmp_data = dict_get (this->options, "background-unlink"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->background_unlink) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "'background-unlink' takes only boolean " - "options"); - goto out; - } - - if (_private->background_unlink) - gf_log (this->name, GF_LOG_DEBUG, - "unlinks will be performed in background"); - } - - tmp_data = dict_get (this->options, "o-direct"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->o_direct) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for 'o-direct'"); - goto out; - } - if (_private->o_direct) - gf_log (this->name, GF_LOG_DEBUG, - "o-direct mode is enabled (O_DIRECT " - "for every open)"); - } - - tmp_data = dict_get (this->options, "update-link-count-parent"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->update_pgfid_nlinks) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "wrong value provided for " - "'update-link-count-parent'"); - goto out; - } - if 
(_private->update_pgfid_nlinks) - gf_log (this->name, GF_LOG_DEBUG, - "update-link-count-parent is enabled. Thus for each " - "file an extended attribute representing the " - "number of hardlinks for that file within the " - "same parent directory is set."); - } - - ret = dict_get_str (this->options, "glusterd-uuid", &guuid); - if (!ret) { - if (gf_uuid_parse (guuid, _private->glusterd_uuid)) - gf_log (this->name, GF_LOG_WARNING, "Cannot parse " - "glusterd (node) UUID, node-uuid xattr " - "request would return - \"No such attribute\""); - } else { - gf_log (this->name, GF_LOG_DEBUG, "No glusterd (node) UUID " - "passed - node-uuid xattr request will return " - "\"No such attribute\""); - } - ret = 0; - - _private->janitor_sleep_duration = 600; - - dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", - &janitor_sleep); - if (dict_ret == 0) { - gf_log (this->name, GF_LOG_DEBUG, - "Setting janitor sleep duration to %d.", - janitor_sleep); - - _private->janitor_sleep_duration = janitor_sleep; - } - /* performing open dir on brick dir locks the brick dir - * and prevents it from being unmounted - */ - _private->mount_lock = opendir (dir_data->data); - if (!_private->mount_lock) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "Could not lock brick directory"); - goto out; - } -#ifndef GF_DARWIN_HOST_OS - { - struct rlimit lim; - lim.rlim_cur = 1048576; - lim.rlim_max = 1048576; - - if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to set 'ulimit -n " - " 1048576': %s", strerror(errno)); - lim.rlim_cur = 65536; - lim.rlim_max = 65536; - - if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to set maximum allowed open " - "file descriptors to 64k: %s", - strerror(errno)); - } - else { - gf_log (this->name, GF_LOG_INFO, - "Maximum allowed open file descriptors " - "set to 65536"); - } - } - } -#endif - this->private = (void *)_private; - - op_ret = posix_handle_init 
(this); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Posix handle setup failed"); - ret = -1; - goto out; - } - - op_ret = posix_handle_trash_init (this); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "Posix landfill setup failed"); - ret = -1; - goto out; - } - - _private->aio_init_done = _gf_false; - _private->aio_capable = _gf_false; - - GF_OPTION_INIT ("brick-uid", uid, int32, out); - GF_OPTION_INIT ("brick-gid", gid, int32, out); - if (uid != -1 || gid != -1) - posix_set_owner (this, uid, gid); - - GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); - - if (_private->aio_configured) { - op_ret = posix_aio_on (this); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Posix AIO init failed"); - ret = -1; - goto out; - } - } - - GF_OPTION_INIT ("node-uuid-pathinfo", - _private->node_uuid_pathinfo, bool, out); - if (_private->node_uuid_pathinfo && - (gf_uuid_is_null (_private->glusterd_uuid))) { - gf_log (this->name, GF_LOG_INFO, - "glusterd uuid is NULL, pathinfo xattr would" - " fallback to <hostname>:<export>"); - } - - _private->health_check_active = _gf_false; - GF_OPTION_INIT ("health-check-interval", - _private->health_check_interval, uint32, out); - if (_private->health_check_interval) - posix_spawn_health_check_thread (this); - - pthread_mutex_init (&_private->janitor_lock, NULL); - pthread_cond_init (&_private->janitor_cond, NULL); - INIT_LIST_HEAD (&_private->janitor_fds); - - posix_spawn_janitor_thread (this); - - pthread_mutex_init (&_private->fsync_mutex, NULL); - pthread_cond_init (&_private->fsync_cond, NULL); - INIT_LIST_HEAD (&_private->fsyncs); - - ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" - " creation failed (%s)", strerror (errno)); - goto out; - } - - GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); - - if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { - 
gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", - batch_fsync_mode_str); - goto out; - } - -#ifdef GF_DARWIN_HOST_OS - - char *xattr_user_namespace_mode_str = NULL; - - GF_OPTION_INIT ("xattr-user-namespace-mode", - xattr_user_namespace_mode_str, str, out); - - if (set_xattr_user_namespace_mode (_private, - xattr_user_namespace_mode_str) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "Unknown xattr user namespace mode string: %s", - xattr_user_namespace_mode_str); - goto out; - } -#endif - - GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, - uint32, out); -out: - return ret; -} - -void -fini (xlator_t *this) -{ - struct posix_private *priv = this->private; - if (!priv) - return; - this->private = NULL; - /*unlock brick dir*/ - if (priv->mount_lock) - closedir (priv->mount_lock); - GF_FREE (priv); - return; -} +extern struct volume_options posix_options[]; struct xlator_dumpops dumpops = { - .priv = posix_priv, - .inode = posix_inode, + .priv = posix_priv, + .inode = posix_inode, }; struct xlator_fops fops = { - .lookup = posix_lookup, - .stat = posix_stat, - .opendir = posix_opendir, - .readdir = posix_readdir, - .readdirp = posix_readdirp, - .readlink = posix_readlink, - .mknod = posix_mknod, - .mkdir = posix_mkdir, - .unlink = posix_unlink, - .rmdir = posix_rmdir, - .symlink = posix_symlink, - .rename = posix_rename, - .link = posix_link, - .truncate = posix_truncate, - .create = posix_create, - .open = posix_open, - .readv = posix_readv, - .writev = posix_writev, - .statfs = posix_statfs, - .flush = posix_flush, - .fsync = posix_fsync, - .setxattr = posix_setxattr, - .fsetxattr = posix_fsetxattr, - .getxattr = posix_getxattr, - .fgetxattr = posix_fgetxattr, - .removexattr = posix_removexattr, - .fremovexattr = posix_fremovexattr, - .fsyncdir = posix_fsyncdir, - .access = posix_access, - .ftruncate = posix_ftruncate, - .fstat = posix_fstat, - .lk = posix_lk, - .inodelk = posix_inodelk, - .finodelk = posix_finodelk, - 
.entrylk = posix_entrylk, - .fentrylk = posix_fentrylk, - .rchecksum = posix_rchecksum, - .xattrop = posix_xattrop, - .fxattrop = posix_fxattrop, - .setattr = posix_setattr, - .fsetattr = posix_fsetattr, - .fallocate = _posix_fallocate, - .discard = posix_discard, - .zerofill = posix_zerofill, - .ipc = posix_ipc, + .lookup = posix_lookup, + .stat = posix_stat, + .opendir = posix_opendir, + .readdir = posix_readdir, + .readdirp = posix_readdirp, + .readlink = posix_readlink, + .mknod = posix_mknod, + .mkdir = posix_mkdir, + .unlink = posix_unlink, + .rmdir = posix_rmdir, + .symlink = posix_symlink, + .rename = posix_rename, + .link = posix_link, + .truncate = posix_truncate, + .create = posix_create, + .open = posix_open, + .readv = posix_readv, + .writev = posix_writev, + .statfs = posix_statfs, + .flush = posix_flush, + .fsync = posix_fsync, + .setxattr = posix_setxattr, + .fsetxattr = posix_fsetxattr, + .getxattr = posix_getxattr, + .fgetxattr = posix_fgetxattr, + .removexattr = posix_removexattr, + .fremovexattr = posix_fremovexattr, + .fsyncdir = posix_fsyncdir, + .access = posix_access, + .ftruncate = posix_ftruncate, + .fstat = posix_fstat, + .lk = posix_lk, + .inodelk = posix_inodelk, + .finodelk = posix_finodelk, + .entrylk = posix_entrylk, + .fentrylk = posix_fentrylk, + .rchecksum = posix_rchecksum, + .xattrop = posix_xattrop, + .fxattrop = posix_fxattrop, + .setattr = posix_setattr, + .fsetattr = posix_fsetattr, + .fallocate = posix_glfallocate, + .discard = posix_discard, + .zerofill = posix_zerofill, + .ipc = posix_ipc, + .seek = posix_seek, + .lease = posix_lease, + .put = posix_put, + .copy_file_range = posix_copy_file_range, }; struct xlator_cbks cbks = { - .release = posix_release, - .releasedir = posix_releasedir, - .forget = posix_forget + .release = posix_release, + .releasedir = posix_releasedir, + .forget = posix_forget, }; -struct volume_options options[] = { - { .key = {"o-direct"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"directory"}, 
- .type = GF_OPTION_TYPE_PATH }, - { .key = {"hostname"}, - .type = GF_OPTION_TYPE_ANY }, - { .key = {"export-statfs-size"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"mandate-attribute"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"background-unlink"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"janitor-sleep-duration"}, - .type = GF_OPTION_TYPE_INT }, - { .key = {"volume-id"}, - .type = GF_OPTION_TYPE_ANY }, - { .key = {"glusterd-uuid"}, - .type = GF_OPTION_TYPE_STR }, - { - .key = {"linux-aio"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Support for native Linux AIO" - }, - { - .key = {"brick-uid"}, - .type = GF_OPTION_TYPE_INT, - .min = -1, - .validate = GF_OPT_VALIDATE_MIN, - .default_value = "-1", - .description = "Support for setting uid of brick's owner" - }, - { - .key = {"brick-gid"}, - .type = GF_OPTION_TYPE_INT, - .min = -1, - .validate = GF_OPT_VALIDATE_MIN, - .default_value = "-1", - .description = "Support for setting gid of brick's owner" - }, - { .key = {"node-uuid-pathinfo"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "return glusterd's node-uuid in pathinfo xattr" - " string instead of hostname" - }, - { - .key = {"health-check-interval"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .default_value = "30", - .validate = GF_OPT_VALIDATE_MIN, - .description = "Interval in seconds for a filesystem health check, " - "set to 0 to disable" - }, - { .key = {"batch-fsync-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "reverse-fsync", - .description = "Possible values:\n" - "\t- syncfs: Perform one syncfs() on behalf oa batch" - "of fsyncs.\n" - "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" - " of fsyncs and one fsync() per batch.\n" - "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" - " of fsyncs and fsync() each file in the batch in reverse order.\n" - " in reverse order.\n" - "\t- reverse-fsync: Perform fsync() of each file in the 
batch in" - " reverse order." - }, - { .key = {"batch-fsync-delay-usec"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - .description = "Num of usecs to wait for aggregating fsync" - " requests", - }, - { .key = {"update-link-count-parent"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Enable placeholders for gfid to path conversion" - }, -#if GF_DARWIN_HOST_OS - { .key = {"xattr-user-namespace-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "none", - .description = "Option to control XATTR user namespace on the raw filesystem: " - "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n" - " The raw filesystem will not be compatible with OS X Finder.\n" - "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n" - }, -#endif - { .key = {NULL} } +xlator_api_t xlator_api = { + .init = posix_init, + .fini = posix_fini, + .notify = posix_notify, + .reconfigure = posix_reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = posix_options, + .identifier = "posix", + .category = GF_MAINTAINED, }; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index d53a488ff6e..b8db146eef2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -10,23 +10,14 @@ #ifndef _POSIX_H #define _POSIX_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <stdio.h> #include <unistd.h> #include <sys/types.h> #include <dirent.h> #include <time.h> -#ifdef linux -#ifdef __GLIBC__ +#ifdef HAVE_SET_FSID #include <sys/fsuid.h> -#else -#include <unistd.h> -#endif #endif #ifdef HAVE_SYS_XATTR_H @@ -37,13 +28,10 @@ #include <sys/extattr.h> #endif -#include "xlator.h" -#include "inode.h" -#include "compat.h" -#include "timer.h" +#include <glusterfs/compat.h> +#include 
<glusterfs/timer.h> #include "posix-mem-types.h" -#include "posix-handle.h" -#include "call-stub.h" +#include <glusterfs/call-stub.h> #ifdef HAVE_LIBAIO #include <libaio.h> @@ -53,200 +41,633 @@ #define VECTOR_SIZE 64 * 1024 /* vector size 64KB*/ #define MAX_NO_VECT 1024 -#define LINKTO "trusted.glusterfs.dht.linkto" +#define XATTR_KEY_BUF_SIZE 4096 +#define XATTR_VAL_BUF_SIZE 8192 + +#define ACL_BUFFER_MAX 4096 /* size of character buffer */ + +#define DHT_LINKTO "trusted.glusterfs.dht.linkto" + +#define POSIX_GFID_HANDLE_SIZE(base_path_len) \ + (base_path_len + SLEN("/") + SLEN(GF_HIDDEN_PATH) + SLEN("/") + \ + SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */; + +#define POSIX_GFID_HANDLE_RELSIZE \ + SLEN("../") + SLEN("../") + SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1; + +#define GF_UNLINK_TRUE 0x0000000000000001 +#define GF_UNLINK_FALSE 0x0000000000000000 + +#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out) \ + do { \ + if (frame->root->pid >= 0 && priv->disk_space_full && \ + !dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \ + op_ret = -1; \ + op_errno = ENOSPC; \ + gf_msg_debug("posix", ENOSPC, \ + "disk space utilization reached limits" \ + " for path %s ", \ + priv->base_path); \ + goto out; \ + } \ + } while (0) + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. 
*/ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (sys_utimensat(AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) (lutimes(path, tv)) +#endif -#define POSIX_GFID_HANDLE_SIZE(base_path_len) (base_path_len + SLEN("/") \ - + SLEN(GF_HIDDEN_PATH) + SLEN("/") \ - + SLEN("00/") \ - + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */; +#define GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xattr_req, op_ret, \ + op_errno, _uuid_req, out) \ + do { \ + int _ret = 0; \ + /* TODO: Remove pid check once trash implements client side \ + * logic to assign gfid for entry creations inside .trashcan \ + */ \ + if (frame->root->pid == GF_SERVER_PID_TRASH) \ + break; \ + _ret = dict_get_gfuuid(xattr_req, "gfid-req", &_uuid_req); \ + if (_ret) { \ + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, \ + "failed to get the gfid from dict for %s", loc->path); \ + op_ret = -1; \ + op_errno = EINVAL; \ + goto out; \ + } \ + if (gf_uuid_is_null(_uuid_req)) { \ + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, \ + "gfid is null for %s", loc->path); \ + op_ret = -1; \ + op_errno = EINVAL; \ + goto out; \ + } \ + } while (0) /** * posix_fd - internal structure common to file and directory fd's */ struct posix_fd { - int fd; /* fd returned by the kernel */ - int32_t flags; /* flags for open/creat */ - DIR * dir; /* handle returned by the kernel */ - off_t dir_eof; /* offset at dir EOF */ - int odirect; - struct list_head list; /* to add to the janitor list */ + int fd; /* fd returned by the kernel */ + int32_t flags; /* flags for open/creat */ + DIR *dir; /* handle returned by the kernel */ + off_t dir_eof; /* offset at dir EOF */ + struct list_head list; /* to add to the janitor list */ + int odirect; + xlator_t *xl; + char _pad[4]; /* manual 
padding */ }; - struct posix_private { - char *base_path; - int32_t base_path_length; - int32_t path_max; - - gf_lock_t lock; - - char *hostname; - /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; - - time_t last_landfill_check; - int32_t janitor_sleep_duration; - struct list_head janitor_fds; - pthread_cond_t janitor_cond; - pthread_mutex_t janitor_lock; + char *base_path; + int32_t base_path_length; + int32_t path_max; - int64_t read_value; /* Total read, from init */ - int64_t write_value; /* Total write, from init */ - int64_t nr_files; -/* - In some cases, two exported volumes may reside on the same - partition on the server. Sending statvfs info for both - the volumes will lead to erroneous df output at the client, - since free space on the partition will be counted twice. + gf_lock_t lock; - In such cases, user can disable exporting statvfs info - on one of the volumes by setting this option. -*/ - gf_boolean_t export_statfs; + char *hostname; - gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ + time_t last_landfill_check; + gf_atomic_t read_value; /* Total read, from init */ + gf_atomic_t write_value; /* Total write, from init */ -/* - decide whether posix_unlink does open (file), unlink (file), close (fd) - instead of just unlink (file). with the former approach there is no lockout - of access to parent directory during removal of very large files for the - entire duration of freeing of data blocks. 
-*/ - gf_boolean_t background_unlink; + /* janitor task which cleans up /.trash (created by replicate) */ + struct gf_tw_timer_list *janitor; -/* janitor thread which cleans up /.trash (created by replicate) */ - pthread_t janitor; - gf_boolean_t janitor_present; - char * trash_path; -/* lock for brick dir */ - DIR *mount_lock; + char *trash_path; + /* lock for brick dir */ + int mount_lock; - struct stat handledir; + struct stat handledir; -/* uuid of glusterd that swapned the brick process */ - uuid_t glusterd_uuid; + /* uuid of glusterd that swapned the brick process */ + uuid_t glusterd_uuid; - gf_boolean_t aio_configured; - gf_boolean_t aio_init_done; - gf_boolean_t aio_capable; #ifdef HAVE_LIBAIO - io_context_t ctxp; - pthread_t aiothread; + io_context_t ctxp; + pthread_t aiothread; #endif - /* node-uuid in pathinfo xattr */ - gf_boolean_t node_uuid_pathinfo; - - pthread_t fsyncer; - struct list_head fsyncs; - pthread_mutex_t fsync_mutex; - pthread_cond_t fsync_cond; - int fsync_queue_count; - - enum { - BATCH_NONE = 0, - BATCH_SYNCFS, - BATCH_SYNCFS_SINGLE_FSYNC, - BATCH_REVERSE_FSYNC, - BATCH_SYNCFS_REVERSE_FSYNC - } batch_fsync_mode; - - uint32_t batch_fsync_delay_usec; - gf_boolean_t update_pgfid_nlinks; - - /* seconds to sleep between health checks */ - uint32_t health_check_interval; - pthread_t health_check; - gf_boolean_t health_check_active; + pthread_t fsyncer; + struct list_head fsyncs; + pthread_mutex_t fsync_mutex; + pthread_cond_t fsync_cond; + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; + int fsync_queue_count; + int32_t janitor_sleep_duration; + + enum { + BATCH_NONE = 0, + BATCH_SYNCFS, + BATCH_SYNCFS_SINGLE_FSYNC, + BATCH_REVERSE_FSYNC, + BATCH_SYNCFS_REVERSE_FSYNC + } batch_fsync_mode; + + uint32_t batch_fsync_delay_usec; + char gfid2path_sep[8]; + + /* seconds to sleep between health checks */ + uint32_t health_check_interval; + /* seconds to sleep to wait for aio write finish for health checks 
*/ + uint32_t health_check_timeout; + pthread_t health_check; + + double disk_reserve; + pthread_t disk_space_check; + uint32_t disk_space_full; #ifdef GF_DARWIN_HOST_OS - enum { - XATTR_NONE = 0, - XATTR_STRIP, - XATTR_APPEND, - XATTR_BOTH, - } xattr_user_namespace; + enum { + XATTR_NONE = 0, + XATTR_STRIP, + XATTR_APPEND, + XATTR_BOTH, + } xattr_user_namespace; #endif + /* Option to handle the cases of multiple bricks exported from + same backend. Very much usable in brick-splitting feature. */ + int32_t shared_brick_count; + + /*Option to set mode bit permission that will always be set on + file/directory. */ + mode_t force_create_mode; + mode_t force_directory_mode; + mode_t create_mask; + mode_t create_directory_mask; + uint32_t max_hardlinks; + int32_t arrdfd[256]; + int dirfd; + + /* This option is used for either to call a landfill_purge or not */ + gf_boolean_t disable_landfill_purge; + + gf_boolean_t fips_mode_rchecksum; + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; + + gf_boolean_t disk_space_check_active; + char disk_unit; + gf_boolean_t health_check_active; + gf_boolean_t update_pgfid_nlinks; + gf_boolean_t gfid2path; + /* node-uuid in pathinfo xattr */ + gf_boolean_t node_uuid_pathinfo; + /* + In some cases, two exported volumes may reside on the same + partition on the server. Sending statvfs info for both + the volumes will lead to erroneous df output at the client, + since free space on the partition will be counted twice. + + In such cases, user can disable exporting statvfs info + on one of the volumes by setting this option. + */ + gf_boolean_t export_statfs; + + gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ + + /* + decide whether posix_unlink does open (file), unlink (file), close (fd) + instead of just unlink (file). with the former approach there is no + lockout of access to parent directory during removal of very large files + for the entire duration of freeing of data blocks. 
+ */ + gf_boolean_t background_unlink; + gf_boolean_t aio_configured; + gf_boolean_t aio_init_done; + gf_boolean_t aio_capable; + uint32_t rel_fdcount; }; typedef struct { - xlator_t *this; - const char *real_path; - dict_t *xattr; - struct iatt *stbuf; - loc_t *loc; - inode_t *inode; /* for all do_xattrop() key handling */ - fd_t *fd; - int fdnum; - int flags; - int32_t op_errno; + call_frame_t *frame; + xlator_t *this; + const char *real_path; + dict_t *xattr; + struct iatt *stbuf; + loc_t *loc; + inode_t *inode; /* for all do_xattrop() key handling */ + fd_t *fd; + int fdnum; + int flags; + char *list; + size_t list_size; + int32_t op_errno; + + char _pad[4]; /* manual padding */ } posix_xattr_filler_t; +typedef struct { + uint64_t unlink_flag; + pthread_mutex_t xattrop_lock; + pthread_mutex_t write_atomic_lock; + pthread_mutex_t pgfid_lock; +} posix_inode_ctx_t; -#define POSIX_BASE_PATH(this) (((struct posix_private *)this->private)->base_path) +#define POSIX_BASE_PATH(this) \ + (((struct posix_private *)this->private)->base_path) -#define POSIX_BASE_PATH_LEN(this) (((struct posix_private *)this->private)->base_path_length) +#define POSIX_BASE_PATH_LEN(this) \ + (((struct posix_private *)this->private)->base_path_length) #define POSIX_PATH_MAX(this) (((struct posix_private *)this->private)->path_max) +#define POSIX_GET_FILE_UNLINK_PATH(base_path, gfid, unlink_path) \ + do { \ + int path_len = 0; \ + char gfid_str[64] = {0}; \ + uuid_utoa_r(gfid, gfid_str); \ + path_len = strlen(base_path) + 1 + SLEN(GF_UNLINK_PATH) + 1 + \ + UUID_CANONICAL_FORM_LEN + 1; \ + unlink_path = alloca(path_len); \ + if (!unlink_path) { \ + gf_msg("posix", GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, \ + "Failed to get unlink_path"); \ + break; \ + } \ + sprintf(unlink_path, "%s/%s/%s", base_path, GF_UNLINK_PATH, gfid_str); \ + } while (0) + /* Helper functions */ -int posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, - dict_t *xattr_req); -int posix_fdstat (xlator_t *this, 
int fd, struct iatt *stbuf_p); -int posix_istat (xlator_t *this, uuid_t gfid, const char *basename, - struct iatt *iatt); -int posix_pstat (xlator_t *this, uuid_t gfid, const char *real_path, - struct iatt *iatt); -dict_t *posix_xattr_fill (xlator_t *this, const char *path, loc_t *loc, - fd_t *fd, int fdnum, dict_t *xattr, struct iatt *buf); -int posix_handle_pair (xlator_t *this, const char *real_path, char *key, - data_t *value, int flags); -int posix_fhandle_pair (xlator_t *this, int fd, char *key, data_t *value, - int flags); -void posix_spawn_janitor_thread (xlator_t *this); -int posix_get_file_contents (xlator_t *this, uuid_t pargfid, - const char *name, char **contents); -int posix_set_file_contents (xlator_t *this, const char *path, char *key, - data_t *value, int flags); -int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req); -int posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req); -int posix_entry_create_xattr_set (xlator_t *this, const char *path, - dict_t *dict); - -int posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd); -void posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf); - -gf_boolean_t posix_special_xattr (char **pattern, char *key); +int +posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx); + +int +posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, + posix_inode_ctx_t **ctx); + +int +__posix_inode_ctx_set_unlink_flag(inode_t *inode, xlator_t *this, uint64_t ctx); + +int +__posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, + posix_inode_ctx_t **ctx); + +int +posix_gfid_set(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req, + pid_t pid, int *op_errno); +int +posix_fdstat(xlator_t *this, inode_t *inode, int fd, struct iatt *stbuf_p); +int +posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename, + struct iatt *iatt); +int +posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char 
*real_path, + struct iatt *iatt, gf_boolean_t inode_locked); +dict_t * +posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd, + int fdnum, dict_t *xattr, struct iatt *buf); +int +posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, + data_t *value, int flags, struct iatt *stbuf); +int +posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, + data_t *value, int flags, struct iatt *stbuf, fd_t *_fd); void -__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, - off_t offset, size_t size); -void posix_spawn_health_check_thread (xlator_t *this); +posix_janitor_timer_start(xlator_t *this); +int +posix_acl_xattr_set(xlator_t *this, const char *path, dict_t *xattr_req); +int +posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, + dict_t *xattr_req); +int +posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, + dict_t *dict); -void *posix_fsyncer (void *); int -posix_get_ancestry (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, int32_t *op_errno, - dict_t *xdata); +posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, + int *op_errno); +void +posix_fill_ino_from_gfid(xlator_t *this, struct iatt *buf); + +gf_boolean_t +posix_special_xattr(char **pattern, char *key); void -posix_gfid_unset (xlator_t *this, dict_t *xdata); +__posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags, + off_t offset, size_t size); +int +posix_spawn_health_check_thread(xlator_t *this); int -posix_pacl_set (const char *path, const char *key, const char *acl_s); +posix_spawn_disk_space_check_thread(xlator_t *this); + +void * +posix_fsyncer(void *); +int +posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, + char **path, int type, int32_t *op_errno, dict_t *xdata); +int +posix_handle_mdata_xattr(call_frame_t *frame, const char *name, int *op_errno); +int +posix_handle_georep_xattrs(call_frame_t *, const char *, int 
*, gf_boolean_t); +int32_t +posix_resolve_dirgfid_to_path(const uuid_t dirgfid, const char *brick_path, + const char *bname, char **path); +void +posix_gfid_unset(xlator_t *this, dict_t *xdata); int -posix_pacl_get (const char *path, const char *key, char **acl_s); +posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s); int32_t -posix_get_objectsignature (char *, dict_t *); +posix_get_objectsignature(char *, dict_t *); int32_t -posix_fdget_objectsignature (int, dict_t *); +posix_fdget_objectsignature(int, dict_t *); + +gf_boolean_t +posix_is_bulk_removexattr(char *name, dict_t *dict); + +int32_t +posix_set_iatt_in_dict(dict_t *, struct iatt *, struct iatt *); + +mode_t posix_override_umask(mode_t, mode_t); + +int32_t +posix_priv(xlator_t *this); + +int32_t +posix_inode(xlator_t *this); + +void +posix_fini(xlator_t *this); + +int +posix_init(xlator_t *this); + +int +posix_reconfigure(xlator_t *this, dict_t *options); + +int32_t +posix_notify(xlator_t *this, int32_t event, void *data, ...); + +/* posix-entry-ops.c FOP signatures */ +int32_t +posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int +posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata); + +int +posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata); + +int +posix_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata); + +int +posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev, mode_t umask, dict_t *xdata); + +int +posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata); + +int32_t +posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata); + +int 
+posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata); + +/* posix-inode-fs-ops.c FOP signatures */ +int +posix_forget(xlator_t *this, inode_t *inode); + +int32_t +posix_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int32_t +posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +int +posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata); + +int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata); + +int32_t +posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata); + +int32_t +posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); + +int32_t +posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata); + +int32_t +posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata); + +int32_t +posix_releasedir(xlator_t *this, fd_t *fd); + +int32_t +posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata); + +int32_t +posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata); + +int32_t +posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int +posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); + +int32_t +posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata); + 
+int32_t +posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int32_t +posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata); + +int32_t +posix_release(xlator_t *this, fd_t *fd); + +int32_t +posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata); + +int32_t +posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata); + +int +posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata); + +int +posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, + char **path, int type, int32_t *op_errno, dict_t *xdata); + +int32_t +posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata); + +int32_t +posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata); + +int32_t +posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int flags, dict_t *xdata); + +int32_t +posix_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata); + +int32_t +posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata); + +int32_t +posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata); + +int +posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + +int +posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + +int +posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata); + +int32_t +posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata); + +int32_t +posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata); + +int32_t 
+posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct gf_lease *lease, dict_t *xdata); + +int32_t +posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata); + +int32_t +posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int32_t +posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int32_t +posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata); + +int32_t +posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata); + +int32_t +posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata); + +int32_t +posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict); + +int32_t +posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int32_t len, dict_t *xdata); + +int32_t +posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, uint32_t flags, struct iovec *vector, int32_t count, + off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata); + +int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata); + +int32_t +posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict, + struct iatt *in_stbuf); + +gf_cs_obj_state +posix_cs_check_status(xlator_t *this, const char *realpath, int *fd, + struct iatt *buf); + +int +posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state, + char const *path, int *fd); + +gf_cs_obj_state +posix_cs_heal_state(xlator_t *this, const 
char *path, int *fd, + struct iatt *stbuf); +int +posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + struct iatt *buf, const char *realpath, dict_t *xattr_req, + dict_t **xattr_rsp, gf_boolean_t ignore_failure); +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + +int +posix_spawn_ctx_janitor_thread(xlator_t *this); + +void +posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); + +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data); #endif /* _POSIX_H */ |
