diff options
Diffstat (limited to 'xlators/mount/fuse')
| -rw-r--r-- | xlators/mount/fuse/src/Makefile.am | 34 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 6412 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 537 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-extra.c | 137 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-extra.h | 42 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-helpers.c | 605 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-mem-types.h | 28 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-resolve.c | 724 | ||||
| -rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 502 | ||||
| -rwxr-xr-x | xlators/mount/fuse/utils/mount_glusterfs.in | 28 |
10 files changed, 6866 insertions, 2183 deletions
diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am index 9d8d45e4f..653121d18 100644 --- a/xlators/mount/fuse/src/Makefile.am +++ b/xlators/mount/fuse/src/Makefile.am @@ -1,14 +1,36 @@ +noinst_HEADERS_linux = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\ + $(CONTRIBDIR)/fuse-include/mount_util.h\ + $(CONTRIBDIR)/fuse-lib/mount-gluster-compat.h +noinst_HEADERS_darwin = $(CONTRIBDIR)/fuse-include/fuse_kernel_macfuse.h +noinst_HEADERS_common = $(CONTRIBDIR)/fuse-include/fuse-mount.h\ + $(CONTRIBDIR)/fuse-include/fuse-misc.h fuse-mem-types.h \ + fuse-bridge.h -noinst_HEADERS = fuse-extra.h +if GF_DARWIN_HOST_OS + noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_darwin) +else + noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_linux) +endif xlator_LTLIBRARIES = fuse.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount -fuse_la_SOURCES = fuse-bridge.c fuse-extra.c -fuse_la_LDFLAGS = -module -avoidversion -shared -nostartfiles $(GF_FUSE_LDADD) -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \ - -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -DFUSE_USE_VERSION=26 +if GF_DARWIN_HOST_OS + mount_source=$(CONTRIBDIR)/macfuse/mount_darwin.c +else + mount_source=$(CONTRIBDIR)/fuse-lib/mount.c $(CONTRIBDIR)/fuse-lib/mount-common.c +endif +fuse_la_SOURCES = fuse-helpers.c fuse-resolve.c fuse-bridge.c \ + $(CONTRIBDIR)/fuse-lib/misc.c $(mount_source) -CLEANFILES = +fuse_la_LDFLAGS = -module -avoid-version +fuse_la_LIBADD = @GF_FUSE_LDADD@ +AM_CPPFLAGS = $(GF_CPPFLAGS) \ + -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/fuse-include \ + -I$(CONTRIBDIR)/fuse-lib $(GF_FUSE_CFLAGS) + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index ff214703b..6a5587c2d 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -1,1976 +1,3162 @@ /* - Copyright (c) 2006-2009 Z 
RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ +#include <sys/wait.h> +#include "fuse-bridge.h" +#include "mount-gluster-compat.h" +#include "glusterfs.h" +#include "glusterfs-acl.h" + +#ifdef __NetBSD__ +#undef open /* in perfuse.h, pulled from mount-gluster-compat.h */ +#endif + +static int gf_fuse_conn_err_log; +static int gf_fuse_xattr_enotsup_log; + +void fini (xlator_t *this_xl); + +static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + /* - * TODO: - * Need to free_state() when fuse_reply_err() + return. - * Check loc->path for "" after fuse_loc_fill in all fops - * (now being done in getattr, lookup) or better - make - * fuse_loc_fill() and inode_path() return success/failure. + * Send an invalidate notification up to fuse to purge the file from local + * page cache. 
*/ +static int32_t +fuse_invalidate(xlator_t *this, inode_t *inode) +{ + fuse_private_t *priv = this->private; + uint64_t nodeid; -#include <stdint.h> -#include <signal.h> -#include <pthread.h> + /* + * NOTE: We only invalidate at the moment if fopen_keep_cache is + * enabled because otherwise this is a departure from default + * behavior. Specifically, the performance/write-behind xlator + * causes unconditional invalidations on write requests. + */ + if (!priv->fopen_keep_cache) + return 0; -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif /* _CONFIG_H */ + nodeid = inode_to_fuse_nodeid(inode); + gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid); + fuse_log_eh (this, "Sending invalidate inode id: %lu gfid: %s", nodeid, + uuid_utoa (inode->gfid)); + fuse_invalidate_inode(this, nodeid); -#include "glusterfs.h" -#include "logging.h" -#include "xlator.h" -#include "glusterfs.h" -#include "defaults.h" -#include "common-utils.h" - -#include <fuse/fuse_lowlevel.h> - -#include "fuse-extra.h" -#include "list.h" -#include "dict.h" - -#include "compat.h" -#include "compat-errno.h" - -/* TODO: when supporting posix acl, remove this definition */ -#define DISABLE_POSIX_ACL - -#define ZR_MOUNTPOINT_OPT "mountpoint" -#define ZR_DIRECT_IO_OPT "direct-io-mode" -#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check" - -#define BIG_FUSE_CHANNEL_SIZE 131072 - -struct fuse_private { - int fd; - struct fuse *fuse; - struct fuse_session *se; - struct fuse_chan *ch; - char *volfile; - size_t volfile_size; - char *mount_point; - struct iobuf *iobuf; - pthread_t fuse_thread; - char fuse_thread_started; - uint32_t direct_io_mode; - double entry_timeout; - double attribute_timeout; - pthread_cond_t first_call_cond; - pthread_mutex_t first_call_mutex; - char first_call; - gf_boolean_t strict_volfile_check; -}; -typedef struct fuse_private fuse_private_t; - -#define _FI_TO_FD(fi) ((fd_t *)((long)fi->fh)) - -#define FI_TO_FD(fi) ((_FI_TO_FD (fi))?(fd_ref 
(_FI_TO_FD(fi))):((fd_t *) 0)) - -#define FUSE_FOP(state, ret, op_num, fop, args ...) \ - do { \ - call_frame_t *frame = get_call_frame_for_req (state, 1); \ - xlator_t *xl = frame->this->children ? \ - frame->this->children->xlator : NULL; \ - frame->root->state = state; \ - frame->root->op = op_num; \ - STACK_WIND (frame, ret, xl, xl->fops->fop, args); \ - } while (0) - -#define GF_SELECT_LOG_LEVEL(_errno) \ - (((_errno == ENOENT) || (_errno == ESTALE))? \ - GF_LOG_DEBUG) - -typedef struct { - void *pool; - xlator_t *this; - inode_table_t *itable; - loc_t loc; - loc_t loc2; - fuse_req_t req; - int32_t flags; - off_t off; - size_t size; - unsigned long nlookup; - fd_t *fd; - dict_t *dict; - char *name; - char is_revalidate; -} fuse_state_t; - -int fuse_chan_receive (struct fuse_chan *ch, char *buf, int32_t size); + return 0; +} +static int32_t +fuse_forget_cbk (xlator_t *this, inode_t *inode) +{ + //Nothing to free in inode ctx, hence return. + return 0; +} -static void -free_state (fuse_state_t *state) +void +fuse_inode_set_need_lookup (inode_t *inode, xlator_t *this) { - loc_wipe (&state->loc); + uint64_t need_lookup = 1; - loc_wipe (&state->loc2); + if (!inode || !this) + return; - if (state->dict) { - dict_unref (state->dict); - state->dict = (void *)0xaaaaeeee; - } - if (state->name) { - FREE (state->name); - state->name = NULL; - } - if (state->fd) { - fd_unref (state->fd); - state->fd = (void *)0xfdfdfdfd; - } -#ifdef DEBUG - memset (state, 0x90, sizeof (*state)); -#endif - FREE (state); - state = NULL; + inode_ctx_set (inode, this, &need_lookup); + + return; } -fuse_state_t * -state_from_req (fuse_req_t req) +gf_boolean_t +fuse_inode_needs_lookup (inode_t *inode, xlator_t *this) { - fuse_state_t *state = NULL; - xlator_t *this = NULL; + uint64_t need_lookup = 0; + gf_boolean_t ret = _gf_false; - this = fuse_req_userdata (req); + if (!inode || !this) + return ret; - state = (void *)calloc (1, sizeof (*state)); - if (!state) - return NULL; - state->pool = 
this->ctx->pool; - state->itable = this->itable; - state->req = req; - state->this = this; + inode_ctx_get (inode, this, &need_lookup); + if (need_lookup) + ret = _gf_true; + need_lookup = 0; + inode_ctx_set (inode, this, &need_lookup); - return state; + return ret; } -static pid_t -get_pid_from_req (fuse_req_t req) +fuse_fd_ctx_t * +__fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd) { - const struct fuse_ctx *ctx = NULL; + uint64_t val = 0; + int32_t ret = 0; + fuse_fd_ctx_t *fd_ctx = NULL; + + ret = __fd_ctx_get (fd, this, &val); + + fd_ctx = (fuse_fd_ctx_t *)(unsigned long) val; - ctx = fuse_req_ctx(req); - return ctx->pid; + if (fd_ctx == NULL) { + fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx), + gf_fuse_mt_fd_ctx_t); + if (!fd_ctx) { + goto out; + } + ret = __fd_ctx_set (fd, this, + (uint64_t)(unsigned long)fd_ctx); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "fd-ctx-set failed"); + GF_FREE (fd_ctx); + fd_ctx = NULL; + } + } +out: + return fd_ctx; } +fuse_fd_ctx_t * +fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd) +{ + fuse_fd_ctx_t *fd_ctx = NULL; -static call_frame_t * -get_call_frame_for_req (fuse_state_t *state, char d) + if ((fd == NULL) || (this == NULL)) { + goto out; + } + + LOCK (&fd->lock); + { + fd_ctx = __fuse_fd_ctx_check_n_create (this, fd); + } + UNLOCK (&fd->lock); + +out: + return fd_ctx; +} + +fuse_fd_ctx_t * +fuse_fd_ctx_get (xlator_t *this, fd_t *fd) { - call_pool_t *pool = NULL; - fuse_req_t req = NULL; - const struct fuse_ctx *ctx = NULL; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; + uint64_t value = 0; + int ret = 0; - pool = state->pool; - req = state->req; + ret = fd_ctx_get (fd, this, &value); + if (ret < 0) { + goto out; + } - if (req) { - this = fuse_req_userdata (req); - } else { - this = state->this; - } - priv = this->private; + fdctx = (fuse_fd_ctx_t *) (unsigned long)value; - frame = create_frame (this, pool); +out: + return fdctx; +} 
- if (req) { - ctx = fuse_req_ctx(req); +/* + * iov_out should contain a fuse_out_header at zeroth position. + * The error value of this header is sent to kernel. + */ +static int +send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out, + int count) +{ + fuse_private_t *priv = NULL; + struct fuse_out_header *fouh = NULL; + int res, i; - frame->root->uid = ctx->uid; - frame->root->gid = ctx->gid; - frame->root->pid = ctx->pid; - frame->root->unique = req_callid (req); + if (!this || !finh || !iov_out) { + gf_log ("send_fuse_iov", GF_LOG_ERROR,"Invalid arguments"); + return EINVAL; } + priv = this->private; + + fouh = iov_out[0].iov_base; + iov_out[0].iov_len = sizeof (*fouh); + fouh->len = 0; + for (i = 0; i < count; i++) + fouh->len += iov_out[i].iov_len; + fouh->unique = finh->unique; - frame->root->type = GF_OP_TYPE_FOP_REQUEST; + res = writev (priv->fd, iov_out, count); - return frame; + if (res == -1) + return errno; + if (res != fouh->len) + return EINVAL; + + if (priv->fuse_dump_fd != -1) { + char w = 'W'; + + pthread_mutex_lock (&priv->fuse_dump_mutex); + res = write (priv->fuse_dump_fd, &w, 1); + if (res != -1) + res = writev (priv->fuse_dump_fd, iov_out, count); + pthread_mutex_unlock (&priv->fuse_dump_mutex); + + if (res == -1) + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "failed to dump fuse message (W): %s", + strerror (errno)); + } + + return 0; } +static int +send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) +{ + struct fuse_out_header fouh = {0, }; + struct iovec iov_out[2]; + + fouh.error = 0; + iov_out[0].iov_base = &fouh; + iov_out[1].iov_base = data; + iov_out[1].iov_len = size; + + return send_fuse_iov (this, finh, iov_out, 2); +} + +#define send_fuse_obj(this, finh, obj) \ + send_fuse_data (this, finh, obj, sizeof (*(obj))) + -GF_MUST_CHECK static int32_t -fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino, - ino_t par, const char *name) +static void +fuse_invalidate_entry (xlator_t 
*this, uint64_t fuse_ino) { - inode_t *inode = NULL; - inode_t *parent = NULL; - int32_t ret = -1; - char *path = NULL; - - /* resistance against multiple invocation of loc_fill not to get - reference leaks via inode_search() */ - - inode = loc->inode; - - if (!inode) { - if (ino) - inode = inode_search (state->itable, ino, NULL); - if (par && name) - inode = inode_search (state->itable, par, name); - - loc->inode = inode; - if (inode) - loc->ino = inode->ino; - } - - parent = loc->parent; - if (!parent) { - if (inode) - parent = inode_parent (inode, par, name); - else - parent = inode_search (state->itable, par, NULL); - loc->parent = parent; - } - - if (name && parent) { - ret = inode_path (parent, name, &path); - if (ret <= 0) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "inode_path failed for %"PRId64"/%s", - parent->ino, name); - goto fail; - } else { - loc->path = path; - } - } else if (inode) { - ret = inode_path (inode, NULL, &path); - if (ret <= 0) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "inode_path failed for %"PRId64, - inode->ino); - goto fail; - } else { - loc->path = path; - } - } - if (loc->path) { - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - else loc->name = ""; - } - - if ((ino != 1) && - (parent == NULL)) { - gf_log ("fuse-bridge", GF_LOG_DEBUG, - "failed to search parent for %"PRId64"/%s (%"PRId64")", - (ino_t)par, name, (ino_t)ino); - ret = -1; - goto fail; - } - ret = 0; -fail: - return ret; + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_entry_out *fnieo = NULL; + fuse_private_t *priv = NULL; + dentry_t *dentry = NULL; + inode_t *inode = NULL; + size_t nlen = 0; + int rv = 0; + char inval_buf[INVAL_BUF_SIZE] = {0,}; + + fouh = (struct fuse_out_header *)inval_buf; + fnieo = (struct fuse_notify_inval_entry_out *)(fouh + 1); + + priv = this->private; + if (priv->revchan_out == -1) + return; + + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; + + inode = fuse_ino_to_inode (fuse_ino, 
this); + + list_for_each_entry (dentry, &inode->dentry_list, inode_list) { + nlen = strlen (dentry->name); + fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1; + fnieo->parent = inode_to_fuse_nodeid (dentry->parent); + + fnieo->namelen = nlen; + strcpy (inval_buf + sizeof (*fouh) + sizeof (*fnieo), dentry->name); + + rv = write (priv->revchan_out, inval_buf, fouh->len); + if (rv != fouh->len) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "kernel notification daemon defunct"); + + close (priv->fd); + break; + } + + gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: " + "%"PRIu64"/%s", fnieo->parent, dentry->name); + + if (dentry->parent) { + fuse_log_eh (this, "Invalidated entry %s (parent: %s)", + dentry->name, + uuid_utoa (dentry->parent->gfid)); + } else { + fuse_log_eh (this, "Invalidated entry %s(nodeid: %ld)", + dentry->name, fnieo->parent); + } + } + + if (inode) + inode_unref (inode); } +/* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. 
+ */ +static void +fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) +{ + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_inode_out *fniio = NULL; + fuse_private_t *priv = NULL; + int rv = 0; + char inval_buf[INVAL_BUF_SIZE] = {0}; + inode_t *inode = NULL; -static int -need_fresh_lookup (int32_t op_ret, int32_t op_errno, - loc_t *loc, struct stat *buf) -{ - if (op_ret == -1) { - gf_log ("fuse-bridge", GF_LOG_DEBUG, - "revalidate of %s failed (%s)", - loc->path, strerror (op_errno)); - return 1; - } + fouh = (struct fuse_out_header *) inval_buf; + fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1); + + priv = this->private; + + if (priv->revchan_out < 0) + return; + + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_INODE; + fouh->len = sizeof(struct fuse_out_header) + + sizeof(struct fuse_notify_inval_inode_out); - if (loc->inode->ino != buf->st_ino) { - gf_log ("fuse-bridge", GF_LOG_DEBUG, - "inode num of %s changed %"PRId64" -> %"PRId64, - loc->path, loc->inode->ino, buf->st_ino); - return 1; + /* inval the entire mapping until we learn how to be more granular */ + fniio->ino = fuse_ino; + fniio->off = 0; + fniio->len = -1; + + inode = fuse_ino_to_inode (fuse_ino, this); + + rv = write(priv->revchan_out, inval_buf, fouh->len); + if (rv != fouh->len) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notification " + "daemon defunct"); + close(priv->fd); } - if ((loc->inode->st_mode & S_IFMT) ^ (buf->st_mode & S_IFMT)) { - gf_log ("fuse-bridge", GF_LOG_DEBUG, - "inode mode of %s changed 0%o -> 0%o", - loc->path, loc->inode->st_mode, buf->st_mode); - return 1; - } + gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %lu", fuse_ino); - return 0; + if (inode) { + fuse_log_eh (this, "Invalidated inode %lu (gfid: %s)", + fuse_ino, uuid_utoa (inode->gfid)); + } else { + fuse_log_eh (this, "Invalidated inode %lu ", fuse_ino); + } + + if (inode) + inode_unref (inode); } +int +send_fuse_err (xlator_t *this, fuse_in_header_t *finh, 
int error) +{ + struct fuse_out_header fouh = {0, }; + struct iovec iov_out; + inode_t *inode = NULL; -static int -fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct stat *stat, dict_t *dict); + fouh.error = -error; + iov_out.iov_base = &fouh; + + inode = fuse_ino_to_inode (finh->nodeid, this); + + // filter out ENOENT + if (error != ENOENT) { + if (inode) { + fuse_log_eh (this,"Sending %s for operation %d on " + "inode %s", strerror (error), finh->opcode, + uuid_utoa (inode->gfid)); + } else { + fuse_log_eh (this, "Sending %s for operation %d on " + "inode %ld", strerror (error), + finh->opcode, finh->nodeid); + } + } + + if (inode) + inode_unref (inode); + + return send_fuse_iov (this, finh, &iov_out, 1); +} static int fuse_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct stat *buf) + inode_t *inode, struct iatt *buf, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_req_t req = NULL; - struct fuse_entry_param e = {0, }; - fuse_private_t *priv = NULL; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + struct fuse_entry_out feo = {0, }; + fuse_private_t *priv = NULL; + inode_t *linked_inode = NULL; priv = this->private; state = frame->root->state; - req = state->req; + finh = state->finh; - if (!op_ret && state->loc.ino == 1) { - buf->st_ino = 1; + if (op_ret == 0) { + if (__is_root_gfid (state->loc.inode->gfid)) + buf->ia_ino = 1; + if (uuid_is_null (buf->ia_gfid)) { + /* With a NULL gfid inode linking is + not possible. Let's not pretend this + call was a "success". + */ + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "Received NULL gfid for %s. 
Forcing EIO", + state->loc.path); + op_ret = -1; + op_errno = EIO; + } } - if (state->is_revalidate == 1 - && need_fresh_lookup (op_ret, op_errno, &state->loc, buf)) { - inode_unref (state->loc.inode); - state->loc.inode = inode_new (state->itable); - state->is_revalidate = 2; - - STACK_WIND (frame, fuse_lookup_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, - &state->loc, state->dict); - - return 0; - } + /* log into the event-history after the null uuid check is done, since + * the op_ret and op_errno are being changed if the gfid is NULL. + */ + fuse_log_eh (this, "op_ret: %d op_errno: %d " + "%"PRIu64": %s() %s => %s", op_ret, op_errno, + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, (op_ret == 0)? + uuid_utoa(buf->ia_gfid):uuid_utoa(state->loc.gfid)); if (op_ret == 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => %"PRId64" (%"PRId64")", - frame->root->unique, gf_fop_list[frame->root->op], - state->loc.path, buf->st_ino, state->loc.ino); + "%"PRIu64": %s() %s => %"PRIu64, + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, buf->ia_ino); - inode_link (inode, state->loc.parent, state->loc.name, buf); + buf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr (buf, &feo.attr, priv->enable_ino32); - inode_lookup (inode); + if (!buf->ia_ino) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": %s() %s returning inode 0", + frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path); + } - /* TODO: make these timeouts configurable (via meta?) 
*/ - e.ino = inode->ino; + linked_inode = inode_link (inode, state->loc.parent, + state->loc.name, buf); -#ifdef GF_DARWIN_HOST_OS - e.generation = 0; -#else - e.generation = buf->st_ctime; -#endif + if (linked_inode != inode) { + } - e.entry_timeout = priv->entry_timeout; - e.attr_timeout = priv->attribute_timeout; - e.attr = *buf; - e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE; - - if (!e.ino || !buf->st_ino) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s() %s returning inode 0", - frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path); - } + inode_lookup (linked_inode); - if (state->loc.parent) - fuse_reply_entry (req, &e); - else - fuse_reply_attr (req, buf, priv->attribute_timeout); + feo.nodeid = inode_to_fuse_nodeid (linked_inode); + + inode_unref (linked_inode); + + feo.entry_valid = + calc_timeout_sec (priv->entry_timeout); + feo.entry_valid_nsec = + calc_timeout_nsec (priv->entry_timeout); + feo.attr_valid = + calc_timeout_sec (priv->attribute_timeout); + feo.attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); + +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv->proto_minor >= 9 ? + send_fuse_obj (this, finh, &feo) : + send_fuse_data (this, finh, &feo, + FUSE_COMPAT_ENTRY_OUT_SIZE); +#else + send_fuse_obj (this, finh, &feo); +#endif } else { gf_log ("glusterfs-fuse", (op_errno == ENOENT ? 
GF_LOG_TRACE : GF_LOG_WARNING), - "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, gf_fop_list[frame->root->op], state->loc.path, - strerror (op_errno)); - fuse_reply_err (req, op_errno); + strerror (op_errno)); + + if ((op_errno == ENOENT) && (priv->negative_timeout != 0)) { + feo.entry_valid = + calc_timeout_sec (priv->negative_timeout); + feo.entry_valid_nsec = + calc_timeout_nsec (priv->negative_timeout); + send_fuse_obj (this, finh, &feo); + } else { + send_fuse_err (this, state->finh, op_errno); + } } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +static int +fuse_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, buf, + xdata); + return 0; +} static int fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct stat *stat, dict_t *dict) + inode_t *inode, struct iatt *stat, dict_t *dict, + struct iatt *postparent) { - fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat); - return 0; -} + fuse_state_t *state = NULL; + call_frame_t *prev = NULL; + inode_table_t *itable = NULL; + state = frame->root->state; + prev = cookie; -static void -fuse_lookup (fuse_req_t req, fuse_ino_t par, const char *name) -{ - fuse_state_t *state = NULL; - int32_t ret = -1; - - state = state_from_req (req); + if (op_ret == -1 && state->is_revalidate == 1) { + itable = state->itable; + /* + * A stale mapping might exist for a dentry/inode that has been + * removed from another client. 
+ */ + if (op_errno == ENOENT) + inode_unlink(state->loc.inode, state->loc.parent, + state->loc.name); + inode_unref (state->loc.inode); + state->loc.inode = inode_new (itable); + state->is_revalidate = 2; + if (uuid_is_null (state->gfid)) + uuid_generate (state->gfid); + fuse_gfid_set (state); - ret = fuse_loc_fill (&state->loc, state, 0, par, name); + STACK_WIND (frame, fuse_lookup_cbk, + prev->this, prev->this->fops->lookup, + &state->loc, state->xdata); + return 0; + } - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": LOOKUP %"PRId64"/%s (fuse_loc_fill() failed)", - req_callid (req), (ino_t)par, name); - free_state (state); - fuse_reply_err (req, ENOENT); - return; - } + fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat, + dict); + return 0; +} - if (!state->loc.inode) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": LOOKUP %s", req_callid (req), +void +fuse_lookup_resume (fuse_state_t *state) +{ + if (!state->loc.parent && !state->loc.inode) { + gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s", state->loc.path); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } - state->loc.inode = inode_new (state->itable); - /* to differntiate in entry_cbk what kind of call it is */ - state->is_revalidate = -1; - } else { + /* parent was resolved, entry could not, may be a missing gfid? 
+ * Hence try to do a regular lookup + */ + if ((state->resolve.op_ret == -1) + && (state->resolve.op_errno == ENODATA)) { + state->resolve.op_ret = 0; + } + + if (state->loc.inode) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": LOOKUP %s(%"PRId64")", req_callid (req), - state->loc.path, state->loc.inode->ino); + "%"PRIu64": LOOKUP %s(%s)", state->finh->unique, + state->loc.path, uuid_utoa (state->loc.inode->gfid)); state->is_revalidate = 1; + } else { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": LOOKUP %s", state->finh->unique, + state->loc.path); + state->loc.inode = inode_new (state->loc.parent->table); + if (uuid_is_null (state->gfid)) + uuid_generate (state->gfid); + fuse_gfid_set (state); } - - state->dict = dict_new(); FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP, - lookup, &state->loc, state->dict); + lookup, &state->loc, state->xdata); } +static void +fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + char *name = msg; + fuse_state_t *state = NULL; + + GET_STATE (this, finh, state); + + (void) fuse_resolve_entry_init (state, &state->resolve, + finh->nodeid, name); + + fuse_resolve_and_resume (state, fuse_lookup_resume); + + return; +} + +static inline void +do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup) +{ + inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this); + + fuse_log_eh(this, "%"PRIu64": FORGET %"PRIu64"/%"PRIu64" gfid: (%s)", + unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); + + inode_forget(fuse_inode, nlookup); + inode_unref(fuse_inode); +} static void -fuse_forget (fuse_req_t req, fuse_ino_t ino, unsigned long nlookup) +fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg) + { - inode_t *fuse_inode; - fuse_state_t *state; + struct fuse_forget_in *ffi = msg; - if (ino == 1) { - fuse_reply_none (req); + if (finh->nodeid == 1) { + GF_FREE (finh); return; } - state = state_from_req (req); - fuse_inode = inode_search (state->itable, ino, NULL); - if (fuse_inode) 
{ - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "got forget on inode (%lu)", ino); - inode_forget (fuse_inode, nlookup); - inode_unref (fuse_inode); - } else { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "got forget, but inode (%lu) not found", ino); - } + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": FORGET %"PRIu64"/%"PRIu64, + finh->unique, finh->nodeid, ffi->nlookup); - free_state (state); - fuse_reply_none (req); + do_forget(this, finh->unique, finh->nodeid, ffi->nlookup); + + GF_FREE (finh); } +static void +fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_batch_forget_in *fbfi = msg; + struct fuse_forget_one *ffo = (struct fuse_forget_one *) (fbfi + 1); + int i; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": BATCH_FORGET %"PRIu64"/%"PRIu32, + finh->unique, finh->nodeid, fbfi->count); + + for (i = 0; i < fbfi->count; i++) { + if (ffo[i].nodeid == 1) + continue; + do_forget(this, finh->unique, ffo[i].nodeid, ffo[i].nlookup); + } + + GF_FREE(finh); +} static int -fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) +fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - fuse_state_t *state; - fuse_req_t req; - fuse_private_t *priv = NULL; + fuse_state_t *state; + fuse_in_header_t *finh; + fuse_private_t *priv = NULL; + struct fuse_attr_out fao; priv = this->private; state = frame->root->state; - req = state->req; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret == 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => %"PRId64, frame->root->unique, + "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique, gf_fop_list[frame->root->op], - state->loc.path ? state->loc.path : "ERR", - buf->st_ino); + state->loc.path ? 
state->loc.path : "ERR", + prebuf->ia_ino); - /* TODO: make these timeouts configurable via meta */ - /* TODO: what if the inode number has changed by now */ - buf->st_blksize = BIG_FUSE_CHANNEL_SIZE; + postbuf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr (postbuf, &fao.attr, priv->enable_ino32); - fuse_reply_attr (req, buf, priv->attribute_timeout); + fao.attr_valid = calc_timeout_sec (priv->attribute_timeout); + fao.attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); + +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv->proto_minor >= 9 ? + send_fuse_obj (this, finh, &fao) : + send_fuse_data (this, finh, &fao, + FUSE_COMPAT_ATTR_OUT_SIZE); +#else + send_fuse_obj (this, finh, &fao); +#endif } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, gf_fop_list[frame->root->op], - state->loc.path ? state->loc.path : "ERR", + state->loc.path ? state->loc.path : "ERR", strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); } - - free_state (state); + + free_fuse_state (state); STACK_DESTROY (frame->root); + return 0; } - -static void -fuse_getattr (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +static int +fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - fuse_state_t *state; - fd_t *fd = NULL; - int32_t ret = -1; - - state = state_from_req (req); - - if (ino == 1) { - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": GETATTR %"PRId64" (fuse_loc_fill() failed)", - req_callid(req), (ino_t)ino); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + fuse_state_t *state; + fuse_in_header_t *finh; + fuse_private_t *priv = NULL; + struct fuse_attr_out fao; - if (state->loc.inode) - state->is_revalidate = 1; - else - 
state->is_revalidate = -1; + priv = this->private; + state = frame->root->state; + finh = state->finh; + + fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() %s => " + "gfid: %s", op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : ""); + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", + buf->ia_ino); - state->dict = dict_new(); + buf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr (buf, &fao.attr, priv->enable_ino32); - FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP, - lookup, &state->loc, state->dict); - return; + fao.attr_valid = calc_timeout_sec (priv->attribute_timeout); + fao.attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); + +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv->proto_minor >= 9 ? + send_fuse_obj (this, finh, &fao) : + send_fuse_data (this, finh, &fao, + FUSE_COMPAT_ATTR_OUT_SIZE); +#else + send_fuse_obj (this, finh, &fao); +#endif + } else { + GF_LOG_OCCASIONALLY ( gf_fuse_conn_err_log, "glusterfs-fuse", + GF_LOG_WARNING, + "%"PRIu64": %s() %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? 
state->loc.path : "ERR", + strerror (op_errno)); + + send_fuse_err (this, finh, op_errno); } - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + free_fuse_state (state); + STACK_DESTROY (frame->root); + return 0; +} + +static int +fuse_root_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stat, dict_t *dict, + struct iatt *postparent) +{ + fuse_attr_cbk (frame, cookie, this, op_ret, op_errno, stat, dict); + + return 0; +} + +void +fuse_getattr_resume (fuse_state_t *state) +{ if (!state->loc.inode) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": GETATTR %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)", - req_callid (req), (int64_t)ino, state->loc.path); - fuse_reply_err (req, ENOENT); + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRIu64": GETATTR %"PRIu64" (%s) resolution failed", + state->finh->unique, state->finh->nodeid, + uuid_utoa (state->resolve.gfid)); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - - fd = fd_lookup (state->loc.inode, get_pid_from_req (req)); - state->fd = fd; - if (!fd || S_ISDIR (state->loc.inode->st_mode)) { - /* this is the @ret of fuse_loc_fill, checked here - to permit fstat() to happen even when fuse_loc_fill fails - */ - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": GETATTR %"PRId64" (fuse_loc_fill() failed)", - req_callid(req), (ino_t)ino); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + if (!IA_ISDIR (state->loc.inode->ia_type)) { + state->fd = fd_lookup (state->loc.inode, state->finh->pid); + } + + if (!state->fd) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": GETATTR %"PRId64" (%s)", - req_callid (req), (int64_t)ino, state->loc.path); + "%"PRIu64": GETATTR %"PRIu64" (%s)", + state->finh->unique, state->finh->nodeid, + state->loc.path); - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT, - stat, &state->loc); + stat, &state->loc, 
state->xdata); } else { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": FGETATTR %"PRId64" (%s/%p)", - req_callid (req), (int64_t)ino, state->loc.path, fd); + "%"PRIu64": FGETATTR %"PRIu64" (%s/%p)", + state->finh->unique, state->finh->nodeid, + state->loc.path, state->fd); - FUSE_FOP (state,fuse_attr_cbk, GF_FOP_FSTAT, - fstat, fd); + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FSTAT, + fstat, state->fd, state->xdata); } } +static void +fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + fuse_state_t *state; + int32_t ret = -1; + + GET_STATE (this, finh, state); + + if (finh->nodeid == 1) { + state->gfid[15] = 1; + + ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": GETATTR on / (fuse_loc_fill() failed)", + finh->unique); + send_fuse_err (this, finh, ENOENT); + free_fuse_state (state); + return; + } + + fuse_gfid_set (state); + + FUSE_FOP (state, fuse_root_lookup_cbk, GF_FOP_LOOKUP, + lookup, &state->loc, state->xdata); + return; + } + + fuse_resolve_inode_init (state, &state->resolve, state->finh->nodeid); + + fuse_resolve_and_resume (state, fuse_getattr_resume); +} + +static int32_t +fuse_fd_inherit_directio (xlator_t *this, fd_t *fd, struct fuse_open_out *foo) +{ + int32_t ret = 0; + fuse_fd_ctx_t *fdctx = NULL, *tmp_fdctx = NULL; + fd_t *tmp_fd = NULL; + + GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", this, out, ret, + -EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", fd, out, ret, + -EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", foo, out, ret, + -EINVAL); + + fdctx = fuse_fd_ctx_get (this, fd); + if (!fdctx) { + ret = -ENOMEM; + goto out; + } + + tmp_fd = fd_lookup (fd->inode, 0); + if (tmp_fd) { + tmp_fdctx = fuse_fd_ctx_get (this, tmp_fd); + if (tmp_fdctx) { + foo->open_flags &= ~FOPEN_DIRECT_IO; + foo->open_flags |= (tmp_fdctx->open_flags + & FOPEN_DIRECT_IO); + } + } + + fdctx->open_flags |= (foo->open_flags & 
FOPEN_DIRECT_IO); + + if (tmp_fd != NULL) { + fd_unref (tmp_fd); + } + + ret = 0; +out: + return ret; +} static int fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - fuse_state_t *state; - fuse_req_t req; - fuse_private_t *priv = NULL; - struct fuse_file_info fi = {0, }; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + fuse_private_t *priv = NULL; + int32_t ret = 0; + struct fuse_open_out foo = {0, }; priv = this->private; state = frame->root->state; - req = state->req; + finh = state->finh; - if (op_ret >= 0) { - fi.fh = (unsigned long) fd; - fi.flags = state->flags; + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); - if (!S_ISDIR (fd->inode->st_mode)) { - if ((fi.flags & 3) && priv->direct_io_mode) - fi.direct_io = 1; + if (op_ret >= 0) { + foo.fh = (uintptr_t) fd; + foo.open_flags = 0; + + if (!IA_ISDIR (fd->inode->ia_type)) { + if (((priv->direct_io_mode == 2) + && ((state->flags & O_ACCMODE) != O_RDONLY)) + || (priv->direct_io_mode == 1)) + foo.open_flags |= FOPEN_DIRECT_IO; +#ifdef GF_DARWIN_HOST_OS + /* In Linux: by default, buffer cache + * is purged upon open, setting + * FOPEN_KEEP_CACHE implies no-purge + * + * In MacFUSE: by default, buffer cache + * is left intact upon open, setting + * FOPEN_PURGE_UBC implies purge + * + * [[Interesting...]] + */ + if (!priv->fopen_keep_cache) + foo.open_flags |= FOPEN_PURGE_UBC; +#else + /* + * If fopen-keep-cache is enabled, we set the associated + * flag here such that files are not invalidated on open. + * File invalidations occur either in fuse or explicitly + * when the cache is set invalid on the inode. 
+ */ + if (priv->fopen_keep_cache) + foo.open_flags |= FOPEN_KEEP_CACHE; +#endif } gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => %p", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, fd); + "%"PRIu64": %s() %s => %p", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, fd); - fd_ref (fd); - if (fuse_reply_open (req, &fi) == -ENOENT) { + ret = fuse_fd_inherit_directio (this, fd, &foo); + if (ret < 0) { + op_errno = -ret; + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "cannot inherit direct-io values for fd " + "(ptr:%p inode-gfid:%s) from fds already " + "opened", fd, uuid_utoa (fd->inode->gfid)); + goto err; + } + + if (send_fuse_obj (this, finh, &foo) == ENOENT) { gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "open(%s) got EINTR", state->loc.path); - fd_unref (fd); - goto out; + "open(%s) got EINTR", state->loc.path); + gf_fd_put (priv->fdtable, state->fd_no); + goto out; } - - fd_bind (fd); + + fd_bind (fd); } else { + err: gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, gf_fop_list[frame->root->op], state->loc.path, - strerror (op_errno)); + strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); + gf_fd_put (priv->fdtable, state->fd_no); } out: - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } - static void -do_chmod (fuse_req_t req, fuse_ino_t ino, struct stat *attr, - struct fuse_file_info *fi) +fuse_do_truncate (fuse_state_t *state, size_t size) { - fuse_state_t *state = NULL; - fd_t *fd = NULL; - int32_t ret = -1; - - state = state_from_req (req); - if (fi) { - fd = FI_TO_FD (fi); - state->fd = fd; + if (state->fd) { + FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_FTRUNCATE, + ftruncate, state->fd, size, state->xdata); + } else { + FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_TRUNCATE, + truncate, &state->loc, size, 
state->xdata); } - if (fd) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": FCHMOD %p", req_callid (req), fd); + return; +} - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHMOD, - fchmod, fd, attr->st_mode); - } else { - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); +static int +fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, dict_t *xdata) +{ + fuse_state_t *state; + fuse_in_header_t *finh; + fuse_private_t *priv = NULL; + struct fuse_attr_out fao; - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": CHMOD %"PRId64" (%s) (fuse_loc_fill() failed)", - req_callid (req), (int64_t)ino, - state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + int op_done = 0; + priv = this->private; + state = frame->root->state; + finh = state->finh; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": CHMOD %s", req_callid (req), - state->loc.path); + fuse_log_eh(this, "op_ret: %d, op_errno: %d, %"PRIu64", %s() %s => " + "gfid: %s", op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : ""); - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHMOD, - chmod, &state->loc, attr->st_mode); - } -} + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? 
state->loc.path : "ERR", + statpost->ia_ino); + statpost->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr (statpost, &fao.attr, priv->enable_ino32); -static void -do_chown (fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, struct fuse_file_info *fi) -{ - fuse_state_t *state = NULL; - fd_t *fd = NULL; - int32_t ret = -1; - uid_t uid = 0; - gid_t gid = 0; + fao.attr_valid = calc_timeout_sec (priv->attribute_timeout); + fao.attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); - uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t) -1; - gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t) -1; - state = state_from_req (req); + if (state->truncate_needed) { + fuse_do_truncate (state, state->size); + } else { +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv->proto_minor >= 9 ? + send_fuse_obj (this, finh, &fao) : + send_fuse_data (this, finh, &fao, + FUSE_COMPAT_ATTR_OUT_SIZE); +#else + send_fuse_obj (this, finh, &fao); +#endif + op_done = 1; + } + } else { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? 
state->loc.path : "ERR", + strerror (op_errno)); - if (fi) { - fd = FI_TO_FD (fi); - state->fd = fd; + send_fuse_err (this, finh, op_errno); + op_done = 1; } - if (fd) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": FCHOWN %p", req_callid (req), fd); - - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHOWN, - fchown, fd, uid, gid); - } else { - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": CHOWN %"PRId64" (%s) (fuse_loc_fill() failed)", - req_callid (req), (int64_t)ino, - state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + if (op_done) { + free_fuse_state (state); + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": CHOWN %s", req_callid (req), - state->loc.path); + STACK_DESTROY (frame->root); - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHOWN, - chown, &state->loc, uid, gid); - } + return 0; } - -static void -do_truncate (fuse_req_t req, fuse_ino_t ino, struct stat *attr, - struct fuse_file_info *fi) +static int32_t +fattr_to_gf_set_attr (int32_t valid) { - fuse_state_t *state = NULL; - fd_t *fd = NULL; - int32_t ret = -1; + int32_t gf_valid = 0; - state = state_from_req (req); - - if (fi) { - fd = FI_TO_FD (fi); - state->fd = fd; - } + if (valid & FATTR_MODE) + gf_valid |= GF_SET_ATTR_MODE; - if (fd) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": FTRUNCATE %p/%"PRId64, req_callid (req), - fd, attr->st_size); + if (valid & FATTR_UID) + gf_valid |= GF_SET_ATTR_UID; - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FTRUNCATE, - ftruncate, fd, attr->st_size); - } else { - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": TRUNCATE %s/%"PRId64" (fuse_loc_fill() failed)", - req_callid (req), state->loc.path, - attr->st_size); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + 
if (valid & FATTR_GID) + gf_valid |= GF_SET_ATTR_GID; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": TRUNCATE %s/%"PRId64"(%lu)", - req_callid (req), - state->loc.path, attr->st_size, ino); + if (valid & FATTR_ATIME) + gf_valid |= GF_SET_ATTR_ATIME; - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_TRUNCATE, - truncate, &state->loc, attr->st_size); - } + if (valid & FATTR_MTIME) + gf_valid |= GF_SET_ATTR_MTIME; - return; + if (valid & FATTR_SIZE) + gf_valid |= GF_SET_ATTR_SIZE; + + return gf_valid; } +#define FATTR_MASK (FATTR_SIZE \ + | FATTR_UID | FATTR_GID \ + | FATTR_ATIME | FATTR_MTIME \ + | FATTR_MODE) -static void -do_utimes (fuse_req_t req, fuse_ino_t ino, struct stat *attr) +void +fuse_setattr_resume (fuse_state_t *state) { - fuse_state_t *state = NULL; - struct timespec tv[2]; - int32_t ret = -1; - - tv[0].tv_sec = attr->st_atime; - tv[0].tv_nsec = ST_ATIM_NSEC(attr); - tv[1].tv_sec = attr->st_mtime; - tv[1].tv_nsec = ST_ATIM_NSEC(attr); - - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": UTIMENS %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); + if (!state->fd && !state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRIu64": SETATTR %"PRIu64" (%s) resolution failed", + state->finh->unique, state->finh->nodeid, + uuid_utoa (state->resolve.gfid)); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": UTIMENS (%lu)%s", req_callid (req), - ino, state->loc.path); + "%"PRIu64": SETATTR (%"PRIu64")%s", state->finh->unique, + state->finh->nodeid, state->loc.path); + +#ifdef GF_TEST_FFOP + /* this is for calls like 'fchmod()' */ + if (!state->fd) + state->fd = fd_lookup (state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if 
((state->valid & (FATTR_MASK)) != FATTR_SIZE) { + if (state->fd && + !((state->valid & FATTR_ATIME) || + (state->valid & FATTR_MTIME))) { + /* + there is no "futimes" call, so don't send + fsetattr if ATIME or MTIME is set + */ + + FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_FSETATTR, + fsetattr, state->fd, &state->attr, + fattr_to_gf_set_attr (state->valid), + state->xdata); + } else { + FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_SETATTR, + setattr, &state->loc, &state->attr, + fattr_to_gf_set_attr (state->valid), + state->xdata); + } + } else { + fuse_do_truncate (state, state->size); + } - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_UTIMENS, - utimens, &state->loc, tv); } - static void -fuse_setattr (fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, struct fuse_file_info *fi) +fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_setattr_in *fsi = msg; - if (valid & FUSE_SET_ATTR_MODE) - do_chmod (req, ino, attr, fi); - else if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) - do_chown (req, ino, attr, valid, fi); - else if (valid & FUSE_SET_ATTR_SIZE) - do_truncate (req, ino, attr, fi); - else if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) - do_utimes (req, ino, attr); - else - fuse_getattr (req, ino, fi); -} + fuse_private_t *priv = NULL; + fuse_state_t *state = NULL; + GET_STATE (this, finh, state); -static int gf_fuse_xattr_enotsup_log; + if (fsi->valid & FATTR_FH && + !(fsi->valid & (FATTR_ATIME|FATTR_MTIME))) { + /* We need no loc if kernel sent us an fd and + * we are not fiddling with times */ + state->fd = FH_TO_FD (fsi->fh); + fuse_resolve_fd_init (state, &state->resolve, state->fd); + } else { + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + } + + /* + * This is just stub code demonstrating how to retrieve + * lock_owner in setattr, according to the FUSE proto. + * We do not make use of ATM. 
Its purpose is supporting + * mandatory locking, but getting that right is further + * down the road. Cf. + * + * http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/ + * 4962/focus=4982 + * + * http://git.kernel.org/?p=linux/kernel/git/torvalds/ + * linux-2.6.git;a=commit;h=v2.6.23-5896-gf333211 + */ + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + if (priv->proto_minor >= 9 && fsi->valid & FATTR_LOCKOWNER) + state->lk_owner = fsi->lock_owner; +#endif + + state->valid = fsi->valid; + + if ((fsi->valid & (FATTR_MASK)) != FATTR_SIZE) { + if (fsi->valid & FATTR_SIZE) { + state->size = fsi->size; + state->truncate_needed = _gf_true; + } + + state->attr.ia_size = fsi->size; + state->attr.ia_atime = fsi->atime; + state->attr.ia_mtime = fsi->mtime; + state->attr.ia_atime_nsec = fsi->atimensec; + state->attr.ia_mtime_nsec = fsi->mtimensec; + + state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode); + state->attr.ia_uid = fsi->uid; + state->attr.ia_gid = fsi->gid; + } else { + state->size = fsi->size; + } + + fuse_resolve_and_resume (state, fuse_setattr_resume); +} static int fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { fuse_state_t *state = frame->root->state; - fuse_req_t req = state->req; + fuse_in_header_t *finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret == 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => 0", frame->root->unique, - gf_fop_list[frame->root->op], + "%"PRIu64": %s() %s => 0", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path ? 
state->loc.path : "ERR"); - fuse_reply_err (req, 0); + send_fuse_err (this, finh, 0); } else { - if (frame->root->op == GF_FOP_SETXATTR) { - op_ret = gf_compat_setxattr (state->dict); - if (op_ret == 0) - op_errno = 0; - if (op_errno == ENOTSUP) { - gf_fuse_xattr_enotsup_log++; - if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER)) - gf_log ("glusterfs-fuse", - GF_LOG_CRITICAL, - "extended attribute not " - "supported by the backend " - "storage"); - } - } else { - if ((frame->root->op == GF_FOP_REMOVEXATTR) - && (op_errno == ENOATTR)) { - goto nolog; - } - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s() %s => -1 (%s)", - frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? state->loc.path : "ERR", - strerror (op_errno)); - } - nolog: + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": %s() %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", + strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +static int +fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata); +} + +static int +fuse_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + if (op_ret == -1 && op_errno == ENOTSUP) + GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log, + "glusterfs-fuse", GF_LOG_CRITICAL, + "extended attribute not supported " + "by the backend storage"); + + return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata); +} static int fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct 
iatt *postparent, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_req_t req = NULL; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; state = frame->root->state; - req = state->req; + finh = state->finh; - if (op_ret == 0) - inode_unlink (state->loc.inode, state->loc.parent, - state->loc.name); + fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() %s => " + "gfid: %s", op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : ""); if (op_ret == 0) { + inode_unlink (state->loc.inode, state->loc.parent, + state->loc.name); gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => 0", frame->root->unique, + "%"PRIu64": %s() %s => 0", frame->root->unique, gf_fop_list[frame->root->op], state->loc.path); - fuse_reply_err (req, 0); + send_fuse_err (this, finh, 0); } else { gf_log ("glusterfs-fuse", op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_WARNING, - "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, gf_fop_list[frame->root->op], state->loc.path, - strerror (op_errno)); + strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +void +fuse_access_resume (fuse_state_t *state) +{ + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRIu64": ACCESS %"PRIu64" (%s) resolution failed", + state->finh->unique, state->finh->nodeid, + uuid_utoa (state->resolve.gfid)); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64" ACCESS %s/%"PRIu64" mask=%d", + state->finh->unique, state->loc.path, + state->finh->nodeid, state->mask); + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_ACCESS, access, + &state->loc, state->mask, state->xdata); +} static void -fuse_access 
(fuse_req_t req, fuse_ino_t ino, int mask) +fuse_access (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_access_in *fai = msg; fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); + GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": ACCESS %"PRId64" (%s) (fuse_loc_fill() failed)", - req_callid (req), (int64_t)ino, state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64" ACCESS %s/%lu mask=%d", req_callid (req), - state->loc.path, ino, mask); + state->mask = fai->mask; - FUSE_FOP (state, fuse_err_cbk, - GF_FOP_ACCESS, access, - &state->loc, mask); + fuse_resolve_and_resume (state, fuse_access_resume); return; } - static int fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *linkname) + int32_t op_ret, int32_t op_errno, const char *linkname, + struct iatt *buf, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_req_t req = NULL; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; state = frame->root->state; - req = state->req; + finh = state->finh; + + fuse_log_eh (this, "op_ret: %d, op_errno: %d %"PRIu64": %s() => %s" + " linkname: %s, gfid: %s", op_ret, op_errno, + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.gfid, linkname, + uuid_utoa (state->loc.gfid)); if (op_ret > 0) { ((char *)linkname)[op_ret] = '\0'; gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s => %s", frame->root->unique, + "%"PRIu64": %s => %s", frame->root->unique, state->loc.path, linkname); - fuse_reply_readlink(req, linkname); + send_fuse_data (this, finh, (void *)linkname, op_ret + 1); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s 
=> -1 (%s)", frame->root->unique, - state->loc.path, strerror(op_errno)); + "%"PRIu64": %s => -1 (%s)", frame->root->unique, + state->loc.path, strerror (op_errno)); - fuse_reply_err(req, op_errno); + send_fuse_err (this, finh, op_errno); } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } - -static void -fuse_readlink (fuse_req_t req, fuse_ino_t ino) +void +fuse_readlink_resume (fuse_state_t *state) { - fuse_state_t *state = NULL; - int32_t ret = -1; - - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64" READLINK %s/%"PRId64" (fuse_loc_fill() returned NULL inode)", - req_callid (req), state->loc.path, - state->loc.inode->ino); - fuse_reply_err (req, ENOENT); - free_state (state); + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "READLINK %"PRIu64" (%s) resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid)); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - + gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64" READLINK %s/%"PRId64, req_callid (req), - state->loc.path, state->loc.inode->ino); + "%"PRIu64" READLINK %s/%s", state->finh->unique, + state->loc.path, uuid_utoa (state->loc.inode->gfid)); FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK, - readlink, &state->loc, 4096); - - return; + readlink, &state->loc, 4096, state->xdata); } - static void -fuse_mknod (fuse_req_t req, fuse_ino_t par, const char *name, - mode_t mode, dev_t rdev) +fuse_readlink (xlator_t *this, fuse_in_header_t *finh, void *msg) { fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, 0, par, name); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64" MKNOD %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - 
fuse_reply_err (req, ENOENT); - free_state (state); + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + + fuse_resolve_and_resume (state, fuse_readlink_resume); + + return; +} + +void +fuse_mknod_resume (fuse_state_t *state) +{ + if (!state->loc.parent) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "MKNOD %"PRIu64"/%s (%s/%s) resolution failed", + state->finh->nodeid, state->resolve.bname, + uuid_utoa (state->resolve.gfid), state->resolve.bname); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; - } + } + + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } - state->loc.inode = inode_new (state->itable); + if (state->loc.inode) { + gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); + inode_unref (state->loc.inode); + state->loc.inode = NULL; + } + + state->loc.inode = inode_new (state->loc.parent->table); gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": MKNOD %s", req_callid (req), + "%"PRIu64": MKNOD %s", state->finh->unique, state->loc.path); - FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKNOD, - mknod, &state->loc, mode, rdev); + FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKNOD, + mknod, &state->loc, state->mode, state->rdev, state->umask, + state->xdata); +} + +static void +fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_mknod_in *fmi = msg; + char *name = (char *)(fmi + 1); + + fuse_state_t *state = NULL; + fuse_private_t *priv = NULL; + int32_t ret = -1; + + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >= 12 + if (priv->proto_minor < 12) + name = (char *)msg + FUSE_COMPAT_MKNOD_IN_SIZE; +#endif + + GET_STATE (this, finh, state); + + uuid_generate (state->gfid); + + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); + + state->mode = fmi->mode; + state->rdev = fmi->rdev; + + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >=12 + 
FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKNOD"); +#endif + + fuse_resolve_and_resume (state, fuse_mknod_resume); return; } - -static void -fuse_mkdir (fuse_req_t req, fuse_ino_t par, const char *name, mode_t mode) +void +fuse_mkdir_resume (fuse_state_t *state) { - fuse_state_t *state; - int32_t ret = -1; - - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, 0, par, name); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64" MKDIR %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); + if (!state->loc.parent) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "MKDIR %"PRIu64" (%s/%s) resolution failed", + state->finh->nodeid, uuid_utoa (state->resolve.gfid), + state->resolve.bname); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; - } + } - state->loc.inode = inode_new (state->itable); + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } + + if (state->loc.inode) { + gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); + inode_unref (state->loc.inode); + state->loc.inode = NULL; + } + + state->loc.inode = inode_new (state->loc.parent->table); gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": MKDIR %s", req_callid (req), + "%"PRIu64": MKDIR %s", state->finh->unique, state->loc.path); - FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKDIR, - mkdir, &state->loc, mode); - - return; + FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKDIR, + mkdir, &state->loc, state->mode, state->umask, state->xdata); } - -static void -fuse_unlink (fuse_req_t req, fuse_ino_t par, const char *name) +static void +fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg) { - fuse_state_t *state = NULL; - int32_t ret = -1; + struct fuse_mkdir_in *fmi = msg; + char *name = (char *)(fmi + 1); + fuse_private_t *priv = NULL; - state = state_from_req (req); + fuse_state_t *state; + 
int32_t ret = -1; - ret = fuse_loc_fill (&state->loc, state, 0, par, name); + GET_STATE (this, finh, state); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": UNLINK %s (fuse_loc_fill() returned NULL inode)", - req_callid (req), state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); + uuid_generate (state->gfid); + + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); + + state->mode = fmi->mode; + + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >=12 + FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKDIR"); +#endif + + fuse_resolve_and_resume (state, fuse_mkdir_resume); + + return; +} + +void +fuse_unlink_resume (fuse_state_t *state) +{ + if (!state->loc.parent || !state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "UNLINK %"PRIu64" (%s/%s) resolution failed", + state->finh->nodeid, uuid_utoa (state->resolve.gfid), + state->resolve.bname); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": UNLINK %s", req_callid (req), + "%"PRIu64": UNLINK %s", state->finh->unique, state->loc.path); FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK, - unlink, &state->loc); - - return; + unlink, &state->loc, 0, state->xdata); } - -static void -fuse_rmdir (fuse_req_t req, fuse_ino_t par, const char *name) +static void +fuse_unlink (xlator_t *this, fuse_in_header_t *finh, void *msg) { + char *name = msg; fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, 0, par, name); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": RMDIR %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); + GET_STATE (this, finh, state); + + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); + + 
fuse_resolve_and_resume (state, fuse_unlink_resume); + + return; +} + +void +fuse_rmdir_resume (fuse_state_t *state) +{ + if (!state->loc.parent || !state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "RMDIR %"PRIu64" (%s/%s) resolution failed", + state->finh->nodeid, uuid_utoa (state->resolve.gfid), + state->resolve.bname); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": RMDIR %s", req_callid (req), + "%"PRIu64": RMDIR %s", state->finh->unique, state->loc.path); FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR, - rmdir, &state->loc); + rmdir, &state->loc, 0, state->xdata); +} + +static void +fuse_rmdir (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + char *name = msg; + fuse_state_t *state = NULL; + + GET_STATE (this, finh, state); + + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); + + fuse_resolve_and_resume (state, fuse_rmdir_resume); return; } +void +fuse_symlink_resume (fuse_state_t *state) +{ + if (!state->loc.parent) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "SYMLINK %"PRIu64" (%s/%s) -> %s resolution failed", + state->finh->nodeid, uuid_utoa (state->resolve.gfid), + state->resolve.bname, state->name); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } + + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } + + if (state->loc.inode) { + gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); + inode_unref (state->loc.inode); + state->loc.inode = NULL; + } + + state->loc.inode = inode_new (state->loc.parent->table); + + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": SYMLINK %s -> %s", state->finh->unique, + state->loc.path, state->name); + + FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_SYMLINK, + symlink, state->name, &state->loc, state->umask, state->xdata); +} static void -fuse_symlink (fuse_req_t req, const 
char *linkname, fuse_ino_t par, - const char *name) +fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg) { + char *name = msg; + char *linkname = name + strlen (name) + 1; fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, 0, par, name); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64" SYMLINK %s -> %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path, linkname); - fuse_reply_err (req, ENOENT); - free_state (state); - return; - } + GET_STATE (this, finh, state); - state->loc.inode = inode_new (state->itable); + uuid_generate (state->gfid); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": SYMLINK %s -> %s", req_callid (req), - state->loc.path, linkname); + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); - FUSE_FOP (state, fuse_entry_cbk, GF_FOP_SYMLINK, - symlink, linkname, &state->loc); + state->name = gf_strdup (linkname); + + fuse_resolve_and_resume (state, fuse_symlink_resume); return; } - -int +int fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_req_t req = NULL; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; state = frame->root->state; - req = state->req; + finh = state->finh; + + fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() " + "path: %s parent: %s ==> path: %s parent: %s" + "gfid: %s", op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.parent?uuid_utoa (state->loc.parent->gfid):"", + state->loc2.path, + state->loc2.parent?uuid_utoa (state->loc2.parent->gfid):"", + state->loc.inode?uuid_utoa (state->loc.inode->gfid):""); if (op_ret == 
0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s -> %s => 0 (buf->st_ino=%"PRId64" , loc->ino=%"PRId64")", - frame->root->unique, state->loc.path, state->loc2.path, - buf->st_ino, state->loc.ino); + "%"PRIu64": %s -> %s => 0 (buf->ia_ino=%"PRIu64")", + frame->root->unique, state->loc.path, state->loc2.path, + buf->ia_ino); { /* ugly ugly - to stay blind to situation where rename happens on a new inode */ - buf->st_ino = state->loc.ino; - buf->st_mode = state->loc.inode->st_mode; + buf->ia_type = state->loc.inode->ia_type; } - inode_rename (state->itable, + buf->ia_blksize = this->ctx->page_size; + + inode_rename (state->loc.parent->table, state->loc.parent, state->loc.name, state->loc2.parent, state->loc2.name, state->loc.inode, buf); - fuse_reply_err (req, 0); + send_fuse_err (this, finh, 0); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s -> %s => -1 (%s)", frame->root->unique, + "%"PRIu64": %s -> %s => -1 (%s)", frame->root->unique, state->loc.path, state->loc2.path, - strerror (op_errno)); - fuse_reply_err (req, op_errno); + strerror (op_errno)); + send_fuse_err (this, finh, op_errno); } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } - -static void -fuse_rename (fuse_req_t req, fuse_ino_t oldpar, const char *oldname, - fuse_ino_t newpar, const char *newname) +void +fuse_rename_resume (fuse_state_t *state) { - fuse_state_t *state = NULL; - int32_t ret = -1; - - state = state_from_req (req); - - ret = fuse_loc_fill (&state->loc, state, 0, oldpar, oldname); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "for %s %"PRId64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)", - state->loc.path, req_callid (req), state->loc.path, - state->loc2.path); - - fuse_reply_err (req, ENOENT); - free_state (state); + char loc_uuid[64] = {0,}; + char loc2_uuid[64] = {0,}; + + if (!state->loc.parent || !state->loc.inode) { + gf_log ("glusterfs-fuse", 
GF_LOG_ERROR, + "RENAME %"PRIu64" %s/%s -> %s/%s src resolution failed", + state->finh->unique, + uuid_utoa_r (state->resolve.gfid, loc_uuid), + state->resolve.bname, + uuid_utoa_r (state->resolve2.gfid, loc2_uuid), + state->resolve2.bname); + + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - ret = fuse_loc_fill (&state->loc2, state, 0, newpar, newname); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "for %s %"PRId64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)", - state->loc.path, req_callid (req), state->loc.path, - state->loc2.path); - - fuse_reply_err (req, ENOENT); - free_state (state); + if (!state->loc2.parent) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "RENAME %"PRIu64" %s/%s -> %s/%s dst resolution failed", + state->finh->unique, + uuid_utoa_r (state->resolve.gfid, loc_uuid), + state->resolve.bname, + uuid_utoa_r (state->resolve2.gfid, loc2_uuid), + state->resolve2.bname); + + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; - } + } + + state->resolve.op_ret = 0; + state->resolve2.op_ret = 0; gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": RENAME `%s (%"PRId64")' -> `%s (%"PRId64")'", - req_callid (req), state->loc.path, state->loc.ino, - state->loc2.path, state->loc2.ino); + "%"PRIu64": RENAME `%s (%s)' -> `%s (%s)'", + state->finh->unique, state->loc.path, loc_uuid, + state->loc2.path, loc2_uuid); FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME, - rename, &state->loc, &state->loc2); - - return; + rename, &state->loc, &state->loc2, state->xdata); } - static void -fuse_link (fuse_req_t req, fuse_ino_t ino, fuse_ino_t par, const char *name) +fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_rename_in *fri = msg; + char *oldname = (char *)(fri + 1); + char *newname = oldname + strlen (oldname) + 1; fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); + GET_STATE (this, finh, state); - ret = 
fuse_loc_fill (&state->loc, state, 0, par, name); - ret = fuse_loc_fill (&state->loc2, state, ino, 0, NULL); + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, oldname); - if ((state->loc2.inode == NULL) || - (ret < 0)) { + fuse_resolve_entry_init (state, &state->resolve2, fri->newdir, newname); + + fuse_resolve_and_resume (state, fuse_rename_resume); + + return; +} + +void +fuse_link_resume (fuse_state_t *state) +{ + if (!state->loc2.inode || !state->loc.parent) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "fuse_loc_fill() failed for %s %"PRId64": LINK %s %s", - state->loc2.path, req_callid (req), - state->loc2.path, state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); + "fuse_loc_fill() failed %"PRIu64": LINK %s %s", + state->finh->unique, state->loc2.path, state->loc.path); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } + state->resolve.op_ret = 0; + state->resolve2.op_ret = 0; + + if (state->loc.inode) { + inode_unref (state->loc.inode); + state->loc.inode = NULL; + } state->loc.inode = inode_ref (state->loc2.inode); + gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": LINK() %s (%"PRId64") -> %s (%"PRId64")", - req_callid (req), state->loc2.path, state->loc2.ino, - state->loc.path, state->loc.ino); + "%"PRIu64": LINK() %s -> %s", + state->finh->unique, state->loc2.path, + state->loc.path); + + FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_LINK, + link, &state->loc2, &state->loc, state->xdata); +} + +static void +fuse_link (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_link_in *fli = msg; + char *name = (char *)(fli + 1); + fuse_state_t *state = NULL; + + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve2, fli->oldnodeid); - FUSE_FOP (state, fuse_entry_cbk, GF_FOP_LINK, - link, &state->loc2, &state->loc); + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); + + fuse_resolve_and_resume (state, fuse_link_resume); 
return; } - static int fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct stat *buf) + fd_t *fd, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_req_t req = NULL; - fuse_private_t *priv = NULL; - struct fuse_file_info fi = {0, }; - struct fuse_entry_param e = {0, }; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + fuse_private_t *priv = NULL; + struct fuse_out_header fouh = {0, }; + struct fuse_entry_out feo = {0, }; + struct fuse_open_out foo = {0, }; + struct iovec iov_out[3]; + inode_t *linked_inode = NULL; state = frame->root->state; priv = this->private; - req = state->req; - fi.flags = state->flags; + finh = state->finh; + foo.open_flags = 0; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret >= 0) { - fi.fh = (unsigned long) fd; + foo.fh = (uintptr_t) fd; - if ((fi.flags & 3) && priv->direct_io_mode) - fi.direct_io = 1; + if (((priv->direct_io_mode == 2) + && ((state->flags & O_ACCMODE) != O_RDONLY)) + || (priv->direct_io_mode == 1)) + foo.open_flags |= FOPEN_DIRECT_IO; gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => %p (ino=%"PRId64")", - frame->root->unique, gf_fop_list[frame->root->op], - state->loc.path, fd, buf->st_ino); + "%"PRIu64": %s() %s => %p (ino=%"PRIu64")", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, fd, buf->ia_ino); - e.ino = buf->st_ino; + buf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr (buf, &feo.attr, priv->enable_ino32); -#ifdef GF_DARWIN_HOST_OS - e.generation = 0; -#else - e.generation = buf->st_ctime; -#endif + linked_inode = inode_link (inode, state->loc.parent, + state->loc.name, buf); + + if (linked_inode != inode) { + /* + VERY racy code (if used anywhere else) + -- don't do this without understanding + */ + inode_unref (fd->inode); + fd->inode = inode_ref 
(linked_inode); + } - e.entry_timeout = priv->entry_timeout; - e.attr_timeout = priv->attribute_timeout; - e.attr = *buf; - e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE; + inode_lookup (linked_inode); - fi.keep_cache = 0; + inode_unref (linked_inode); - inode_link (inode, state->loc.parent, - state->loc.name, buf); - - inode_lookup (inode); + feo.nodeid = inode_to_fuse_nodeid (linked_inode); - fd_ref (fd); - if (fuse_reply_create (req, &e, &fi) == -ENOENT) { + feo.entry_valid = calc_timeout_sec (priv->entry_timeout); + feo.entry_valid_nsec = calc_timeout_nsec (priv->entry_timeout); + feo.attr_valid = calc_timeout_sec (priv->attribute_timeout); + feo.attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); + + fouh.error = 0; + iov_out[0].iov_base = &fouh; + iov_out[1].iov_base = &feo; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + iov_out[1].iov_len = priv->proto_minor >= 9 ? + sizeof (feo) : + FUSE_COMPAT_ENTRY_OUT_SIZE; +#else + iov_out[1].iov_len = sizeof (feo); +#endif + iov_out[2].iov_base = &foo; + iov_out[2].iov_len = sizeof (foo); + + if (send_fuse_iov (this, finh, iov_out, 3) == ENOENT) { gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "create(%s) got EINTR", state->loc.path); - inode_forget (inode, 1); - fd_unref (fd); - goto out; - } + "create(%s) got EINTR", state->loc.path); + inode_forget (inode, 1); + gf_fd_put (priv->fdtable, state->fd_no); + goto out; + } - fd_bind (fd); + fd_bind (fd); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s => -1 (%s)", req_callid (req), + "%"PRIu64": %s => -1 (%s)", finh->unique, state->loc.path, strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); + gf_fd_put (priv->fdtable, state->fd_no); } out: - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } - -static void -fuse_create (fuse_req_t req, fuse_ino_t par, const char *name, - mode_t mode, struct fuse_file_info *fi) +void +fuse_create_resume (fuse_state_t *state) { - fuse_state_t 
*state = NULL; - fd_t *fd = NULL; - int32_t ret = -1; + fd_t *fd = NULL; + fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; + + if (!state->loc.parent) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64" CREATE %s/%s resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid), + state->resolve.bname); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } - state = state_from_req (req); - state->flags = fi->flags; + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } - ret = fuse_loc_fill (&state->loc, state, 0, par, name); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64" CREATE %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - fuse_reply_err (req, ENOENT); - free_state (state); + if (state->loc.inode) { + gf_log (state->this->name, GF_LOG_DEBUG, + "inode already present"); + inode_unref (state->loc.inode); + } + + state->loc.inode = inode_new (state->loc.parent->table); + + fd = fd_create (state->loc.inode, state->finh->pid); + if (fd == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64" CREATE cannot create a new fd", + state->finh->unique); + send_fuse_err (state->this, state->finh, ENOMEM); + free_fuse_state (state); return; - } + } - state->loc.inode = inode_new (state->itable); + fdctx = fuse_fd_ctx_check_n_create (state->this, fd); + if (fdctx == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64" CREATE creation of fdctx failed", + state->finh->unique); + fd_unref (fd); + send_fuse_err (state->this, state->finh, ENOMEM); + free_fuse_state (state); + return; + } - fd = fd_create (state->loc.inode, get_pid_from_req (req)); - state->fd = fd; - fd->flags = state->flags; + priv = state->this->private; + + state->fd_no = gf_fd_unused_get (priv->fdtable, fd); + + state->fd = fd_ref (fd); + fd->flags = state->flags; gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": 
CREATE %s", req_callid (req), + "%"PRIu64": CREATE %s", state->finh->unique, state->loc.path); FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE, - create, &state->loc, state->flags, mode, fd); + create, &state->loc, state->flags, state->mode, + state->umask, fd, state->xdata); - return; } - static void -fuse_open (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg) { +#if FUSE_KERNEL_MINOR_VERSION >= 12 + struct fuse_create_in *fci = msg; +#else + struct fuse_open_in *fci = msg; +#endif + char *name = (char *)(fci + 1); + + fuse_private_t *priv = NULL; fuse_state_t *state = NULL; - fd_t *fd = NULL; - int32_t ret = -1; + int32_t ret = -1; + + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >= 12 + if (priv->proto_minor < 12) + name = (char *)((struct fuse_open_in *)msg + 1); +#endif + + GET_STATE (this, finh, state); - state = state_from_req (req); - state->flags = fi->flags; + uuid_generate (state->gfid); - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { + fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name); + + state->mode = fci->mode; + state->flags = fci->flags; + + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >=12 + FUSE_ENTRY_CREATE(this, priv, finh, state, fci, "CREATE"); +#endif + fuse_resolve_and_resume (state, fuse_create_resume); + + return; +} + +void +fuse_open_resume (fuse_state_t *state) +{ + fd_t *fd = NULL; + fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; + + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRIu64": OPEN %s resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid)); + + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } + + fd = fd_create (state->loc.inode, state->finh->pid); + if (!fd) { + gf_log ("fuse", GF_LOG_ERROR, + "fd is NULL"); + send_fuse_err (state->this, state->finh, 
ENOENT); + free_fuse_state (state); + return; + } + + fdctx = fuse_fd_ctx_check_n_create (state->this, fd); + if (fdctx == NULL) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": OPEN %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - - fuse_reply_err (req, ENOENT); - free_state (state); + "%"PRIu64": OPEN creation of fdctx failed", + state->finh->unique); + fd_unref (fd); + send_fuse_err (state->this, state->finh, ENOMEM); + free_fuse_state (state); return; } - fd = fd_create (state->loc.inode, get_pid_from_req (req)); - state->fd = fd; - fd->flags = fi->flags; + priv = state->this->private; + + state->fd_no = gf_fd_unused_get (priv->fdtable, fd); + state->fd = fd_ref (fd); + fd->flags = state->flags; gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": OPEN %s", req_callid (req), + "%"PRIu64": OPEN %s", state->finh->unique, state->loc.path); FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN, - open, &state->loc, fi->flags, fd); + open, &state->loc, state->flags, fd, state->xdata); +} + +static void +fuse_open (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_open_in *foi = msg; + fuse_state_t *state = NULL; + + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + + state->flags = foi->flags; + + fuse_resolve_and_resume (state, fuse_open_resume); return; } - static int fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, - struct stat *stbuf, struct iobref *iobref) + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { fuse_state_t *state = NULL; - fuse_req_t req = NULL; + fuse_in_header_t *finh = NULL; + struct fuse_out_header fouh = {0, }; + struct iovec *iov_out = NULL; state = frame->root->state; - req = state->req; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret >= 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": READ => 
%d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64, - frame->root->unique, - op_ret, state->size, state->off, stbuf->st_size); - - fuse_reply_vec (req, vector, count); + "%"PRIu64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRIu64, + frame->root->unique, + op_ret, state->size, state->off, stbuf->ia_size); + + iov_out = GF_CALLOC (count + 1, sizeof (*iov_out), + gf_fuse_mt_iovec); + if (iov_out) { + fouh.error = 0; + iov_out[0].iov_base = &fouh; + memcpy (iov_out + 1, vector, count * sizeof (*iov_out)); + send_fuse_iov (this, finh, iov_out, count + 1); + GF_FREE (iov_out); + } else + send_fuse_err (this, finh, ENOMEM); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": READ => %d (%s)", frame->root->unique, + "%"PRIu64": READ => %d (%s)", frame->root->unique, op_ret, strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +void +fuse_readv_resume (fuse_state_t *state) +{ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": READ (%p, size=%zu, offset=%"PRIu64")", + state->finh->unique, state->fd, state->size, state->off); + + FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ, readv, state->fd, + state->size, state->off, state->io_flags, state->xdata); +} static void -fuse_readv (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, - struct fuse_file_info *fi) +fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_read_in *fri = msg; + + fuse_private_t *priv = NULL; fuse_state_t *state = NULL; fd_t *fd = NULL; - state = state_from_req (req); - state->size = size; - state->off = off; - - fd = FI_TO_FD (fi); + GET_STATE (this, finh, state); + + fd = FH_TO_FD (fri->fh); state->fd = fd; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": READ (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", - req_callid (req), fd, size, off); + fuse_resolve_fd_init (state, &state->resolve, fd); - FUSE_FOP (state, fuse_readv_cbk, 
GF_FOP_READ, - readv, fd, size, off); + /* See comment by similar code in fuse_settatr */ + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + if (priv->proto_minor >= 9 && fri->read_flags & FUSE_READ_LOCKOWNER) + state->lk_owner = fri->lock_owner; +#endif -} + state->size = fri->size; + state->off = fri->offset; + /* lets ignore 'fri->read_flags', but just consider 'fri->flags' */ + state->io_flags = fri->flags; + fuse_resolve_and_resume (state, fuse_readv_resume); +} static int fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct stat *stbuf) + struct iatt *stbuf, struct iatt *postbuf, dict_t *xdata) { fuse_state_t *state = NULL; - fuse_req_t req = NULL; + fuse_in_header_t *finh = NULL; + struct fuse_write_out fwo = {0, }; state = frame->root->state; - req = state->req; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret >= 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64, - frame->root->unique, - op_ret, state->size, state->off, stbuf->st_size); + "%"PRIu64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRIu64, + frame->root->unique, + op_ret, state->size, state->off, stbuf->ia_size); - fuse_reply_write (req, op_ret); + fwo.size = op_ret; + send_fuse_obj (this, finh, &fwo); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": WRITE => -1 (%s)", frame->root->unique, - strerror(op_errno)); + "%"PRIu64": WRITE => -1 (%s)", frame->root->unique, + strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); } - - free_state (state); + + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } - -static void -fuse_write (fuse_req_t req, fuse_ino_t ino, const char *buf, - size_t size, off_t off, - struct fuse_file_info *fi) +void +fuse_write_resume (fuse_state_t *state) { - fuse_state_t *state = NULL; - struct iovec vector; - fd_t *fd = NULL; - struct iobref 
*iobref = NULL; - struct iobuf *iobuf = NULL; - - state = state_from_req (req); - state->size = size; - state->off = off; - fd = FI_TO_FD (fi); - state->fd = fd; - vector.iov_base = (void *)buf; - vector.iov_len = size; + struct iobref *iobref = NULL; + struct iobuf *iobuf = NULL; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", - req_callid (req), fd, size, off); iobref = iobref_new (); + if (!iobref) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRIu64": WRITE iobref allocation failed", + state->finh->unique); + send_fuse_err (state->this, state->finh, ENOMEM); + + free_fuse_state (state); + return; + } + iobuf = ((fuse_private_t *) (state->this->private))->iobuf; iobref_add (iobref, iobuf); - FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE, - writev, fd, &vector, 1, off, iobref); + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + state->finh->unique, state->fd, state->size, state->off); + + FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE, writev, state->fd, + &state->vector, 1, state->off, state->io_flags, iobref, + state->xdata); iobref_unref (iobref); - return; } - static void -fuse_flush (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg) { - fuse_state_t *state = NULL; - fd_t *fd = NULL; + /* WRITE is special, metadata is attached to in_header, + * and msg is the payload as-is. 
+ */ + struct fuse_write_in *fwi = (struct fuse_write_in *) + (finh + 1); - state = state_from_req (req); - fd = FI_TO_FD (fi); - state->fd = fd; + fuse_private_t *priv = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": FLUSH %p", req_callid (req), fd); + priv = this->private; - FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, - flush, fd); + GET_STATE (this, finh, state); + fd = FH_TO_FD (fwi->fh); + state->fd = fd; + state->size = fwi->size; + state->off = fwi->offset; + + /* lets ignore 'fwi->write_flags', but just consider 'fwi->flags' */ + state->io_flags = fwi->flags; + /* TODO: may need to handle below flag + (fwi->write_flags & FUSE_WRITE_CACHE); + */ + + + fuse_resolve_fd_init (state, &state->resolve, fd); + + /* See comment by similar code in fuse_settatr */ + priv = this->private; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + if (priv->proto_minor >= 9 && fwi->write_flags & FUSE_WRITE_LOCKOWNER) + state->lk_owner = fwi->lock_owner; +#endif + + state->vector.iov_base = msg; + state->vector.iov_len = fwi->size; + + fuse_resolve_and_resume (state, fuse_write_resume); return; } +void +fuse_flush_resume (fuse_state_t *state) +{ + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, + flush, state->fd, state->xdata); +} -static void -fuse_release (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +static void +fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_flush_in *ffi = msg; + fuse_state_t *state = NULL; + fd_t *fd = NULL; + + GET_STATE (this, finh, state); + fd = FH_TO_FD (ffi->fh); + state->fd = fd; - state = state_from_req (req); - state->fd = FI_TO_FD (fi); + fuse_resolve_fd_init (state, &state->resolve, fd); + + state->lk_owner = ffi->lock_owner; gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": RELEASE %p", req_callid (req), state->fd); + "%"PRIu64": FLUSH %p", finh->unique, fd); + + fuse_resolve_and_resume (state, fuse_flush_resume); - fd_unref (state->fd); - - 
fuse_reply_err (req, 0); - - free_state (state); return; } - -static void -fuse_fsync (fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) +static void +fuse_release (xlator_t *this, fuse_in_header_t *finh, void *msg) { - fuse_state_t *state = NULL; - fd_t *fd = NULL; - - state = state_from_req (req); - fd = FI_TO_FD (fi); + struct fuse_release_in *fri = msg; + fd_t *activefd = NULL; + fd_t *fd = NULL; + uint64_t val = 0; + int ret = 0; + fuse_state_t *state = NULL; + fuse_fd_ctx_t *fdctx = NULL; + fuse_private_t *priv = NULL; + + GET_STATE (this, finh, state); + fd = FH_TO_FD (fri->fh); state->fd = fd; + priv = this->private; + + fuse_log_eh (this, "RELEASE(): %"PRIu64":, fd: %p, gfid: %s", + finh->unique, fd, uuid_utoa (fd->inode->gfid)); + gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": FSYNC %p", req_callid (req), fd); + "%"PRIu64": RELEASE %p", finh->unique, state->fd); + + ret = fd_ctx_del (fd, this, &val); + if (!ret) { + fdctx = (fuse_fd_ctx_t *)(unsigned long)val; + if (fdctx) { + activefd = fdctx->activefd; + if (activefd) { + fd_unref (activefd); + } - FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNC, - fsync, fd, datasync); + GF_FREE (fdctx); + } + } + fd_unref (fd); + state->fd = NULL; + + gf_fdptr_put (priv->fdtable, fd); + + send_fuse_err (this, finh, 0); + + free_fuse_state (state); return; } +void +fuse_fsync_resume (fuse_state_t *state) +{ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": FSYNC %p", state->finh->unique, + state->fd); + + /* fsync_flags: 1 means "datasync" (no defines for this) */ + FUSE_FOP (state, fuse_fsync_cbk, GF_FOP_FSYNC, + fsync, state->fd, (state->flags & 1), state->xdata); +} static void -fuse_opendir (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_fsync_in *fsi = msg; + fuse_state_t *state = NULL; fd_t *fd = NULL; - int32_t ret = -1; - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, 
state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { + GET_STATE (this, finh, state); + fd = FH_TO_FD (fsi->fh); + state->fd = fd; + + fuse_resolve_fd_init (state, &state->resolve, fd); + + state->flags = fsi->fsync_flags; + fuse_resolve_and_resume (state, fuse_fsync_resume); + return; +} + +void +fuse_opendir_resume (fuse_state_t *state) +{ + fd_t *fd = NULL; + fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; + + priv = state->this->private; + + if (!state->loc.inode) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": OPENDIR %s (fuse_loc_fill() failed)", - req_callid (req), state->loc.path); - - fuse_reply_err (req, ENOENT); - free_state (state); + "%"PRIu64": OPENDIR (%s) resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid)); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - fd = fd_create (state->loc.inode, get_pid_from_req (req)); - state->fd = fd; + fd = fd_create (state->loc.inode, state->finh->pid); + if (fd == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": OPENDIR fd creation failed", + state->finh->unique); + send_fuse_err (state->this, state->finh, ENOMEM); + free_fuse_state (state); + } + + fdctx = fuse_fd_ctx_check_n_create (state->this, fd); + if (fdctx == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": OPENDIR creation of fdctx failed", + state->finh->unique); + fd_unref (fd); + send_fuse_err (state->this, state->finh, ENOMEM); + free_fuse_state (state); + return; + } + + state->fd = fd_ref (fd); + state->fd_no = gf_fd_unused_get (priv->fdtable, fd); gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": OPENDIR %s", req_callid (req), + "%"PRIu64": OPENDIR %s", state->finh->unique, state->loc.path); FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPENDIR, - opendir, &state->loc, fd); + opendir, &state->loc, fd, state->xdata); +} + +static void +fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + /* + struct 
fuse_open_in *foi = msg; + */ + + fuse_state_t *state = NULL; + + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + + fuse_resolve_and_resume (state, fuse_opendir_resume); } +unsigned char +d_type_from_stat (struct iatt *buf) +{ + unsigned char d_type; + + if (IA_ISLNK (buf->ia_type)) { + d_type = DT_LNK; + + } else if (IA_ISDIR (buf->ia_type)) { + d_type = DT_DIR; + + } else if (IA_ISFIFO (buf->ia_type)) { + d_type = DT_FIFO; + + } else if (IA_ISSOCK (buf->ia_type)) { + d_type = DT_SOCK; + + } else if (IA_ISCHR (buf->ia_type)) { + d_type = DT_CHR; + + } else if (IA_ISBLK (buf->ia_type)) { + d_type = DT_BLK; + + } else if (IA_ISREG (buf->ia_type)) { + d_type = DT_REG; + + } else { + d_type = DT_UNKNOWN; + } + + return d_type; +} static int fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { fuse_state_t *state = NULL; - fuse_req_t req = NULL; - int size = 0; - int entry_size = 0; - char *buf = NULL; - gf_dirent_t *entry = NULL; - struct stat stbuf = {0, }; + fuse_in_header_t *finh = NULL; + int size = 0; + char *buf = NULL; + gf_dirent_t *entry = NULL; + struct fuse_dirent *fde = NULL; + fuse_private_t *priv = NULL; state = frame->root->state; - req = state->req; + finh = state->finh; + priv = state->this->private; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret < 0) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": READDIR => -1 (%s)", frame->root->unique, + "%"PRIu64": READDIR => -1 (%s)", frame->root->unique, strerror (op_errno)); - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); + goto out; + } + + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": READDIR => %d/%"GF_PRI_SIZET",%"PRId64, + frame->root->unique, op_ret, state->size, state->off); + + list_for_each_entry (entry, &entries->list, list) { + size 
+= FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET + + strlen (entry->d_name)); + } + + if (size <= 0) { + send_fuse_data (this, finh, 0, 0); + goto out; + } + + buf = GF_CALLOC (1, size, gf_fuse_mt_char); + if (!buf) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRIu64": READDIR => -1 (%s)", frame->root->unique, + strerror (ENOMEM)); + send_fuse_err (this, finh, ENOMEM); + goto out; + } + + size = 0; + list_for_each_entry (entry, &entries->list, list) { + fde = (struct fuse_dirent *)(buf + size); + gf_fuse_fill_dirent (entry, fde, priv->enable_ino32); + size += FUSE_DIRENT_SIZE (fde); + } + + send_fuse_data (this, finh, buf, size); + + /* TODO: */ + /* gf_link_inodes_from_dirent (this, state->fd->inode, entries); */ + +out: + free_fuse_state (state); + STACK_DESTROY (frame->root); + GF_FREE (buf); + return 0; + +} + +void +fuse_readdir_resume (fuse_state_t *state) +{ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": READDIR (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + state->finh->unique, state->fd, state->size, state->off); + + FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR, + readdir, state->fd, state->size, state->off, state->xdata); +} + +static void +fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_read_in *fri = msg; + + fuse_state_t *state = NULL; + fd_t *fd = NULL; + + GET_STATE (this, finh, state); + state->size = fri->size; + state->off = fri->offset; + fd = FH_TO_FD (fri->fh); + state->fd = fd; + + fuse_resolve_fd_init (state, &state->resolve, fd); + + fuse_resolve_and_resume (state, fuse_readdir_resume); +} + + +static int +fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + int size = 0; + char *buf = NULL; + gf_dirent_t *entry = NULL; + struct fuse_direntplus *fde = NULL; + struct fuse_entry_out *feo = NULL; + fuse_private_t *priv = NULL; + + state = 
frame->root->state; + finh = state->finh; + priv = this->private; + + if (op_ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": READDIRP => -1 (%s)", frame->root->unique, + strerror (op_errno)); + + send_fuse_err (this, finh, op_errno); goto out; } gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": READDIR => %d/%"GF_PRI_SIZET",%"PRId64, + "%"PRIu64": READDIRP => %d/%"GF_PRI_SIZET",%"PRId64, frame->root->unique, op_ret, state->size, state->off); list_for_each_entry (entry, &entries->list, list) { - size += fuse_dirent_size (strlen (entry->d_name)); + size += FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET_DIRENTPLUS + + strlen (entry->d_name)); + } + + if (size <= 0) { + send_fuse_data (this, finh, 0, 0); + goto out; } - buf = CALLOC (1, size); + buf = GF_CALLOC (1, size, gf_fuse_mt_char); if (!buf) { gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "%"PRId64": READDIR => -1 (%s)", frame->root->unique, + "%"PRIu64": READDIRP => -1 (%s)", frame->root->unique, strerror (ENOMEM)); - fuse_reply_err (req, -ENOMEM); + send_fuse_err (this, finh, ENOMEM); goto out; } size = 0; list_for_each_entry (entry, &entries->list, list) { - stbuf.st_ino = entry->d_ino; - entry_size = fuse_dirent_size (strlen (entry->d_name)); - fuse_add_direntry (req, buf + size, entry_size, - entry->d_name, &stbuf, - entry->d_off); - size += entry_size; - } + inode_t *linked_inode; + + fde = (struct fuse_direntplus *)(buf + size); + feo = &fde->entry_out; + fde->dirent.ino = entry->d_ino; + fde->dirent.off = entry->d_off; + fde->dirent.type = entry->d_type; + fde->dirent.namelen = strlen (entry->d_name); + strncpy (fde->dirent.name, entry->d_name, fde->dirent.namelen); + size += FUSE_DIRENTPLUS_SIZE (fde); - fuse_reply_buf (req, (void *)buf, size); + if (!entry->inode) + continue; + entry->d_stat.ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr (&entry->d_stat, &feo->attr, priv->enable_ino32); + + linked_inode = inode_link (entry->inode, state->fd->inode, + entry->d_name, &entry->d_stat); + 
if (!linked_inode) + continue; + + inode_lookup (linked_inode); + + feo->nodeid = inode_to_fuse_nodeid (linked_inode); + + fuse_inode_set_need_lookup (linked_inode, this); + + inode_unref (linked_inode); + + feo->entry_valid = + calc_timeout_sec (priv->entry_timeout); + feo->entry_valid_nsec = + calc_timeout_nsec (priv->entry_timeout); + feo->attr_valid = + calc_timeout_sec (priv->attribute_timeout); + feo->attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); + } + + send_fuse_data (this, finh, buf, size); out: - free_state (state); - STACK_DESTROY (frame->root); - if (buf) - FREE (buf); - return 0; + free_fuse_state (state); + STACK_DESTROY (frame->root); + GF_FREE (buf); + return 0; + +} + +void +fuse_readdirp_resume (fuse_state_t *state) +{ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": READDIRP (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + state->finh->unique, state->fd, state->size, state->off); + + FUSE_FOP (state, fuse_readdirp_cbk, GF_FOP_READDIRP, + readdirp, state->fd, state->size, state->off, state->xdata); } static void -fuse_readdir (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, - struct fuse_file_info *fi) +fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg) { - fuse_state_t *state = NULL; + struct fuse_read_in *fri = msg; + + fuse_state_t *state = NULL; fd_t *fd = NULL; - state = state_from_req (req); - state->size = size; - state->off = off; - fd = FI_TO_FD (fi); + GET_STATE (this, finh, state); + state->size = fri->size; + state->off = fri->offset; + fd = FH_TO_FD (fri->fh); state->fd = fd; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": READDIR (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", - req_callid (req), fd, size, off); + fuse_resolve_fd_init (state, &state->resolve, fd); - FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR, - readdir, fd, size, off); + fuse_resolve_and_resume (state, fuse_readdirp_resume); } +static int +fuse_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t 
*this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata); +} -static void -fuse_releasedir (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +static void +fuse_fallocate_resume(fuse_state_t *state) { - fuse_state_t *state = NULL; + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": FALLOCATE (%p, flags=%d, size=%zu, offset=%"PRId64")", + state->finh->unique, state->fd, state->flags, state->size, + state->off); + + if (state->flags & FALLOC_FL_PUNCH_HOLE) + FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_DISCARD, discard, + state->fd, state->off, state->size, state->xdata); + else + FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_FALLOCATE, fallocate, + state->fd, (state->flags & FALLOC_FL_KEEP_SIZE), + state->off, state->size, state->xdata); +} + +static void +fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_fallocate_in *ffi = msg; + fuse_state_t *state = NULL; + + GET_STATE(this, finh, state); + state->off = ffi->offset; + state->size = ffi->length; + state->flags = ffi->mode; + state->fd = FH_TO_FD(ffi->fh); + + fuse_resolve_fd_init(state, &state->resolve, state->fd); + fuse_resolve_and_resume(state, fuse_fallocate_resume); +} + + +static void +fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_release_in *fri = msg; + fd_t *activefd = NULL; + uint64_t val = 0; + int ret = 0; + fuse_state_t *state = NULL; + fuse_fd_ctx_t *fdctx = NULL; + fuse_private_t *priv = NULL; + + GET_STATE (this, finh, state); + state->fd = FH_TO_FD (fri->fh); + + priv = this->private; - state = state_from_req (req); - state->fd = FI_TO_FD (fi); + fuse_log_eh (this, "RELEASEDIR (): %"PRIu64": fd: %p, gfid: %s", + finh->unique, state->fd, + uuid_utoa (state->fd->inode->gfid)); gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": RELEASEDIR %p", req_callid (req), state->fd); - - fd_unref (state->fd); + 
"%"PRIu64": RELEASEDIR %p", finh->unique, state->fd); - fuse_reply_err (req, 0); - - free_state (state); + ret = fd_ctx_del (state->fd, this, &val); + + if (!ret) { + fdctx = (fuse_fd_ctx_t *)(unsigned long)val; + if (fdctx) { + activefd = fdctx->activefd; + if (activefd) { + fd_unref (activefd); + } + + GF_FREE (fdctx); + } + } + + fd_unref (state->fd); + + gf_fdptr_put (priv->fdtable, state->fd); + + state->fd = NULL; + + send_fuse_err (this, finh, 0); + + free_fuse_state (state); return; } +void +fuse_fsyncdir_resume (fuse_state_t *state) +{ + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR, + fsyncdir, state->fd, (state->flags & 1), state->xdata); + +} -static void -fuse_fsyncdir (fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) +static void +fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_fsync_in *fsi = msg; + fuse_state_t *state = NULL; - fd_t *fd = NULL; - - fd = FI_TO_FD (fi); + fd_t *fd = NULL; - state = state_from_req (req); - state->fd = fd; + fd = FH_TO_FD (fsi->fh); - FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR, - fsyncdir, fd, datasync); + GET_STATE (this, finh, state); + state->fd = fd; + + fuse_resolve_fd_init (state, &state->resolve, fd); + + state->flags = fsi->fsync_flags; + fuse_resolve_and_resume (state, fuse_fsyncdir_resume); return; } - static int fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf) + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) { fuse_state_t *state = NULL; - fuse_req_t req = NULL; + fuse_in_header_t *finh = NULL; + fuse_private_t *priv = NULL; + struct fuse_statfs_out fso = {{0, }, }; state = frame->root->state; - req = state->req; - /* - Filesystems (like ZFS on solaris) reports - different ->f_frsize and ->f_bsize. Old coreutils - df tools use statfs() and do not see ->f_frsize. 
- the ->f_blocks, ->f_bavail and ->f_bfree are - w.r.t ->f_frsize and not ->f_bsize which makes the - df tools report wrong values. - - Scale the block counts to match ->f_bsize. - */ - /* TODO: with old coreutils, f_bsize is taken from stat()'s st_blksize - * so the df with old coreutils this wont work :( - */ + priv = this->private; + finh = state->finh; + + fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s()", + op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op]); if (op_ret == 0) { #ifndef GF_DARWIN_HOST_OS /* MacFUSE doesn't respect anyof these tweaks */ buf->f_blocks *= buf->f_frsize; - buf->f_blocks /= BIG_FUSE_CHANNEL_SIZE; + buf->f_blocks /= this->ctx->page_size; buf->f_bavail *= buf->f_frsize; - buf->f_bavail /= BIG_FUSE_CHANNEL_SIZE; + buf->f_bavail /= this->ctx->page_size; buf->f_bfree *= buf->f_frsize; - buf->f_bfree /= BIG_FUSE_CHANNEL_SIZE; + buf->f_bfree /= this->ctx->page_size; - buf->f_frsize = buf->f_bsize = BIG_FUSE_CHANNEL_SIZE; + buf->f_frsize = buf->f_bsize =this->ctx->page_size; #endif /* GF_DARWIN_HOST_OS */ - fuse_reply_statfs (req, buf); - + fso.st.bsize = buf->f_bsize; + fso.st.frsize = buf->f_frsize; + fso.st.blocks = buf->f_blocks; + fso.st.bfree = buf->f_bfree; + fso.st.bavail = buf->f_bavail; + fso.st.files = buf->f_files; + fso.st.ffree = buf->f_ffree; + fso.st.namelen = buf->f_namemax; + + priv->proto_minor >= 4 ? 
+ send_fuse_obj (this, finh, &fso) : + send_fuse_data (this, finh, &fso, FUSE_COMPAT_STATFS_SIZE); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": ERR => -1 (%s)", frame->root->unique, - strerror(op_errno)); - fuse_reply_err (req, op_errno); + "%"PRIu64": ERR => -1 (%s)", frame->root->unique, + strerror (op_errno)); + send_fuse_err (this, finh, op_errno); } - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +void +fuse_statfs_resume (fuse_state_t *state) +{ + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": STATFS (%s) resolution fail", + state->finh->unique, uuid_utoa (state->resolve.gfid)); + + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": STATFS", state->finh->unique); + + FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS, + statfs, &state->loc, state->xdata); +} + static void -fuse_statfs (fuse_req_t req, fuse_ino_t ino) +fuse_statfs (xlator_t *this, fuse_in_header_t *finh, void *msg) { fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, 1, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + + fuse_resolve_and_resume (state, fuse_statfs_resume); +} + + +void +fuse_setxattr_resume (fuse_state_t *state) +{ + if (!state->loc.inode) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": STATFS (fuse_loc_fill() fail)", - req_callid (req)); - - fuse_reply_err (req, ENOENT); - free_state (state); + "%"PRIu64": SETXATTR %s/%"PRIu64" (%s) " + "resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid), + state->finh->nodeid, state->name); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": 
STATFS", req_callid (req)); +#ifdef GF_TEST_FFOP + state->fd = fd_lookup (state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ - FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS, - statfs, &state->loc); + if (state->fd) { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": SETXATTR %p/%"PRIu64" (%s)", state->finh->unique, + state->fd, state->finh->nodeid, state->name); + + FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_FSETXATTR, + fsetxattr, state->fd, state->xattr, state->flags, + state->xdata); + } else { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": SETXATTR %s/%"PRIu64" (%s)", state->finh->unique, + state->loc.path, state->finh->nodeid, state->name); + + FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_SETXATTR, + setxattr, &state->loc, state->xattr, state->flags, + state->xdata); + } } static void -fuse_setxattr (fuse_req_t req, fuse_ino_t ino, const char *name, - const char *value, size_t size, int flags) +fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_setxattr_in *fsi = msg; + char *name = (char *)(fsi + 1); + char *value = name + strlen (name) + 1; + struct fuse_private *priv = NULL; + fuse_state_t *state = NULL; - char *dict_value = NULL; - int32_t ret = -1; + char *dict_value = NULL; + int32_t ret = -1; + char *newkey = NULL; -#ifdef DISABLE_POSIX_ACL - if (!strncmp (name, "system.", 7)) { - fuse_reply_err (req, EOPNOTSUPP); - return; - } -#endif + priv = this->private; - state = state_from_req (req); - state->size = size; - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { +#ifdef GF_DARWIN_HOST_OS + if (fsi->position) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": SETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)", - req_callid (req), - state->loc.path, (int64_t)ino, name); + "%"PRIu64": SETXATTR %s/%"PRIu64" (%s):" + "refusing positioned setxattr", + finh->unique, state->loc.path, finh->nodeid, name); + send_fuse_err (this, finh, 
EINVAL); + FREE (finh); + return; + } +#endif - fuse_reply_err (req, ENOENT); - free_state (state); + if (fuse_ignore_xattr_set (priv, name)) { + (void) send_fuse_err (this, finh, 0); return; } - state->dict = get_new_dict (); + if (!priv->acl) { + if ((strcmp (name, POSIX_ACL_ACCESS_XATTR) == 0) || + (strcmp (name, POSIX_ACL_DEFAULT_XATTR) == 0)) { + send_fuse_err (this, finh, EOPNOTSUPP); + GF_FREE (finh); + return; + } + } - dict_value = memdup (value, size); - dict_set (state->dict, (char *)name, - data_from_dynptr ((void *)dict_value, size)); - dict_ref (state->dict); + if (!priv->selinux) { + if (strncmp (name, "security.", 9) == 0) { + send_fuse_err (this, finh, EOPNOTSUPP); + GF_FREE (finh); + return; + } + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": SETXATTR %s/%"PRId64" (%s)", req_callid (req), - state->loc.path, (int64_t)ino, name); + /* Check if the command is for changing the log + level of process or specific xlator */ + ret = is_gf_log_command (this, name, value); + if (ret >= 0) { + send_fuse_err (this, finh, ret); + GF_FREE (finh); + return; + } + + if (!strcmp ("inode-invalidate", name)) { + gf_log ("fuse", GF_LOG_TRACE, + "got request to invalidate %"PRIu64, finh->nodeid); + send_fuse_err (this, finh, 0); + fuse_invalidate_entry (this, finh->nodeid); + GF_FREE (finh); + return; + } + + if (!strcmp (GFID_XATTR_KEY, name)) { + send_fuse_err (this, finh, EPERM); + GF_FREE (finh); + return; + } + + GET_STATE (this, finh, state); + state->size = fsi->size; + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + + state->xattr = get_new_dict (); + if (!state->xattr) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRIu64": SETXATTR dict allocation failed", + finh->unique); + + send_fuse_err (this, finh, ENOMEM); + free_fuse_state (state); + return; + } - FUSE_FOP (state, fuse_err_cbk, GF_FOP_SETXATTR, - setxattr, &state->loc, state->dict, flags); + ret = fuse_flip_xattr_ns (priv, name, &newkey); + if (ret) { + send_fuse_err 
(this, finh, ENOMEM); + free_fuse_state (state); + return; + } + + if (fsi->size > 0) { + dict_value = memdup (value, fsi->size); + } else { + gf_log (THIS->name, GF_LOG_ERROR, "value size zero"); + dict_value = NULL; + } + dict_set (state->xattr, newkey, + data_from_dynptr ((void *)dict_value, fsi->size)); + dict_ref (state->xattr); + + state->flags = fsi->flags; + state->name = newkey; + + fuse_resolve_and_resume (state, fuse_setxattr_resume); return; } +static void +send_fuse_xattr (xlator_t *this, fuse_in_header_t *finh, const char *value, + size_t size, size_t expected) +{ + struct fuse_getxattr_out fgxo; + + /* linux kernel limits the size of xattr value to 64k */ + if (size > GLUSTERFS_XATTR_LEN_MAX) + send_fuse_err (this, finh, E2BIG); + else if (expected) { + /* if callback for getxattr and asks for value */ + if (size > expected) + /* reply would be bigger than + * what was asked by kernel */ + send_fuse_err (this, finh, ERANGE); + else + send_fuse_data (this, finh, (void *)value, size); + } else { + fgxo.size = size; + send_fuse_obj (this, finh, &fgxo); + } +} + +/* filter out xattrs that need not be visible on the + * mount point. 
this is _specifically_ for geo-rep + * as of now, to prevent Rsync from crying out loud + * when it tries to setxattr() for selinux xattrs + */ +static int +fuse_filter_xattr(char *key) +{ + int need_filter = 0; + struct fuse_private *priv = THIS->private; + + if ((priv->client_pid == GF_CLIENT_PID_GSYNCD) + && fnmatch ("*.selinux*", key, FNM_PERIOD) == 0) + need_filter = 1; + + return need_filter; +} + + static int fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - int need_to_free_dict = 0; + int need_to_free_dict = 0; char *value = ""; fuse_state_t *state = NULL; - fuse_req_t req = NULL; - int32_t dummy_ret = 0; + fuse_in_header_t *finh = NULL; data_t *value_data = NULL; - fuse_private_t *priv = NULL; - struct stat st; - char *file = NULL; - int32_t fd = -1; int ret = -1; int32_t len = 0; - data_pair_t *trav = NULL; + int32_t len_next = 0; - priv = this->private; - ret = op_ret; state = frame->root->state; - req = state->req; - dummy_ret = 0; + finh = state->finh; -#ifdef GF_DARWIN_HOST_OS - /* This is needed in MacFuse, where MacOSX Finder needs some specific - * keys to be supported from FS - */ - - if (state->name) { - if (!dict) { - dict = get_new_dict (); - need_to_free_dict = 1; - } - dummy_ret = gf_compat_getxattr (state->name, dict); - if (dummy_ret != -1) - ret = dummy_ret; - } else { - if (!dict) { - dict = get_new_dict (); - need_to_free_dict = 1; - } - dummy_ret = gf_compat_listxattr (ret, dict, state->size); - if (dummy_ret != -1) - ret = dummy_ret; - } -#endif /* DARWIN */ - - if (ret >= 0) { + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": %s() %s => %d", frame->root->unique, + "%"PRIu64": %s() %s => %d", frame->root->unique, gf_fop_list[frame->root->op], state->loc.path, op_ret); /* if successful */ @@ -1978,221 +3164,350 @@ 
fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* if callback for getxattr */ value_data = dict_get (dict, state->name); if (value_data) { + ret = value_data->len; /* Don't return the value for '\0' */ value = value_data->data; - - /* linux kernel limits the size of xattr value to 64k */ - if (ret > GLUSTERFS_XATTR_LEN_MAX) { - fuse_reply_err (req, E2BIG); - } else if (state->size) { - /* if callback for getxattr and asks for value */ - fuse_reply_buf (req, value, ret); - } else { - /* if callback for getxattr and asks for value length only */ - fuse_reply_xattr (req, ret); - } /* if(ret >...)...else if...else */ - } else if (!strcmp (state->name, "user.glusterfs-booster-volfile")) { - if (!priv->volfile) { - memset (&st, 0, sizeof (st)); - fd = fileno (this->ctx->specfp); - ret = fstat (fd, &st); - if (ret != 0) { - gf_log (this->name, - GF_LOG_ERROR, - "fstat on fd (%d) failed (%s)", fd, strerror (errno)); - fuse_reply_err (req, ENODATA); - } - - priv->volfile_size = st.st_size; - file = priv->volfile = CALLOC (1, priv->volfile_size); - ret = lseek (fd, 0, SEEK_SET); - while ((ret = read (fd, file, GF_UNIT_KB)) > 0) { - file += ret; - } - } - - if (priv->volfile_size > GLUSTERFS_XATTR_LEN_MAX) { - fuse_reply_err (req, E2BIG); - } else if (state->size) { - /* if callback for getxattr and asks for value */ - fuse_reply_buf (req, priv->volfile, priv->volfile_size); - } else { - /* if callback for getxattr and asks for value length only */ - fuse_reply_xattr (req, priv->volfile_size); - } /* if(ret >...)...else if...else */ - } else if (!strcmp (state->name, "user.glusterfs-booster-path")) { - if (state->size) { - fuse_reply_buf (req, state->loc.path, strlen (state->loc.path) + 1); - } else { - fuse_reply_xattr (req, strlen (state->loc.path) + 1); - } - } else { - fuse_reply_err (req, ENODATA); - } /* if(value_data)...else */ - } else { - /* if callback for listxattr */ - trav = dict->members_list; - while (trav) { - len += strlen (trav->key) + 1; 
- trav = trav->next; - } /* while(trav) */ - value = alloca (len + 1); - ERR_ABORT (value); - len = 0; - trav = dict->members_list; - while (trav) { - strcpy (value + len, trav->key); - value[len + strlen(trav->key)] = '\0'; - len += strlen (trav->key) + 1; - trav = trav->next; - } /* while(trav) */ - if (state->size) { - /* if callback for listxattr and asks for list of keys */ - fuse_reply_buf (req, value, len); + + send_fuse_xattr (this, finh, value, ret, state->size); + /* if(ret >...)...else if...else */ } else { - /* if callback for listxattr and asks for length of keys only */ - fuse_reply_xattr (req, len); - } /* if(state->size)...else */ - } /* if(state->name)...else */ + send_fuse_err (this, finh, ENODATA); + } /* if(value_data)...else */ + } else { + /* if callback for listxattr */ + /* we need to invoke fuse_filter_xattr() twice. Once + * while counting size and then while filling buffer + */ + len = dict_keys_join (NULL, 0, dict, fuse_filter_xattr); + if (len < 0) + goto out; + + value = alloca (len + 1); + if (!value) + goto out; + + len_next = dict_keys_join (value, len, dict, + fuse_filter_xattr); + if (len_next != len) + gf_log (THIS->name, GF_LOG_ERROR, + "sizes not equal %d != %d", + len, len_next); + + send_fuse_xattr (this, finh, value, len, state->size); + } /* if(state->name)...else */ } else { /* if failure - no need to check if listxattr or getxattr */ if (op_errno != ENODATA) { - if (op_errno == ENOTSUP) - { - gf_fuse_xattr_enotsup_log++; - if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER)) - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "extended attribute not " - "supported by the backend " - "storage"); - } - else - { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log, + "glusterfs-fuse", + GF_LOG_ERROR, + "extended attribute not " + "supported by the backend " + "storage"); + } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s() %s => -1 (%s)", - frame->root->unique, - 
gf_fop_list[frame->root->op], - state->loc.path, strerror(op_errno)); + "%"PRIu64": %s(%s) %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], state->name, + state->loc.path, strerror (op_errno)); } } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": %s() %s => -1 (%s)", - frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, - strerror(op_errno)); + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRIu64": %s(%s) %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], state->name, + state->loc.path, strerror (op_errno)); } /* if(op_errno!= ENODATA)...else */ - fuse_reply_err (req, op_errno); + send_fuse_err (this, finh, op_errno); } /* if(op_ret>=0)...else */ - if (need_to_free_dict) - dict_unref (dict); +out: + if (need_to_free_dict) + dict_unref (dict); - free_state (state); + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +void +fuse_getxattr_resume (fuse_state_t *state) +{ + char *value = NULL; + + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": GETXATTR %s/%"PRIu64" (%s) " + "resolution failed", + state->finh->unique, + uuid_utoa (state->resolve.gfid), + state->finh->nodeid, state->name); + + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); + return; + } + +#ifdef GF_TEST_FFOP + state->fd = fd_lookup (state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if (state->name && + (strcmp (state->name, VIRTUAL_GFID_XATTR_KEY) == 0)) { + /* send glusterfs gfid in binary form */ + + value = GF_CALLOC (16 + 1, sizeof(char), + gf_common_mt_char); + if (!value) { + send_fuse_err (state->this, state->finh, ENOMEM); + goto internal_out; + } + memcpy (value, state->loc.inode->gfid, 16); + + send_fuse_xattr (THIS, state->finh, value, 16, state->size); + GF_FREE (value); + internal_out: + free_fuse_state (state); + return; + } + + if (state->name && + (strcmp (state->name, VIRTUAL_GFID_XATTR_KEY_STR) == 0)) { + /* 
transform binary gfid to canonical form */ + + value = GF_CALLOC (UUID_CANONICAL_FORM_LEN + 1, sizeof(char), + gf_common_mt_char); + if (!value) { + send_fuse_err (state->this, state->finh, ENOMEM); + goto internal_out1; + } + uuid_utoa_r (state->loc.inode->gfid, value); + + send_fuse_xattr (THIS, state->finh, value, + UUID_CANONICAL_FORM_LEN, state->size); + GF_FREE (value); + internal_out1: + free_fuse_state (state); + return; + } + + + if (state->fd) { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": GETXATTR %p/%"PRIu64" (%s)", state->finh->unique, + state->fd, state->finh->nodeid, state->name); + + FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_FGETXATTR, + fgetxattr, state->fd, state->name, state->xdata); + } else { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": GETXATTR %s/%"PRIu64" (%s)", state->finh->unique, + state->loc.path, state->finh->nodeid, state->name); + + FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, + getxattr, &state->loc, state->name, state->xdata); + } +} + + static void -fuse_getxattr (fuse_req_t req, fuse_ino_t ino, const char *name, size_t size) +fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) { - fuse_state_t *state = NULL; - int32_t ret = -1; + struct fuse_getxattr_in *fgxi = msg; + char *name = (char *)(fgxi + 1); + fuse_state_t *state = NULL; + struct fuse_private *priv = NULL; + int rv = 0; + int op_errno = EINVAL; + char *newkey = NULL; -#ifdef DISABLE_POSIX_ACL - if (!strncmp (name, "system.", 7)) { - fuse_reply_err (req, ENODATA); - return; - } + priv = this->private; + GET_STATE (this, finh, state); + +#ifdef GF_DARWIN_HOST_OS + if (fgxi->position) { + /* position can be used only for + * resource fork queries which we + * don't support anyway... so handling + * it separately is just sort of a + * matter of aesthetics, not strictly + * necessary. 
+ */ + + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": GETXATTR %s/%"PRIu64" (%s):" + "refusing positioned getxattr", + finh->unique, state->loc.path, finh->nodeid, name); + op_errno = EINVAL; + goto err; + } #endif - state = state_from_req (req); - state->size = size; - state->name = strdup (name); + if (!priv->acl) { + if ((strcmp (name, POSIX_ACL_ACCESS_XATTR) == 0) || + (strcmp (name, POSIX_ACL_DEFAULT_XATTR) == 0)) { + op_errno = ENOTSUP; + goto err; + } + } + + if (!priv->selinux) { + if (strncmp (name, "security.", 9) == 0) { + op_errno = ENODATA; + goto err; + } + } + + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); + + rv = fuse_flip_xattr_ns (priv, name, &newkey); + if (rv) { + op_errno = ENOMEM; + goto err; + } - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { + state->size = fgxi->size; + state->name = newkey; + + fuse_resolve_and_resume (state, fuse_getxattr_resume); + + return; + err: + send_fuse_err (this, finh, op_errno); + free_fuse_state (state); + return; +} + + +void +fuse_listxattr_resume (fuse_state_t *state) +{ + if (!state->loc.inode) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": GETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)", - req_callid (req), state->loc.path, (int64_t)ino, name); + "%"PRIu64": LISTXATTR %s/%"PRIu64 + "resolution failed", state->finh->unique, + uuid_utoa (state->resolve.gfid), state->finh->nodeid); - fuse_reply_err (req, ENOENT); - free_state (state); + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": GETXATTR %s/%"PRId64" (%s)", req_callid (req), - state->loc.path, (int64_t)ino, name); +#ifdef GF_TEST_FFOP + state->fd = fd_lookup (state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if (state->fd) { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": LISTXATTR %p/%"PRIu64, 
state->finh->unique, + state->fd, state->finh->nodeid); - FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, - getxattr, &state->loc, name); + FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_FGETXATTR, + fgetxattr, state->fd, NULL, state->xdata); + } else { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": LISTXATTR %s/%"PRIu64, state->finh->unique, + state->loc.path, state->finh->nodeid); - return; + FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, + getxattr, &state->loc, NULL, state->xdata); + } } static void -fuse_listxattr (fuse_req_t req, fuse_ino_t ino, size_t size) +fuse_listxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_getxattr_in *fgxi = msg; fuse_state_t *state = NULL; - int32_t ret = -1; - state = state_from_req (req); - state->size = size; - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": LISTXATTR %s/%"PRId64" (fuse_loc_fill() failed)", - req_callid (req), state->loc.path, (int64_t)ino); + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); - fuse_reply_err (req, ENOENT); - free_state (state); + state->size = fgxi->size; + + fuse_resolve_and_resume (state, fuse_listxattr_resume); + + return; +} + + +void +fuse_removexattr_resume (fuse_state_t *state) +{ + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s) " + "resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid), + state->finh->nodeid, state->name); + + send_fuse_err (state->this, state->finh, ENOENT); + free_fuse_state (state); return; } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": LISTXATTR %s/%"PRId64, req_callid (req), - state->loc.path, (int64_t)ino); +#ifdef GF_TEST_FFOP + state->fd = fd_lookup (state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ - FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, - 
getxattr, &state->loc, NULL); + if (state->fd) { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": REMOVEXATTR %p/%"PRIu64" (%s)", state->finh->unique, + state->fd, state->finh->nodeid, state->name); - return; + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FREMOVEXATTR, + fremovexattr, state->fd, state->name, state->xdata); + } else { + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s)", state->finh->unique, + state->loc.path, state->finh->nodeid, state->name); + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR, + removexattr, &state->loc, state->name, state->xdata); + } } static void -fuse_removexattr (fuse_req_t req, fuse_ino_t ino, const char *name) - +fuse_removexattr (xlator_t *this, fuse_in_header_t *finh, void *msg) { - fuse_state_t *state = NULL; - int32_t ret = -1; + char *name = msg; - state = state_from_req (req); - ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)", - req_callid (req), state->loc.path, (int64_t)ino, name); + fuse_state_t *state = NULL; + fuse_private_t *priv = NULL; + int32_t ret = -1; + char *newkey = NULL; - fuse_reply_err (req, ENOENT); - free_state (state); + if (!strcmp (GFID_XATTR_KEY, name)) { + send_fuse_err (this, finh, EPERM); + GF_FREE (finh); return; } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s)", req_callid (req), - state->loc.path, (int64_t)ino, name); + priv = this->private; + + GET_STATE (this, finh, state); + + fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); - FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR, - removexattr, &state->loc, name); + ret = fuse_flip_xattr_ns (priv, name, &newkey); + if (ret) { + send_fuse_err (this, finh, ENOMEM); + free_fuse_state (state); + return; + } + state->name = newkey; + + fuse_resolve_and_resume (state, 
fuse_removexattr_resume); return; } @@ -2201,57 +3516,85 @@ static int gf_fuse_lk_enosys_log; static int fuse_getlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct flock *lock) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { fuse_state_t *state = NULL; state = frame->root->state; + struct fuse_lk_out flo = {{0, }, }; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); if (op_ret == 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": ERR => 0", frame->root->unique); - fuse_reply_lock (state->req, lock); + "%"PRIu64": ERR => 0", frame->root->unique); + flo.lk.type = lock->l_type; + flo.lk.pid = lock->l_pid; + if (lock->l_type == F_UNLCK) + flo.lk.start = flo.lk.end = 0; + else { + flo.lk.start = lock->l_start; + flo.lk.end = lock->l_len ? + (lock->l_start + lock->l_len - 1) : + OFFSET_MAX; + } + send_fuse_obj (this, state->finh, &flo); } else { if (op_errno == ENOSYS) { gf_fuse_lk_enosys_log++; if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "GETLK not supported. loading " + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "GETLK not supported. 
loading " "'features/posix-locks' on server side " "will add GETLK support."); } } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": ERR => -1 (%s)", - frame->root->unique, strerror (op_errno)); + "%"PRIu64": ERR => -1 (%s)", + frame->root->unique, strerror (op_errno)); } - fuse_reply_err (state->req, op_errno); + send_fuse_err (this, state->finh, op_errno); } - - free_state (state); + + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +void +fuse_getlk_resume (fuse_state_t *state) +{ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": GETLK %p", state->finh->unique, state->fd); + + FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK, + lk, state->fd, F_GETLK, &state->lk_lock, state->xdata); +} + + static void -fuse_getlk (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - struct flock *lock) +fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_lk_in *fli = msg; + fuse_state_t *state = NULL; - fd_t *fd = NULL; + fd_t *fd = NULL; - fd = FI_TO_FD (fi); - state = state_from_req (req); - state->req = req; - state->fd = fd; + fd = FH_TO_FD (fli->fh); + GET_STATE (this, finh, state); + state->fd = fd; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": GETLK %p", req_callid (req), fd); + fuse_resolve_fd_init (state, &state->resolve, fd); - FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK, - lk, fd, F_GETLK, lock); + convert_fuse_file_lock (&fli->lk, &state->lk_lock, + fli->owner); + + state->lk_owner = fli->owner; + + fuse_resolve_and_resume (state, fuse_getlk_resume); return; } @@ -2259,566 +3602,1804 @@ fuse_getlk (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, static int fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct flock *lock) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { + uint32_t op = 0; fuse_state_t *state = NULL; state = frame->root->state; + op = state->finh->opcode; + + fuse_log_eh_fop(this, 
state, frame, op_ret, op_errno); if (op_ret == 0) { gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": ERR => 0", frame->root->unique); - fuse_reply_err (state->req, 0); + "%"PRIu64": ERR => 0", frame->root->unique); + fd_lk_insert_and_merge (state->fd, + (op == FUSE_SETLK) ? F_SETLK : F_SETLKW, + &state->lk_lock); + + send_fuse_err (this, state->finh, 0); } else { if (op_errno == ENOSYS) { gf_fuse_lk_enosys_log++; if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, + gf_log ("glusterfs-fuse", GF_LOG_ERROR, "SETLK not supported. loading " "'features/posix-locks' on server side " "will add SETLK support."); } + } else if (op_errno == EAGAIN) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "Returning EAGAIN Flock: " + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", + (unsigned long long) state->lk_lock.l_start, + (unsigned long long) state->lk_lock.l_len, + (unsigned long long) state->lk_lock.l_pid, + lkowner_utoa (&frame->root->lk_owner)); } else { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRId64": ERR => -1 (%s)", - frame->root->unique, strerror (op_errno)); + "%"PRIu64": ERR => -1 (%s)", + frame->root->unique, strerror (op_errno)); } - fuse_reply_err (state->req, op_errno); + send_fuse_err (this, state->finh, op_errno); } - - free_state (state); + + free_fuse_state (state); STACK_DESTROY (frame->root); return 0; } +void +fuse_setlk_resume (fuse_state_t *state) +{ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": SETLK%s %p", state->finh->unique, + state->finh->opcode == FUSE_SETLK ? "" : "W", state->fd); + + FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK, lk, state->fd, + state->finh->opcode == FUSE_SETLK ? 
F_SETLK : F_SETLKW, + &state->lk_lock, state->xdata); +} + + static void -fuse_setlk (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - struct flock *lock, int sleep) +fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg) { + struct fuse_lk_in *fli = msg; + fuse_state_t *state = NULL; fd_t *fd = NULL; - - fd = FI_TO_FD (fi); - state = state_from_req (req); - state->req = req; + + fd = FH_TO_FD (fli->fh); + GET_STATE (this, finh, state); + state->finh = finh; state->fd = fd; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRId64": SETLK %p (sleep=%d)", req_callid (req), fd, - sleep); + fuse_resolve_fd_init (state, &state->resolve, fd); + + convert_fuse_file_lock (&fli->lk, &state->lk_lock, + fli->owner); - FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK, - lk, fd, (sleep ? F_SETLKW : F_SETLK), lock); + state->lk_owner = fli->owner; + + fuse_resolve_and_resume (state, fuse_setlk_resume); return; } -static void -fuse_init (void *data, struct fuse_conn_info *conn) +static void * +notify_kernel_loop (void *data) { - return; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + struct fuse_out_header *fouh = NULL; + int rv = 0; + + char inval_buf[INVAL_BUF_SIZE] = {0,}; + + this = data; + priv = this->private; + + for (;;) { + rv = read (priv->revchan_in, inval_buf, sizeof (*fouh)); + if (rv != sizeof (*fouh)) + break; + fouh = (struct fuse_out_header *)inval_buf; + rv = read (priv->revchan_in, inval_buf + sizeof (*fouh), + fouh->len - sizeof (*fouh)); + if (rv != fouh->len - sizeof (*fouh)) + break; + rv = write (priv->fd, inval_buf, fouh->len); + if (rv != fouh->len && !(rv == -1 && errno == ENOENT)) + break; + } + + close (priv->revchan_in); + close (priv->revchan_out); + + gf_log ("glusterfs-fuse", GF_LOG_INFO, + "kernel notifier loop terminated"); + + return NULL; +} + + +static void +fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_init_in *fini = msg; + struct fuse_init_out fino = {0,}; + fuse_private_t *priv = NULL; 
+ int ret = 0; + int pfd[2] = {0,}; + pthread_t messenger; + + priv = this->private; + + if (priv->init_recvd) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "got INIT after first message"); + + close (priv->fd); + goto out; + } + + priv->init_recvd = 1; + + if (fini->major != FUSE_KERNEL_VERSION) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "unsupported FUSE protocol version %d.%d", + fini->major, fini->minor); + + close (priv->fd); + goto out; + } + priv->proto_minor = fini->minor; + + fino.major = FUSE_KERNEL_VERSION; + fino.minor = FUSE_KERNEL_MINOR_VERSION; + fino.max_readahead = 1 << 17; + fino.max_write = 1 << 17; + fino.flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; +#if FUSE_KERNEL_MINOR_VERSION >= 17 + if (fini->minor >= 17) + fino.flags |= FUSE_FLOCK_LOCKS; +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 12 + if (fini->minor >= 12) { + /* let fuse leave the umask processing to us, so that it does not + * break extended POSIX ACL defaults on server */ + fino.flags |= FUSE_DONT_MASK; + } +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 9 + if (fini->minor >= 6 /* fuse_init_in has flags */ && + fini->flags & FUSE_BIG_WRITES) { + /* no need for direct I/O mode by default if big writes are supported */ + if (priv->direct_io_mode == 2) + priv->direct_io_mode = 0; + fino.flags |= FUSE_BIG_WRITES; + } + + /* Used for 'reverse invalidation of inode' */ + if (fini->minor >= 12) { + if (pipe(pfd) == -1) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "cannot create pipe pair (%s)", + strerror(errno)); + + close (priv->fd); + goto out; + } + priv->revchan_in = pfd[0]; + priv->revchan_out = pfd[1]; + ret = gf_thread_create (&messenger, NULL, notify_kernel_loop, + this); + if (ret != 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "failed to start messenger daemon (%s)", + strerror(errno)); + + close (priv->fd); + goto out; + } + priv->reverse_fuse_thread_started = _gf_true; + } else { + /* + * FUSE minor < 12 does not implement invalidate notifications. 
+ * This mechanism is required for fopen-keep-cache to operate + * correctly. Disable and warn the user. + */ + if (priv->fopen_keep_cache) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, "FUSE version " + "%d.%d does not support inval notifications. " + "fopen-keep-cache disabled.", fini->major, + fini->minor); + priv->fopen_keep_cache = 0; + } + } + + if (fini->minor >= 13) { + fino.max_background = priv->background_qlen; + fino.congestion_threshold = priv->congestion_threshold; + } + if (fini->minor < 9) + *priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE; +#endif + if (priv->use_readdirp) { + if (fini->flags & FUSE_DO_READDIRPLUS) + fino.flags |= FUSE_DO_READDIRPLUS; + } + + if (priv->fopen_keep_cache == 2) { + /* If user did not explicitly set --fopen-keep-cache[=off], + then check if kernel support FUSE_AUTO_INVAL_DATA and ... + */ + if (fini->flags & FUSE_AUTO_INVAL_DATA) { + /* ... enable fopen_keep_cache mode if supported. + */ + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "Detected " + "support for FUSE_AUTO_INVAL_DATA. Enabling " + "fopen_keep_cache automatically."); + fino.flags |= FUSE_AUTO_INVAL_DATA; + priv->fopen_keep_cache = 1; + } else { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "No support " + "for FUSE_AUTO_INVAL_DATA. Disabling " + "fopen_keep_cache."); + /* ... else disable. */ + priv->fopen_keep_cache = 0; + } + } else if (priv->fopen_keep_cache == 1) { + /* If user explicitly set --fopen-keep-cache[=on], + then enable FUSE_AUTO_INVAL_DATA if possible. + */ + if (fini->flags & FUSE_AUTO_INVAL_DATA) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "fopen_keep_cache " + "is explicitly set. Enabling FUSE_AUTO_INVAL_DATA"); + fino.flags |= FUSE_AUTO_INVAL_DATA; + } else { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, "fopen_keep_cache " + "is explicitly set. 
Support for " + "FUSE_AUTO_INVAL_DATA is missing"); + } + } + + if (fini->flags & FUSE_ASYNC_DIO) + fino.flags |= FUSE_ASYNC_DIO; + + ret = send_fuse_obj (this, finh, &fino); + if (ret == 0) + gf_log ("glusterfs-fuse", GF_LOG_INFO, + "FUSE inited with protocol versions:" + " glusterfs %d.%d kernel %d.%d", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION, + fini->major, fini->minor); + else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "FUSE init failed (%s)", strerror (ret)); + + close (priv->fd); + } + + out: + GF_FREE (finh); } + +static void +fuse_enosys (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + send_fuse_err (this, finh, ENOSYS); + + GF_FREE (finh); +} + + static void -fuse_destroy (void *data) -{ - -} - -static struct fuse_lowlevel_ops fuse_ops = { - .init = fuse_init, - .destroy = fuse_destroy, - .lookup = fuse_lookup, - .forget = fuse_forget, - .getattr = fuse_getattr, - .setattr = fuse_setattr, - .opendir = fuse_opendir, - .readdir = fuse_readdir, - .releasedir = fuse_releasedir, - .access = fuse_access, - .readlink = fuse_readlink, - .mknod = fuse_mknod, - .mkdir = fuse_mkdir, - .unlink = fuse_unlink, - .rmdir = fuse_rmdir, - .symlink = fuse_symlink, - .rename = fuse_rename, - .link = fuse_link, - .create = fuse_create, - .open = fuse_open, - .read = fuse_readv, - .write = fuse_write, - .flush = fuse_flush, - .release = fuse_release, - .fsync = fuse_fsync, - .fsyncdir = fuse_fsyncdir, - .statfs = fuse_statfs, - .setxattr = fuse_setxattr, - .getxattr = fuse_getxattr, - .listxattr = fuse_listxattr, - .removexattr = fuse_removexattr, - .getlk = fuse_getlk, - .setlk = fuse_setlk +fuse_destroy (xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + send_fuse_err (this, finh, 0); + + GF_FREE (finh); +} + + + +struct fuse_first_lookup { + pthread_mutex_t mutex; + pthread_cond_t cond; + char fin; }; +int +fuse_first_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, 
dict_t *xattr, + struct iatt *postparent) +{ + struct fuse_first_lookup *stub = NULL; + + stub = frame->local; + + if (op_ret == 0) { + gf_log (this->name, GF_LOG_TRACE, + "first lookup on root succeeded."); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "first lookup on root failed."); + } + + pthread_mutex_lock (&stub->mutex); + { + stub->fin = 1; + pthread_cond_broadcast (&stub->cond); + } + pthread_mutex_unlock (&stub->mutex); + + return 0; +} + int -fuse_root_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct stat *buf, dict_t *xattr) +fuse_first_lookup (xlator_t *this) { - fuse_private_t *priv = NULL; + fuse_private_t *priv = NULL; + loc_t loc = {0, }; + call_frame_t *frame = NULL; + xlator_t *xl = NULL; + dict_t *dict = NULL; + struct fuse_first_lookup stub; + uuid_t gfid; + int ret; priv = this->private; - if (op_ret == 0) { - gf_log (this->name, GF_LOG_TRACE, - "first lookup on root succeeded."); - inode_lookup (inode); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "first lookup on root failed."); - } - STACK_DESTROY (frame->root); - pthread_mutex_lock (&priv->first_call_mutex); - { - priv->first_call = 0; - pthread_cond_broadcast (&priv->first_call_cond); - } - pthread_mutex_unlock (&priv->first_call_mutex); - return 0; + loc.path = "/"; + loc.name = ""; + loc.inode = fuse_ino_to_inode (1, this); + uuid_copy (loc.gfid, loc.inode->gfid); + loc.parent = NULL; + + dict = dict_new (); + frame = create_frame (this, this->ctx->pool); + frame->root->type = GF_OP_TYPE_FOP; + + xl = priv->active_subvol; + + pthread_mutex_init (&stub.mutex, NULL); + pthread_cond_init (&stub.cond, NULL); + stub.fin = 0; + + frame->local = &stub; + + memset (gfid, 0, 16); + gfid[15] = 1; + ret = dict_set_static_bin (dict, "gfid-req", gfid, 16); + if (ret) + gf_log (xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'"); + + STACK_WIND (frame, fuse_first_lookup_cbk, xl, xl->fops->lookup, + &loc, dict); + 
dict_unref (dict); + + pthread_mutex_lock (&stub.mutex); + { + while (!stub.fin) { + pthread_cond_wait (&stub.cond, &stub.mutex); + } + } + pthread_mutex_unlock (&stub.mutex); + + pthread_mutex_destroy (&stub.mutex); + pthread_cond_destroy (&stub.cond); + + frame->local = NULL; + STACK_DESTROY (frame->root); + + return 0; } int -fuse_root_lookup (xlator_t *this) +fuse_nameless_lookup (xlator_t *xl, uuid_t gfid, loc_t *loc) { - fuse_private_t *priv = NULL; - loc_t loc; - call_frame_t *frame = NULL; - xlator_t *xl = NULL; - dict_t *dict = NULL; + int ret = -1; + dict_t *xattr_req = NULL; + struct iatt iatt = {0, }; + inode_t *linked_inode = NULL; - priv = this->private; + if ((loc == NULL) || (xl == NULL)) { + goto out; + } - pthread_cond_init (&priv->first_call_cond, NULL); - pthread_mutex_init (&priv->first_call_mutex, NULL); - - loc.path = "/"; - loc.name = ""; - loc.ino = 1; - loc.inode = inode_search (this->itable, 1, NULL); - loc.parent = NULL; - - dict = dict_new(); - frame = create_frame (this, this->ctx->pool); - frame->root->type = GF_OP_TYPE_FOP_REQUEST; - xl = this->children->xlator; - - STACK_WIND (frame, fuse_root_lookup_cbk, xl, xl->fops->lookup, - &loc, dict); - dict_unref (dict); - - pthread_mutex_lock (&priv->first_call_mutex); - { - while (priv->first_call) { - pthread_cond_wait (&priv->first_call_cond, - &priv->first_call_mutex); - } - } - pthread_mutex_unlock (&priv->first_call_mutex); + if (loc->inode == NULL) { + loc->inode = inode_new (xl->itable); + if (loc->inode == NULL) { + goto out; + } + } - return 0; + uuid_copy (loc->gfid, gfid); + + xattr_req = dict_new (); + if (xattr_req == NULL) { + goto out; + } + + ret = syncop_lookup (xl, loc, xattr_req, &iatt, NULL, NULL); + if (ret < 0) { + goto out; + } + + linked_inode = inode_link (loc->inode, NULL, NULL, &iatt); + inode_unref (loc->inode); + loc->inode = linked_inode; + + ret = 0; +out: + if (xattr_req != NULL) { + dict_unref (xattr_req); + } + + return ret; +} + + +int 
+fuse_migrate_fd_open (xlator_t *this, fd_t *basefd, fd_t *oldfd, + xlator_t *old_subvol, xlator_t *new_subvol) +{ + loc_t loc = {0, }; + fd_t *newfd = NULL, *old_activefd = NULL; + fuse_fd_ctx_t *basefd_ctx = NULL; + fuse_fd_ctx_t *newfd_ctx = NULL; + int ret = 0, flags = 0; + + ret = inode_path (basefd->inode, NULL, (char **)&loc.path); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "cannot construct path of gfid (%s) failed" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + uuid_utoa (basefd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + goto out; + } + + uuid_copy (loc.gfid, basefd->inode->gfid); + + loc.inode = inode_find (new_subvol->itable, basefd->inode->gfid); + + if (loc.inode == NULL) { + ret = fuse_nameless_lookup (new_subvol, basefd->inode->gfid, + &loc); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "name-less lookup of gfid (%s) failed (%s)" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + uuid_utoa (basefd->inode->gfid), + strerror (errno), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + goto out; + } + + } + + basefd_ctx = fuse_fd_ctx_get (this, basefd); + GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out); + + newfd = fd_create (loc.inode, basefd->pid); + if (newfd == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "cannot create new fd, hence not migrating basefd " + "(ptr:%p inode-gfid:%s) " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, + uuid_utoa (loc.inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + goto out; + } + + newfd->flags = basefd->flags; + if (newfd->lk_ctx) + fd_lk_ctx_unref (newfd->lk_ctx); + + newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx); + + newfd_ctx = fuse_fd_ctx_check_n_create (this, newfd); + GF_VALIDATE_OR_GOTO ("glusterfs-fuse", newfd_ctx, out); + + if (IA_ISDIR (basefd->inode->ia_type)) { + ret = syncop_opendir 
(new_subvol, &loc, newfd); + } else { + flags = basefd->flags & ~(O_CREAT | O_EXCL | O_TRUNC); + ret = syncop_open (new_subvol, &loc, flags, newfd); + } + + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "open on basefd (ptr:%p inode-gfid:%s) failed (%s)" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, + uuid_utoa (basefd->inode->gfid), strerror (errno), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + goto out; + } + + fd_bind (newfd); + + LOCK (&basefd->lock); + { + if (basefd_ctx->activefd != NULL) { + old_activefd = basefd_ctx->activefd; + } + + basefd_ctx->activefd = newfd; + } + UNLOCK (&basefd->lock); + + if (old_activefd != NULL) { + fd_unref (old_activefd); + } + + gf_log ("glusterfs-fuse", GF_LOG_INFO, + "migrated basefd (%p) to newfd (%p) (inode-gfid:%s)" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, newfd, + uuid_utoa (basefd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + + ret = 0; + +out: + loc_wipe (&loc); + + return ret; +} + +int +fuse_migrate_locks (xlator_t *this, fd_t *basefd, fd_t *oldfd, + xlator_t *old_subvol, xlator_t *new_subvol) +{ + int ret = -1; + dict_t *lockinfo = NULL; + void *ptr = NULL; + fd_t *newfd = NULL; + fuse_fd_ctx_t *basefd_ctx = NULL; + + + if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx)) + return 0; + + basefd_ctx = fuse_fd_ctx_get (this, basefd); + GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out); + + LOCK (&basefd->lock); + { + newfd = fd_ref (basefd_ctx->activefd); + } + UNLOCK (&basefd->lock); + + ret = syncop_fgetxattr (old_subvol, oldfd, &lockinfo, + GF_XATTR_LOCKINFO_KEY); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "getting lockinfo failed while migrating locks" + "(oldfd:%p newfd:%p inode-gfid:%s)" + "(old-subvol:%s-%d new-subvol:%s-%d)", + oldfd, newfd, uuid_utoa (newfd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, 
new_subvol->graph->id); + goto out; + } + + ret = dict_get_ptr (lockinfo, GF_XATTR_LOCKINFO_KEY, &ptr); + if (ptr == NULL) { + ret = 0; + gf_log (this->name, GF_LOG_INFO, + "No lockinfo present on any of the bricks " + "(oldfd: %p newfd:%p inode-gfid:%s) " + "(old-subvol:%s-%d new-subvol:%s-%d)", + oldfd, newfd, uuid_utoa (newfd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + + goto out; + } + + ret = syncop_fsetxattr (new_subvol, newfd, lockinfo, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "migrating locks failed (oldfd:%p newfd:%p " + "inode-gfid:%s) (old-subvol:%s-%d new-subvol:%s-%d)", + oldfd, newfd, uuid_utoa (newfd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + goto out; + } + +out: + if (newfd) + fd_unref (newfd); + + if (lockinfo != NULL) { + dict_unref (lockinfo); + } + + return ret; +} + + +int +fuse_migrate_fd (xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + int ret = -1; + char create_in_progress = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + fd_t *oldfd = NULL; + + basefd_ctx = fuse_fd_ctx_get (this, basefd); + GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out); + + LOCK (&basefd->lock); + { + oldfd = basefd_ctx->activefd ? 
basefd_ctx->activefd + : basefd; + fd_ref (oldfd); + } + UNLOCK (&basefd->lock); + + LOCK (&oldfd->inode->lock); + { + if (uuid_is_null (oldfd->inode->gfid)) { + create_in_progress = 1; + } else { + create_in_progress = 0; + } + } + UNLOCK (&oldfd->inode->lock); + + if (create_in_progress) { + gf_log ("glusterfs-fuse", GF_LOG_INFO, + "create call on fd (%p) is in progress " + "(basefd-ptr:%p basefd-inode.gfid:%s), " + "hence deferring migration till application does an " + "fd based operation on this fd" + "(old-subvolume:%s-%d, new-subvolume:%s-%d)", + oldfd, basefd, uuid_utoa (basefd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + + ret = 0; + goto out; + } + + if (oldfd->inode->table->xl == old_subvol) { + ret = syncop_fsync (old_subvol, oldfd, 0); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "syncop_fsync failed (%s) on fd (%p)" + "(basefd:%p basefd-inode.gfid:%s) " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + strerror (errno), oldfd, basefd, + uuid_utoa (basefd->inode->gfid), + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + } + } else { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "basefd (ptr:%p inode-gfid:%s) was not " + "migrated during previous graph switch" + "(old-subvolume:%s-%d new-subvolume: %s-%d)", basefd, + basefd->inode->gfid, + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + } + + ret = fuse_migrate_fd_open (this, basefd, oldfd, old_subvol, + new_subvol); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "open corresponding to " + "basefd (ptr:%p inode-gfid:%s) in new graph failed " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, + uuid_utoa (basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + goto out; + } + + ret = fuse_migrate_locks (this, basefd, oldfd, old_subvol, + new_subvol); + if (ret < 0) { + gf_log (this->name, 
GF_LOG_WARNING, + "migrating locks from old-subvolume (%s-%d) to " + "new-subvolume (%s-%d) failed (inode-gfid:%s oldfd:%p " + "basefd:%p)", old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id, + uuid_utoa (basefd->inode->gfid), oldfd, basefd); + + } +out: + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "migration of basefd " + "(ptr:%p inode-gfid:%s) failed" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, + oldfd ? uuid_utoa (oldfd->inode->gfid) : NULL, + old_subvol->name, old_subvol->graph->id, + new_subvol->name, new_subvol->graph->id); + } + + fd_unref (oldfd); + + return ret; +} + + +int +fuse_handle_opened_fds (xlator_t *this, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + fuse_private_t *priv = NULL; + fdentry_t *fdentries = NULL; + uint32_t count = 0; + fdtable_t *fdtable = NULL; + int i = 0; + fd_t *fd = NULL; + int32_t ret = 0; + fuse_fd_ctx_t *fdctx = NULL; + + priv = this->private; + + fdtable = priv->fdtable; + + fdentries = gf_fd_fdtable_copy_all_fds (fdtable, &count); + if (fdentries != NULL) { + for (i = 0; i < count; i++) { + fd = fdentries[i].fd; + if (fd == NULL) + continue; + + ret = fuse_migrate_fd (this, fd, old_subvol, + new_subvol); + + fdctx = fuse_fd_ctx_get (this, fd); + if (fdctx) { + LOCK (&fd->lock); + { + if (ret < 0) { + fdctx->migration_failed = 1; + } else { + fdctx->migration_failed = 0; + } + } + UNLOCK (&fd->lock); + } + } + + for (i = 0; i < count ; i++) { + fd = fdentries[i].fd; + if (fd) + fd_unref (fd); + } + + GF_FREE (fdentries); + } + + return 0; } +static int +fuse_handle_blocked_locks (xlator_t *this, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + return 0; +} + + +static int +fuse_graph_switch_task (void *data) +{ + fuse_graph_switch_args_t *args = NULL; + + args = data; + if (args == NULL) { + goto out; + } + + /* don't change the order of handling open fds and blocked locks, since + * the act of opening files also reacquires granted locks in new graph. 
+ */ + fuse_handle_opened_fds (args->this, args->old_subvol, args->new_subvol); + + fuse_handle_blocked_locks (args->this, args->old_subvol, + args->new_subvol); + +out: + return 0; +} + + +fuse_graph_switch_args_t * +fuse_graph_switch_args_alloc (void) +{ + fuse_graph_switch_args_t *args = NULL; + + args = GF_CALLOC (1, sizeof (*args), gf_fuse_mt_graph_switch_args_t); + if (args == NULL) { + goto out; + } + +out: + return args; +} + + +void +fuse_graph_switch_args_destroy (fuse_graph_switch_args_t *args) +{ + if (args == NULL) { + goto out; + } + + GF_FREE (args); +out: + return; +} + + +int +fuse_handle_graph_switch (xlator_t *this, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + call_frame_t *frame = NULL; + int32_t ret = -1; + fuse_graph_switch_args_t *args = NULL; + + frame = create_frame (this, this->ctx->pool); + if (frame == NULL) { + goto out; + } + + args = fuse_graph_switch_args_alloc (); + if (args == NULL) { + goto out; + } + + args->this = this; + args->old_subvol = old_subvol; + args->new_subvol = new_subvol; + + ret = synctask_new (this->ctx->env, fuse_graph_switch_task, NULL, frame, + args); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, "starting sync-task to " + "handle graph switch failed"); + goto out; + } + + ret = 0; +out: + if (args != NULL) { + fuse_graph_switch_args_destroy (args); + } + + if (frame != NULL) { + STACK_DESTROY (frame->root); + } + + return ret; +} + + +int +fuse_graph_sync (xlator_t *this) +{ + fuse_private_t *priv = NULL; + int need_first_lookup = 0; + int ret = 0; + xlator_t *old_subvol = NULL, *new_subvol = NULL; + uint64_t winds_on_old_subvol = 0; + + priv = this->private; + + pthread_mutex_lock (&priv->sync_mutex); + { + if (!priv->next_graph) + goto unlock; + + old_subvol = priv->active_subvol; + new_subvol = priv->active_subvol = priv->next_graph->top; + priv->next_graph = NULL; + need_first_lookup = 1; + + while (!priv->event_recvd) { + ret = pthread_cond_wait (&priv->sync_cond, + &priv->sync_mutex); + 
if (ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + "timedwait returned non zero value " + "ret: %d errno: %d", ret, errno); + break; + } + } + } +unlock: + pthread_mutex_unlock (&priv->sync_mutex); + + if (need_first_lookup) { + fuse_first_lookup (this); + } + + if ((old_subvol != NULL) && (new_subvol != NULL)) { + fuse_handle_graph_switch (this, old_subvol, new_subvol); + + pthread_mutex_lock (&priv->sync_mutex); + { + old_subvol->switched = 1; + winds_on_old_subvol = old_subvol->winds; + } + pthread_mutex_unlock (&priv->sync_mutex); + + if (winds_on_old_subvol == 0) { + xlator_notify (old_subvol, GF_EVENT_PARENT_DOWN, + old_subvol, NULL); + } + } + + return 0; +} + +int +fuse_get_mount_status (xlator_t *this) +{ + int kid_status = -1; + fuse_private_t *priv = this->private; + + if (read(priv->status_pipe[0],&kid_status, sizeof(kid_status)) < 0) { + gf_log (this->name, GF_LOG_ERROR, "could not get mount status"); + kid_status = -1; + } + gf_log (this->name, GF_LOG_DEBUG, "mount status is %d", kid_status); + + close(priv->status_pipe[0]); + close(priv->status_pipe[1]); + return kid_status; +} + static void * fuse_thread_proc (void *data) { - char *mount_point = NULL; - xlator_t *this = NULL; - fuse_private_t *priv = NULL; - int32_t res = 0; - struct iobuf *iobuf = NULL; - size_t chan_size = 0; + char *mount_point = NULL; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + ssize_t res = 0; + struct iobuf *iobuf = NULL; + fuse_in_header_t *finh; + struct iovec iov_in[2]; + void *msg = NULL; + const size_t msg0_size = sizeof (*finh) + 128; + fuse_handler_t **fuse_ops = NULL; + struct pollfd pfd[2] = {{0,}}; + gf_boolean_t mount_finished = _gf_false; this = data; priv = this->private; - chan_size = fuse_chan_bufsize (priv->ch); + fuse_ops = priv->fuse_ops; + + THIS = this; + + iov_in[0].iov_len = sizeof (*finh) + sizeof (struct fuse_write_in); + iov_in[1].iov_len = ((struct iobuf_pool *)this->ctx->iobuf_pool) + ->default_page_size; + priv->msg0_len_p = 
&iov_in[0].iov_len; + + for (;;) { + /* THIS has to be reset here */ + THIS = this; + + if (!mount_finished) { + memset(pfd,0,sizeof(pfd)); + pfd[0].fd = priv->status_pipe[0]; + pfd[0].events = POLLIN | POLLHUP | POLLERR; + pfd[1].fd = priv->fd; + pfd[1].events = POLLIN | POLLHUP | POLLERR; + if (poll(pfd,2,-1) < 0) { + gf_log (this->name, GF_LOG_ERROR, + "poll error %s", strerror(errno)); + break; + } + if (pfd[0].revents & POLLIN) { + if (fuse_get_mount_status(this) != 0) { + break; + } + mount_finished = _gf_true; + } + else if (pfd[0].revents) { + gf_log (this->name, GF_LOG_ERROR, + "mount pipe closed without status"); + break; + } + if (!pfd[1].revents) { + continue; + } + } - while (!fuse_session_exited (priv->se)) { + /* + * We don't want to block on readv while we're still waiting + * for mount status. That means we only want to get here if + * mount_status is true (meaning that our wait completed + * already) or if we already called poll(2) on priv->fd to + * make sure it's ready. + */ + + if (priv->init_recvd) + fuse_graph_sync (this); + + /* TODO: This place should always get maximum supported buffer + size from 'fuse', which is as of today 128KB. If we bring in + support for higher block sizes support, then we should be + changing this one too */ iobuf = iobuf_get (this->ctx->iobuf_pool); - if (!iobuf) { + /* Add extra 128 byte to the first iov so that it can + * accommodate "ordinary" non-write requests. It's not + * guaranteed to be big enough, as SETXATTR and namespace + * operations with very long names may grow behind it, + * but it's good enough in most cases (and we can handle + * rest via realloc). 
+ */ + iov_in[0].iov_base = GF_CALLOC (1, msg0_size, + gf_fuse_mt_iov_base); + + if (!iobuf || !iov_in[0].iov_base) { gf_log (this->name, GF_LOG_ERROR, "Out of memory"); + if (iobuf) + iobuf_unref (iobuf); + GF_FREE (iov_in[0].iov_base); sleep (10); continue; } - res = fuse_chan_receive (priv->ch, iobuf->ptr, chan_size); + iov_in[1].iov_base = iobuf->ptr; - if (priv->first_call) { - if (priv->first_call > 1) { - priv->first_call--; - } else { - fuse_root_lookup (this); - } - } + res = readv (priv->fd, iov_in, 2); if (res == -1) { + if (errno == ENODEV || errno == EBADF) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "terminating upon getting %s when " + "reading /dev/fuse", + errno == ENODEV ? "ENODEV" : "EBADF"); + fuse_log_eh (this, "glusterfs-fuse: terminating" + " upon getting %s when " + "reading /dev/fuse", + errno == ENODEV ? "ENODEV": + "EBADF"); + break; + } if (errno != EINTR) { gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "fuse_chan_receive() returned -1 (%d)", errno); + "read from /dev/fuse returned -1 (%s)", + strerror (errno)); + fuse_log_eh (this, "glusterfs-fuse: read from " + "/dev/fuse returned -1 (%s)", + strerror (errno)); } - if (errno == ENODEV) - break; - continue; + + goto cont_err; + } + if (res < sizeof (finh)) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "short read on /dev/fuse"); + fuse_log_eh (this, "glusterfs-fuse: short read on " + "/dev/fuse"); + break; + } + + finh = (fuse_in_header_t *)iov_in[0].iov_base; + + if (res != finh->len +#ifdef GF_DARWIN_HOST_OS + /* work around fuse4bsd/MacFUSE msg size miscalculation bug, + * that is, payload size is not taken into account for + * buffered writes + */ + && !(finh->opcode == FUSE_WRITE && + finh->len == sizeof(*finh) + sizeof(struct fuse_write_in) && + res == finh->len + ((struct fuse_write_in *)(finh + 1))->size) +#endif + ) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "inconsistent read on /dev/fuse"); + fuse_log_eh (this, "glusterfs-fuse: inconsistent read " + "on /dev/fuse"); + 
break; } priv->iobuf = iobuf; - if (res && res != -1) { - fuse_session_process (priv->se, iobuf->ptr, - res, priv->ch); + if (finh->opcode == FUSE_WRITE) + msg = iov_in[1].iov_base; + else { + if (res > msg0_size) { + void *b = GF_REALLOC (iov_in[0].iov_base, res); + if (b) { + iov_in[0].iov_base = b; + finh = (fuse_in_header_t *) + iov_in[0].iov_base; + } + else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "Out of memory"); + send_fuse_err (this, finh, ENOMEM); + + goto cont_err; + } + } + + if (res > iov_in[0].iov_len) + memcpy (iov_in[0].iov_base + iov_in[0].iov_len, + iov_in[1].iov_base, + res - iov_in[0].iov_len); + + msg = finh + 1; } + if (priv->uid_map_root && + finh->uid == priv->uid_map_root) + finh->uid = 0; + + if (finh->opcode >= FUSE_OP_HIGH) + /* turn down MacFUSE specific messages */ + fuse_enosys (this, finh, msg); + else + fuse_ops[finh->opcode] (this, finh, msg); + + iobuf_unref (iobuf); + continue; + cont_err: iobuf_unref (iobuf); + GF_FREE (iov_in[0].iov_base); } - if (dict_get (this->options, ZR_MOUNTPOINT_OPT)) - mount_point = data_to_str (dict_get (this->options, - ZR_MOUNTPOINT_OPT)); - if (mount_point) { - gf_log (this->name, GF_LOG_NORMAL, - "unmounting %s", mount_point); - dict_del (this->options, ZR_MOUNTPOINT_OPT); - } + /* + * We could be in all sorts of states with respect to iobuf and iov_in + * by the time we get here, and it's just not worth untangling them if + * we're about to kill ourselves anyway. + */ - fuse_session_remove_chan (priv->ch); - fuse_session_destroy (priv->se); - // fuse_unmount (priv->mount_point, priv->ch); - - raise (SIGTERM); - + if (dict_get (this->options, ZR_MOUNTPOINT_OPT)) + mount_point = data_to_str (dict_get (this->options, + ZR_MOUNTPOINT_OPT)); + if (mount_point) { + gf_log (this->name, GF_LOG_INFO, + "unmounting %s", mount_point); + } + + /* Kill the whole process, not just this thread. 
*/ + kill (getpid(), SIGTERM); return NULL; } int32_t +fuse_itable_dump (xlator_t *this) +{ + if (!this) + return -1; + + gf_proc_dump_add_section("xlator.mount.fuse.itable"); + inode_table_dump(this->itable, "xlator.mount.fuse.itable"); + + return 0; +} + +int32_t +fuse_priv_dump (xlator_t *this) +{ + fuse_private_t *private = NULL; + + if (!this) + return -1; + + private = this->private; + + if (!private) + return -1; + + gf_proc_dump_add_section("xlator.mount.fuse.priv"); + + gf_proc_dump_write("fd", "%d", private->fd); + gf_proc_dump_write("proto_minor", "%u", + private->proto_minor); + gf_proc_dump_write("volfile", "%s", + private->volfile?private->volfile:"None"); + gf_proc_dump_write("volfile_size", "%d", + private->volfile_size); + gf_proc_dump_write("mount_point", "%s", + private->mount_point); + gf_proc_dump_write("iobuf", "%u", + private->iobuf); + gf_proc_dump_write("fuse_thread_started", "%d", + (int)private->fuse_thread_started); + gf_proc_dump_write("direct_io_mode", "%d", + private->direct_io_mode); + gf_proc_dump_write("entry_timeout", "%lf", + private->entry_timeout); + gf_proc_dump_write("attribute_timeout", "%lf", + private->attribute_timeout); + gf_proc_dump_write("init_recvd", "%d", + (int)private->init_recvd); + gf_proc_dump_write("strict_volfile_check", "%d", + (int)private->strict_volfile_check); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); + + return 0; +} + +int +fuse_history_dump (xlator_t *this) +{ + int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,}; + + GF_VALIDATE_OR_GOTO ("fuse", this, out); + GF_VALIDATE_OR_GOTO (this->name, this->history, out); + + gf_proc_dump_build_key (key_prefix, "xlator.mount.fuse", + "history"); + gf_proc_dump_add_section (key_prefix); + eh_dump (this->history, NULL, dump_history_fuse); + + ret = 0; +out: + return ret; +} + +int +dump_history_fuse (circular_buffer_t *cb, void 
*data) +{ + char *string = NULL; + struct tm *tm = NULL; + char timestr[256] = {0,}; + + string = (char *)cb->data; + tm = localtime (&cb->tv.tv_sec); + + if (tm) { + strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm); + snprintf (timestr + strlen (timestr), 256 - strlen (timestr), + ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec); + gf_proc_dump_write ("TIME", "%s", timestr); + } + + gf_proc_dump_write ("message", "%s\n", string); + + return 0; +} + +int +fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph) +{ + inode_table_t *itable = NULL; + int ret = 0; + fuse_private_t *priv = NULL; + + priv = this->private; + + /* handle the case of more than one CHILD_UP on same graph */ + if (priv->active_subvol == graph->top) + return 0; /* This is a valid case */ + + if (graph->used) + return 0; + + graph->used = 1; + + itable = inode_table_new (0, graph->top); + if (!itable) + return -1; + + ((xlator_t *)graph->top)->itable = itable; + + pthread_mutex_lock (&priv->sync_mutex); + { + priv->next_graph = graph; + priv->event_recvd = 0; + + pthread_cond_signal (&priv->sync_cond); + } + pthread_mutex_unlock (&priv->sync_mutex); + + gf_log ("fuse", GF_LOG_INFO, "switched to graph %d", + ((graph) ? graph->id : 0)); + + return ret; +} + + +int notify (xlator_t *this, int32_t event, void *data, ...) { - int32_t ret = 0; - fuse_private_t *private = NULL; + int32_t ret = 0; + fuse_private_t *private = NULL; + glusterfs_graph_t *graph = NULL; private = this->private; - + + graph = data; + + gf_log ("fuse", GF_LOG_DEBUG, "got event %d on graph %d", + event, ((graph) ? 
graph->id : 0)); + switch (event) { + case GF_EVENT_GRAPH_NEW: + break; + case GF_EVENT_CHILD_UP: - case GF_EVENT_CHILD_CONNECTING: + case GF_EVENT_CHILD_DOWN: + case GF_EVENT_CHILD_CONNECTING: { - if (!private->fuse_thread_started) - { - private->fuse_thread_started = 1; + if (graph) { + ret = fuse_graph_setup (this, graph); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to setup the graph"); + } + + if ((event == GF_EVENT_CHILD_UP) + || (event == GF_EVENT_CHILD_DOWN)) { + pthread_mutex_lock (&private->sync_mutex); + { + private->event_recvd = 1; + pthread_cond_broadcast (&private->sync_cond); + } + pthread_mutex_unlock (&private->sync_mutex); + } - ret = pthread_create (&private->fuse_thread, NULL, - fuse_thread_proc, this); + if (!private->fuse_thread_started) { + private->fuse_thread_started = 1; + ret = gf_thread_create (&private->fuse_thread, NULL, + fuse_thread_proc, this); if (ret != 0) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "pthread_create() failed (%s)", + gf_log (this->name, GF_LOG_DEBUG, + "pthread_create() failed (%s)", strerror (errno)); - - /* If fuse thread is not started, that means, - its hung, we can't use this process. */ - raise (SIGTERM); + break; } } + break; } - case GF_EVENT_PARENT_UP: + case GF_EVENT_AUTH_FAILED: { - default_notify (this, GF_EVENT_PARENT_UP, data); - break; - } - case GF_EVENT_VOLFILE_MODIFIED: - { - gf_log ("fuse", GF_LOG_CRITICAL, - "Remote volume file changed, try re-mounting."); - if (private->strict_volfile_check) { - //fuse_session_remove_chan (private->ch); - //fuse_session_destroy (private->se); - //fuse_unmount (private->mount_point, private->ch); - /* TODO: Above code if works, will be a cleaner way, - but for now, lets just achieve what we want */ - raise (SIGTERM); - } - break; + /* Authentication failure is an error and glusterfs should stop */ + gf_log (this->name, GF_LOG_ERROR, "Server authenication failed. 
Shutting down."); + fini (this); + break; } + default: break; } - return 0; + + return ret; } -static struct fuse_opt subtype_workaround[] = { - FUSE_OPT_KEY("subtype=", 0), - FUSE_OPT_KEY("fssubtype=", 0), - FUSE_OPT_END +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_fuse_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + + +static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { + [FUSE_LOOKUP] = fuse_lookup, + [FUSE_FORGET] = fuse_forget, + [FUSE_GETATTR] = fuse_getattr, + [FUSE_SETATTR] = fuse_setattr, + [FUSE_READLINK] = fuse_readlink, + [FUSE_SYMLINK] = fuse_symlink, + [FUSE_MKNOD] = fuse_mknod, + [FUSE_MKDIR] = fuse_mkdir, + [FUSE_UNLINK] = fuse_unlink, + [FUSE_RMDIR] = fuse_rmdir, + [FUSE_RENAME] = fuse_rename, + [FUSE_LINK] = fuse_link, + [FUSE_OPEN] = fuse_open, + [FUSE_READ] = fuse_readv, + [FUSE_WRITE] = fuse_write, + [FUSE_STATFS] = fuse_statfs, + [FUSE_RELEASE] = fuse_release, + [FUSE_FSYNC] = fuse_fsync, + [FUSE_SETXATTR] = fuse_setxattr, + [FUSE_GETXATTR] = fuse_getxattr, + [FUSE_LISTXATTR] = fuse_listxattr, + [FUSE_REMOVEXATTR] = fuse_removexattr, + [FUSE_FLUSH] = fuse_flush, + [FUSE_INIT] = fuse_init, + [FUSE_OPENDIR] = fuse_opendir, + [FUSE_READDIR] = fuse_readdir, + [FUSE_RELEASEDIR] = fuse_releasedir, + [FUSE_FSYNCDIR] = fuse_fsyncdir, + [FUSE_GETLK] = fuse_getlk, + [FUSE_SETLK] = fuse_setlk, + [FUSE_SETLKW] = fuse_setlk, + [FUSE_ACCESS] = fuse_access, + [FUSE_CREATE] = fuse_create, + /* [FUSE_INTERRUPT] */ + /* [FUSE_BMAP] */ + [FUSE_DESTROY] = fuse_destroy, + /* [FUSE_IOCTL] */ + /* [FUSE_POLL] */ + /* [FUSE_NOTIFY_REPLY] */ + [FUSE_BATCH_FORGET]= fuse_batch_forget, + [FUSE_FALLOCATE] = fuse_fallocate, + [FUSE_READDIRPLUS] = fuse_readdirp, }; -static int -subtype_workaround_optproc(void *data, const char *arg, int key, - struct fuse_args *outargs) + +static 
fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH]; + + +static void +fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg) { - return key ? 1 : 0; + fuse_private_t *priv = NULL; + struct iovec diov[3]; + char r = 'R'; + int ret = 0; + + priv = this->private; + + diov[0].iov_base = &r; + diov[0].iov_len = 1; + diov[1].iov_base = finh; + diov[1].iov_len = sizeof (*finh); + diov[2].iov_base = msg; + diov[2].iov_len = finh->len - sizeof (*finh); + + pthread_mutex_lock (&priv->fuse_dump_mutex); + ret = writev (priv->fuse_dump_fd, diov, 3); + pthread_mutex_unlock (&priv->fuse_dump_mutex); + if (ret == -1) + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "failed to dump fuse message (R): %s", + strerror (errno)); + + priv->fuse_ops0[finh->opcode] (this, finh, msg); } -int + +int init (xlator_t *this_xl) { - int ret = 0; - dict_t *options = NULL; - char *value_string = NULL; - char *fsname = NULL; - char *fsname_opt = NULL; - fuse_private_t *priv = NULL; - struct stat stbuf = {0,}; - struct fuse_args args = FUSE_ARGS_INIT(0, NULL); - char **p = NULL; + int ret = 0; + dict_t *options = NULL; + char *value_string = NULL; + cmd_args_t *cmd_args = NULL; + char *fsname = NULL; + fuse_private_t *priv = NULL; + struct stat stbuf = {0,}; + int i = 0; + int xl_name_allocated = 0; + int fsname_allocated = 0; + glusterfs_ctx_t *ctx = NULL; + gf_boolean_t sync_to_mount = _gf_false; + gf_boolean_t fopen_keep_cache = _gf_false; + unsigned long mntflags = 0; + char *mnt_args = NULL; + eh_t *event = NULL; + + if (this_xl == NULL) + return -1; + + if (this_xl->options == NULL) + return -1; + + ctx = this_xl->ctx; + if (!ctx) + return -1; + + options = this_xl->options; + + if (this_xl->name == NULL) { + this_xl->name = gf_strdup ("fuse"); + if (!this_xl->name) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "Out of memory"); -#ifdef GF_DARWIN_HOST_OS - char *fuse_argv[] = {"glusterfs", - "-o", "XXX", - "-o", "fssubtype=glusterfs", - "-o", "allow_other", - "-o", "default_permissions", - NULL, 
NULL, - NULL}; - - for (p = fuse_argv; *p; p++); - if (!dict_get (options, "macfuse-local")) { - /* This way, GlusterFS will be detected as 'servers' instead - * of 'devices'. This method is useful if you want to do - * 'umount <mount_point>' over network, instead of 'eject'ing - * it from desktop. Works better for servers - */ - *(p++) = "-o"; - *(p++) = "local"; - } + goto cleanup_exit; + } + xl_name_allocated = 1; + } -#elif GF_LINUX_HOST_OS /* ! DARWIN_OS */ - char *fuse_argv[] = {"glusterfs", - "-o", "XXX", - "-o", "subtype=glusterfs", - "-o", "nonempty", - "-o", "max_readahead=131072", - "-o", "max_read=131072", - "-o", "max_write=131072", - "-o", "allow_other", - "-o", "default_permissions", - "-o", "dev", - "-o", "suid", - NULL}; - -#else /* BSD || SOLARIS */ - /* BSD fuse doesn't support '-o dev', '-o nonempty' option */ - char *fuse_argv[] = {"glusterfs", - "-o", "XXX", - "-o", "subtype=glusterfs", - "-o", "max_readahead=131072", - "-o", "max_read=131072", - "-o", "max_write=131072", - "-o", "allow_other", - "-o", "default_permissions", - "-o", "suid", - NULL}; - -#endif /* ! DARWIN_OS || ! LINUX */ - - if (this_xl == NULL) - return -1; - - if (this_xl->options == NULL) - return -1; - - options = this_xl->options; - - if (this_xl->name == NULL) { - this_xl->name = strdup ("fuse"); - ERR_ABORT(this_xl->name); - } + priv = GF_CALLOC (1, sizeof (*priv), gf_fuse_mt_fuse_private_t); + if (!priv) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "Out of memory"); - fsname = this_xl->ctx->cmd_args.volume_file; - fsname = (fsname ? fsname : this_xl->ctx->cmd_args.volfile_server); - fsname = (fsname ? 
fsname : "glusterfs"); - ret = asprintf(&fsname_opt, "fsname=%s", fsname); - if (ret == -1) - ERR_ABORT(NULL); - fuse_argv[2] = fsname_opt; - - for (p = fuse_argv; *p; p++); - args.argc = p - fuse_argv; - args.argv = fuse_argv; - - priv = CALLOC (1, sizeof (*priv)); - ERR_ABORT (priv); + goto cleanup_exit; + } this_xl->private = (void *) priv; - - /* get options from option dictionary */ - ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string); - if (value_string == NULL) { + priv->mount_point = NULL; + priv->fd = -1; + priv->revchan_in = -1; + priv->revchan_out = -1; + + /* get options from option dictionary */ + ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string); + if (ret == -1 || value_string == NULL) { gf_log ("fuse", GF_LOG_ERROR, - "Mandatory option 'mountpoint' is not specified."); - goto cleanup_exit; - } + "Mandatory option 'mountpoint' is not specified."); + goto cleanup_exit; + } - if (stat (value_string, &stbuf) != 0) { - if (errno == ENOENT) { - gf_log (this_xl->name, GF_LOG_ERROR, - "%s %s does not exist", - ZR_MOUNTPOINT_OPT, value_string); - } else if (errno == ENOTCONN) { - gf_log (this_xl->name, GF_LOG_ERROR, - "Mountpoint %s seems to have a stale " - "mount, run 'umount %s' and try again.", - value_string, value_string); - } else { - gf_log (this_xl->name, GF_LOG_DEBUG, - "%s %s : stat returned %s", - ZR_MOUNTPOINT_OPT, - value_string, strerror (errno)); - } - goto cleanup_exit; - } - - if (S_ISDIR (stbuf.st_mode) == 0) { - gf_log (this_xl->name, GF_LOG_ERROR, - "%s %s is not a directory", - ZR_MOUNTPOINT_OPT, value_string); - goto cleanup_exit; - } - priv->mount_point = strdup (value_string); - ERR_ABORT(priv->mount_point); - - ret = dict_get_double (options, "attribute-timeout", - &priv->attribute_timeout); - if (!priv->attribute_timeout) - priv->attribute_timeout = 1.0; /* default */ - - ret = dict_get_double (options, "entry-timeout", - &priv->entry_timeout); - if (!priv->entry_timeout) - priv->entry_timeout = 1.0; /* 
default */ - - - priv->direct_io_mode = 1; - ret = dict_get_str (options, ZR_DIRECT_IO_OPT, &value_string); - if (value_string) { - ret = gf_string2boolean (value_string, &priv->direct_io_mode); - } + if (stat (value_string, &stbuf) != 0) { + if (errno == ENOENT) { + gf_log (this_xl->name, GF_LOG_ERROR, + "%s %s does not exist", + ZR_MOUNTPOINT_OPT, value_string); + } else if (errno == ENOTCONN) { + gf_log (this_xl->name, GF_LOG_ERROR, + "Mountpoint %s seems to have a stale " + "mount, run 'umount %s' and try again.", + value_string, value_string); + } else { + gf_log (this_xl->name, GF_LOG_DEBUG, + "%s %s : stat returned %s", + ZR_MOUNTPOINT_OPT, + value_string, strerror (errno)); + } + goto cleanup_exit; + } - priv->strict_volfile_check = 0; - ret = dict_get_str (options, ZR_STRICT_VOLFILE_CHECK, &value_string); - if (value_string) { - ret = gf_string2boolean (value_string, - &priv->strict_volfile_check); - } + if (S_ISDIR (stbuf.st_mode) == 0) { + gf_log (this_xl->name, GF_LOG_ERROR, + "%s %s is not a directory", + ZR_MOUNTPOINT_OPT, value_string); + goto cleanup_exit; + } + priv->mount_point = gf_strdup (value_string); + if (!priv->mount_point) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "Out of memory"); + + goto cleanup_exit; + } + + GF_OPTION_INIT ("attribute-timeout", priv->attribute_timeout, double, + cleanup_exit); + + GF_OPTION_INIT ("entry-timeout", priv->entry_timeout, double, + cleanup_exit); - priv->ch = fuse_mount (priv->mount_point, &args); - if (priv->ch == NULL) { - if (errno == ENOTCONN) { + GF_OPTION_INIT ("negative-timeout", priv->negative_timeout, double, + cleanup_exit); + + GF_OPTION_INIT ("client-pid", priv->client_pid, int32, cleanup_exit); + /* have to check & register the presence of client-pid manually */ + priv->client_pid_set = !!dict_get (this_xl->options, "client-pid"); + + GF_OPTION_INIT ("uid-map-root", priv->uid_map_root, uint32, + cleanup_exit); + + priv->direct_io_mode = 2; + ret = dict_get_str (options, ZR_DIRECT_IO_OPT, 
&value_string); + if (ret == 0) { + ret = gf_string2boolean (value_string, &priv->direct_io_mode); + GF_ASSERT (ret == 0); + } + + GF_OPTION_INIT (ZR_STRICT_VOLFILE_CHECK, priv->strict_volfile_check, + bool, cleanup_exit); + + GF_OPTION_INIT ("acl", priv->acl, bool, cleanup_exit); + + if (priv->uid_map_root) + priv->acl = 1; + + GF_OPTION_INIT ("selinux", priv->selinux, bool, cleanup_exit); + + GF_OPTION_INIT ("read-only", priv->read_only, bool, cleanup_exit); + + GF_OPTION_INIT ("enable-ino32", priv->enable_ino32, bool, cleanup_exit); + + GF_OPTION_INIT ("use-readdirp", priv->use_readdirp, bool, cleanup_exit); + + priv->fuse_dump_fd = -1; + ret = dict_get_str (options, "dump-fuse", &value_string); + if (ret == 0) { + ret = unlink (value_string); + if (ret != -1 || errno == ENOENT) + ret = open (value_string, O_RDWR|O_CREAT|O_EXCL, + S_IRUSR|S_IWUSR); + if (ret == -1) { gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "A stale mount is present on %s. " - "Run 'umount %s' and try again", - priv->mount_point, - priv->mount_point); - } else { - if (errno == ENOENT) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "Unable to mount on %s. 
Run " - "'modprobe fuse' and try again", - priv->mount_point); - } else { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "fuse_mount() failed with error %s " - "on mount point %s", - strerror (errno), - priv->mount_point); - } + "cannot open fuse dump file %s", + value_string); + + goto cleanup_exit; } - + priv->fuse_dump_fd = ret; + } + + sync_to_mount = _gf_false; + ret = dict_get_str (options, "sync-to-mount", &value_string); + if (ret == 0) { + ret = gf_string2boolean (value_string, + &sync_to_mount); + GF_ASSERT (ret == 0); + } + + priv->fopen_keep_cache = 2; + if (dict_get (options, "fopen-keep-cache")) { + GF_OPTION_INIT("fopen-keep-cache", fopen_keep_cache, bool, + cleanup_exit); + priv->fopen_keep_cache = fopen_keep_cache; + } + + GF_OPTION_INIT("gid-timeout", priv->gid_cache_timeout, int32, + cleanup_exit); + + GF_OPTION_INIT ("fuse-mountopts", priv->fuse_mountopts, str, cleanup_exit); + + if (gid_cache_init(&priv->gid_cache, priv->gid_cache_timeout) < 0) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Failed to initialize " + "group cache."); goto cleanup_exit; } - errno = 0; - - priv->se = fuse_lowlevel_new (&args, &fuse_ops, - sizeof (fuse_ops), this_xl); - if (priv->se == NULL && !errno) { - /* - * Option parsing misery. Can happen if libfuse is of - * FUSE < 2.7.0, as then the "-o subtype" option is not - * handled. - * - * Best we can do to is to handle it at runtime -- this is not - * a binary incompatibility issue (which should dealt with at - * compile time), but a behavioural incompatibility issue. Ie. - * we can't tell in advance whether the lib we use supports - * "-o subtype". So try to be clever now. - * - * Delete the subtype option, and try again. 
- */ - if (fuse_opt_parse(&args, NULL, subtype_workaround, - subtype_workaround_optproc) == 0) - priv->se = fuse_lowlevel_new (&args, &fuse_ops, - sizeof (fuse_ops), - this_xl); - } - - if (priv->se == NULL) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "fuse_lowlevel_new() failed with error %s on " - "mount point %s", - strerror (errno), priv->mount_point); - goto umount_exit; + /* default values seemed to work fine during testing */ + GF_OPTION_INIT ("background-qlen", priv->background_qlen, int32, + cleanup_exit); + GF_OPTION_INIT ("congestion-threshold", priv->congestion_threshold, + int32, cleanup_exit); + + /* user has set only background-qlen, not congestion-threshold, + use the fuse kernel driver formula to set congestion. ie, 75% */ + if (dict_get (this_xl->options, "background-qlen") && + !dict_get (this_xl->options, "congestion-threshold")) { + priv->congestion_threshold = (priv->background_qlen * 3) / 4; + gf_log (this_xl->name, GF_LOG_INFO, + "setting congestion control as 75%% of " + "background-queue length (ie, (.75 * %d) = %d", + priv->background_qlen, priv->congestion_threshold); } - - ret = fuse_set_signal_handlers (priv->se); - if (ret == -1) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "fuse_set_signal_handlers() failed on mount point %s", - priv->mount_point); - goto umount_exit; - } - - fuse_opt_free_args (&args); - FREE (fsname_opt); - - fuse_session_add_chan (priv->se, priv->ch); - - priv->fd = fuse_chan_fd (priv->ch); - - this_xl->ctx->top = this_xl; - - priv->first_call = 2; - this_xl->itable = inode_table_new (0, this_xl); + + /* congestion should not be higher than background queue length */ + if (priv->congestion_threshold > priv->background_qlen) { + gf_log (this_xl->name, GF_LOG_INFO, + "setting congestion control same as " + "background-queue length (%d)", + priv->background_qlen); + priv->congestion_threshold = priv->background_qlen; + } + + cmd_args = &this_xl->ctx->cmd_args; + fsname = cmd_args->volfile; + if (!fsname && 
cmd_args->volfile_server) { + if (cmd_args->volfile_id) { + fsname = GF_MALLOC ( + strlen (cmd_args->volfile_server) + 1 + + strlen (cmd_args->volfile_id) + 1, + gf_fuse_mt_fuse_private_t); + if (!fsname) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "Out of memory"); + goto cleanup_exit; + } + fsname_allocated = 1; + strcpy (fsname, cmd_args->volfile_server); + strcat (fsname, ":"); + strcat (fsname, cmd_args->volfile_id); + } else + fsname = cmd_args->volfile_server; + } + if (!fsname) + fsname = "glusterfs"; + + priv->fdtable = gf_fd_fdtable_alloc (); + if (priv->fdtable == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); + goto cleanup_exit; + } + + if (priv->read_only) + mntflags |= MS_RDONLY; + gf_asprintf (&mnt_args, "%s%s%sallow_other,max_read=131072", + priv->acl ? "" : "default_permissions,", + priv->fuse_mountopts ? priv->fuse_mountopts : "", + priv->fuse_mountopts ? "," : ""); + if (!mnt_args) + goto cleanup_exit; + + if (pipe(priv->status_pipe) < 0) { + gf_log (this_xl->name, GF_LOG_ERROR, + "could not create pipe to separate mount process"); + goto cleanup_exit; + } + + priv->fd = gf_fuse_mount (priv->mount_point, fsname, mntflags, mnt_args, + sync_to_mount ? 
&ctx->mnt_pid : NULL, + priv->status_pipe[1]); + if (priv->fd == -1) + goto cleanup_exit; + + event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false, NULL); + if (!event) { + gf_log (this_xl->name, GF_LOG_ERROR, + "could not create a new event history"); + goto cleanup_exit; + } + + this_xl->history = event; + + pthread_mutex_init (&priv->fuse_dump_mutex, NULL); + pthread_cond_init (&priv->sync_cond, NULL); + pthread_mutex_init (&priv->sync_mutex, NULL); + priv->event_recvd = 0; + + for (i = 0; i < FUSE_OP_HIGH; i++) { + if (!fuse_std_ops[i]) + fuse_std_ops[i] = fuse_enosys; + if (!fuse_dump_ops[i]) + fuse_dump_ops[i] = fuse_dumper; + } + priv->fuse_ops = fuse_std_ops; + if (priv->fuse_dump_fd != -1) { + priv->fuse_ops0 = priv->fuse_ops; + priv->fuse_ops = fuse_dump_ops; + } + + if (fsname_allocated) + GF_FREE (fsname); + GF_FREE (mnt_args); return 0; - -umount_exit: - fuse_unmount (priv->mount_point, priv->ch); + cleanup_exit: - fuse_opt_free_args (&args); - FREE (fsname_opt); - if (priv) - FREE (priv->mount_point); - FREE (priv); + if (xl_name_allocated) + GF_FREE (this_xl->name); + if (fsname_allocated) + GF_FREE (fsname); + if (priv) { + GF_FREE (priv->mount_point); + if (priv->fd != -1) + close (priv->fd); + if (priv->fuse_dump_fd != -1) + close (priv->fuse_dump_fd); + GF_FREE (priv); + } + GF_FREE (mnt_args); return -1; } @@ -2827,54 +5408,121 @@ void fini (xlator_t *this_xl) { fuse_private_t *priv = NULL; - char *mount_point = NULL; - - if (this_xl == NULL) - return; - - if ((priv = this_xl->private) == NULL) - return; - - if (dict_get (this_xl->options, ZR_MOUNTPOINT_OPT)) - mount_point = data_to_str (dict_get (this_xl->options, - ZR_MOUNTPOINT_OPT)); - if (mount_point != NULL) { - gf_log (this_xl->name, GF_LOG_NORMAL, - "Unmounting '%s'.", mount_point); - - dict_del (this_xl->options, ZR_MOUNTPOINT_OPT); - fuse_session_exit (priv->se); - fuse_unmount (mount_point, priv->ch); - } + char *mount_point = NULL; + + if (this_xl == NULL) + return; + + if ((priv = 
this_xl->private) == NULL) + return; + + if (dict_get (this_xl->options, ZR_MOUNTPOINT_OPT)) + mount_point = data_to_str (dict_get (this_xl->options, + ZR_MOUNTPOINT_OPT)); + if (mount_point != NULL) { + gf_log (this_xl->name, GF_LOG_INFO, + "Unmounting '%s'.", mount_point); + + gf_fuse_unmount (mount_point, priv->fd); + close (priv->fuse_dump_fd); + dict_del (this_xl->options, ZR_MOUNTPOINT_OPT); + } + /* Process should terminate once fuse xlator is finished. + * Required for AUTH_FAILED event. + */ + kill (getpid (), SIGTERM); } -struct xlator_fops fops = { -}; +struct xlator_fops fops; struct xlator_cbks cbks = { + .invalidate = fuse_invalidate, + .forget = fuse_forget_cbk, }; -struct xlator_mops mops = { + +struct xlator_dumpops dumpops = { + .priv = fuse_priv_dump, + .inode = fuse_itable_dump, + .history = fuse_history_dump, }; struct volume_options options[] = { - { .key = {"direct-io-mode"}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {"macfuse-local"}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {"mountpoint", "mount-point"}, - .type = GF_OPTION_TYPE_PATH - }, - { .key = {"attribute-timeout"}, - .type = GF_OPTION_TYPE_DOUBLE - }, - { .key = {"entry-timeout"}, - .type = GF_OPTION_TYPE_DOUBLE - }, - { .key = {"strict-volfile-check"}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {NULL} }, + { .key = {"direct-io-mode"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {ZR_MOUNTPOINT_OPT, "mount-point"}, + .type = GF_OPTION_TYPE_PATH + }, + { .key = {ZR_DUMP_FUSE, "fuse-dumpfile"}, + .type = GF_OPTION_TYPE_PATH + }, + { .key = {ZR_ATTR_TIMEOUT_OPT}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "1.0" + }, + { .key = {ZR_ENTRY_TIMEOUT_OPT}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "1.0" + }, + { .key = {ZR_NEGATIVE_TIMEOUT_OPT}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "0.0" + }, + { .key = {ZR_STRICT_VOLFILE_CHECK}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, + { .key = {"client-pid"}, + .type = GF_OPTION_TYPE_INT 
+ }, + { .key = {"uid-map-root"}, + .type = GF_OPTION_TYPE_INT + }, + { .key = {"sync-to-mount"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"read-only"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"fopen-keep-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, + { .key = {"gid-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "2" + }, + { .key = {"acl"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, + { .key = {"selinux"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, + { .key = {"enable-ino32"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, + { .key = {"background-qlen"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "64", + .min = 16, + .max = (64 * GF_UNIT_KB), + }, + { .key = {"congestion-threshold"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "48", + .min = 12, + .max = (64 * GF_UNIT_KB), + }, + { .key = {"fuse-mountopts"}, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"use-readdirp"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes" + }, + { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h new file mode 100644 index 000000000..34794b6ea --- /dev/null +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -0,0 +1,537 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _GF_FUSE_BRIDGE_H_ +#define _GF_FUSE_BRIDGE_H_ + +#include <stdint.h> +#include <signal.h> +#include <pthread.h> +#include <stddef.h> +#include <dirent.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <fnmatch.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif /* _CONFIG_H */ + +#include "glusterfs.h" +#include "logging.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "statedump.h" + +#ifdef GF_DARWIN_HOST_OS +/* This is MacFUSE's marker for MacFUSE-specific code */ +#define __FreeBSD__ 10 +#include "fuse_kernel_macfuse.h" +#else +#include "fuse_kernel.h" +#endif +#include "fuse-misc.h" +#include "fuse-mount.h" +#include "fuse-mem-types.h" + +#include "list.h" +#include "dict.h" +#include "syncop.h" +#include "gidcache.h" + +#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__) +#define FUSE_OP_HIGH (FUSE_READDIRPLUS + 1) +#endif +#ifdef GF_DARWIN_HOST_OS +#define FUSE_OP_HIGH (FUSE_DESTROY + 1) +#endif +#define GLUSTERFS_XATTR_LEN_MAX 65536 + +#define MAX_FUSE_PROC_DELAY 1 + +typedef struct fuse_in_header fuse_in_header_t; +typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh, + void *msg); + +struct fuse_private { + int fd; + uint32_t proto_minor; + char *volfile; + size_t volfile_size; + char *mount_point; + struct iobuf *iobuf; + + pthread_t fuse_thread; + char fuse_thread_started; + + uint32_t direct_io_mode; + size_t *msg0_len_p; + + double entry_timeout; + double negative_timeout; + double attribute_timeout; + + pthread_cond_t sync_cond; + pthread_mutex_t sync_mutex; + char event_recvd; + + char init_recvd; + + gf_boolean_t strict_volfile_check; + + fuse_handler_t **fuse_ops; + fuse_handler_t **fuse_ops0; + pthread_mutex_t fuse_dump_mutex; + int fuse_dump_fd; + + glusterfs_graph_t *next_graph; + xlator_t *active_subvol; + + pid_t client_pid; + gf_boolean_t client_pid_set; + unsigned uid_map_root; + gf_boolean_t acl; + gf_boolean_t selinux; + gf_boolean_t 
read_only; + int32_t fopen_keep_cache; + int32_t gid_cache_timeout; + gf_boolean_t enable_ino32; + fdtable_t *fdtable; + gid_cache_t gid_cache; + char *fuse_mountopts; + + /* For fuse-reverse-validation */ + int revchan_in; + int revchan_out; + gf_boolean_t reverse_fuse_thread_started; + + /* For communicating with separate mount thread. */ + int status_pipe[2]; + + /* for fuse queue length and congestion threshold */ + int background_qlen; + int congestion_threshold; + + /* for using fuse-kernel readdirp*/ + gf_boolean_t use_readdirp; +}; +typedef struct fuse_private fuse_private_t; + +struct fuse_graph_switch_args { + xlator_t *this; + xlator_t *old_subvol; + xlator_t *new_subvol; +}; +typedef struct fuse_graph_switch_args fuse_graph_switch_args_t; + +#define INVAL_BUF_SIZE (sizeof (struct fuse_out_header) + \ + max (sizeof (struct fuse_notify_inval_inode_out), \ + sizeof (struct fuse_notify_inval_entry_out) + \ + NAME_MAX + 1)) + +#define FUSE_EVENT_HISTORY_SIZE 1024 + +#define _FH_TO_FD(fh) ((fd_t *)(uintptr_t)(fh)) + +#define FH_TO_FD(fh) ((_FH_TO_FD (fh))?(fd_ref (_FH_TO_FD (fh))):((fd_t *) 0)) + +#define FUSE_FOP(state, ret, op_num, fop, args ...) \ + do { \ + call_frame_t *frame = NULL; \ + xlator_t *xl = NULL; \ + int32_t op_ret = 0, op_errno = 0; \ + fuse_resolve_t *resolve = NULL; \ + \ + frame = get_call_frame_for_req (state); \ + if (!frame) { \ + /* This is not completely clean, as some \ + * earlier allocations might remain unfreed \ + * if we return at this point, but still \ + * better than trying to go on with a NULL \ + * frame ... 
\ + */ \ + gf_log_callingfn ("glusterfs-fuse", \ + GF_LOG_ERROR, \ + "FUSE message" \ + " unique %"PRIu64" opcode %d:" \ + " frame allocation failed", \ + state->finh->unique, \ + state->finh->opcode); \ + free_fuse_state (state); \ + /* ideally, need to 'return', but let the */ \ + /* calling function take care of it */ \ + break; \ + } \ + \ + frame->root->state = state; \ + frame->root->op = op_num; \ + frame->op = op_num; \ + \ + if ( state->resolve_now ) { \ + resolve = state->resolve_now; \ + } else { \ + resolve = &(state->resolve); \ + } \ + \ + xl = state->active_subvol; \ + if (!xl) { \ + gf_log_callingfn ("glusterfs-fuse", GF_LOG_ERROR, \ + "xl is NULL"); \ + op_errno = ENOENT; \ + op_ret = -1; \ + } else if (resolve->op_ret < 0) { \ + op_errno = resolve->op_errno; \ + op_ret = -1; \ + if (op_num == GF_FOP_LOOKUP) { \ + gf_log ("glusterfs-fuse", \ + (op_errno == ENOENT ? GF_LOG_TRACE \ + : GF_LOG_WARNING), \ + "%"PRIu64": %s() %s => -1 (%s)", \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + resolve->resolve_loc.path, \ + strerror (op_errno)); \ + } else { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%"PRIu64": %s() inode " \ + "migration of %s failed (%s)", \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + resolve->resolve_loc.path, \ + strerror (op_errno)); \ + } \ + } else if (state->resolve2.op_ret < 0) { \ + op_errno = state->resolve2.op_errno; \ + op_ret = -1; \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%"PRIu64": %s() inode " \ + "migration of %s failed (%s)", \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->resolve2.resolve_loc.path, \ + strerror (op_errno)); \ + } \ + \ + if (op_ret < 0) { \ + send_fuse_err (state->this, state->finh, op_errno); \ + free_fuse_state (state); \ + STACK_DESTROY (frame->root); \ + } else { \ + if (state->this->history) \ + gf_log_eh ("%"PRIu64", %s, path: (%s), gfid: " \ + "(%s)", frame->root->unique, \ + gf_fop_list[frame->root->op], \ + 
state->loc.path, \ + (state->fd == NULL)? \ + uuid_utoa (state->loc.gfid): \ + uuid_utoa (state->fd->inode->gfid));\ + STACK_WIND (frame, ret, xl, xl->fops->fop, args); \ + } \ + \ + } while (0) + + +#define FUSE_FOP_COOKIE(state, xl, ret, cky, op_num, fop, args ...) \ + do { \ + call_frame_t *frame = NULL; \ + xlator_t *xl = NULL; \ + int32_t op_ret = 0, op_errno = 0; \ + \ + frame = get_call_frame_for_req (state); \ + if (!frame) { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_ERROR, \ + "FUSE message" \ + " unique %"PRIu64" opcode %d:" \ + " frame allocation failed", \ + state->finh->unique, \ + state->finh->opcode); \ + free_fuse_state (state); \ + return 0; \ + } \ + \ + frame->root->state = state; \ + frame->root->op = op_num; \ + frame->op = op_num; \ + \ + xl = state->active_subvol; \ + if (!xl) { \ + gf_log_callingfn ("glusterfs-fuse", GF_LOG_ERROR, \ + "xl is NULL"); \ + op_errno = ENOENT; \ + op_ret = -1; \ + } else if (state->resolve.op_ret < 0) { \ + op_errno = state->resolve.op_errno; \ + op_ret = -1; \ + if (op_num == GF_FOP_LOOKUP) { \ + gf_log ("glusterfs-fuse", \ + (op_errno == ENOENT ? 
GF_LOG_TRACE \ + : GF_LOG_WARNING), \ + "%"PRIu64": %s() %s => -1 (%s)", \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->resolve.resolve_loc.path, \ + strerror (op_errno)); \ + } else { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%"PRIu64": %s() inode " \ + "migration of %s failed (%s)", \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->resolve.resolve_loc.path, \ + strerror (op_errno)); \ + } \ + } else if (state->resolve2.op_ret < 0) { \ + op_errno = state->resolve2.op_errno; \ + op_ret = -1; \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%"PRIu64": %s() inode " \ + "migration of %s failed (%s)", \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->resolve2.resolve_loc.path, \ + strerror (op_errno)); \ + } \ + \ + if (op_ret < 0) { \ + send_fuse_err (state->this, state->finh, op_errno); \ + free_fuse_state (state); \ + STACK_DESTROY (frame->root); \ + } else { \ + if (xl->history) \ + gf_log_eh ("%"PRIu64", %s, path: (%s), gfid: " \ + "(%s)", frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->loc.path, \ + uuid_utoa (state->loc.gfid)); \ + STACK_WIND_COOKIE (frame, ret, cky, xl, xl->fops->fop, \ + args); \ + } \ + } while (0) + +#define GF_SELECT_LOG_LEVEL(_errno) \ + (((_errno == ENOENT) || (_errno == ESTALE))? 
\ + GF_LOG_DEBUG) + +#define GET_STATE(this, finh, state) \ + do { \ + state = get_fuse_state (this, finh); \ + if (!state) { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_ERROR, \ + "FUSE message unique %"PRIu64" opcode %d:" \ + " state allocation failed", \ + finh->unique, finh->opcode); \ + \ + send_fuse_err (this, finh, ENOMEM); \ + GF_FREE (finh); \ + \ + return; \ + } \ + } while (0) + +#define FUSE_ENTRY_CREATE(this, priv, finh, state, fci, op) \ + do { \ + if (priv->proto_minor >= 12) \ + state->mode &= ~fci->umask; \ + if (priv->proto_minor >= 12 && priv->acl) { \ + state->xdata = dict_new (); \ + if (!state->xdata) { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%s failed to allocate " \ + "a param dictionary", op); \ + send_fuse_err (this, finh, ENOMEM); \ + free_fuse_state (state); \ + return; \ + } \ + state->umask = fci->umask; \ + \ +/* TODO: remove this after 3.4.0 release. keeping it for the \ + sake of backward compatibility with old (3.3.[01]) \ + releases till then. 
*/ \ + ret = dict_set_int16 (state->xdata, "umask", \ + fci->umask); \ + if (ret < 0) { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%s Failed adding umask"\ + " to request", op); \ + dict_destroy (state->xdata); \ + send_fuse_err (this, finh, ENOMEM); \ + free_fuse_state (state); \ + return; \ + } \ + ret = dict_set_int16 (state->xdata, "mode", \ + fci->mode); \ + if (ret < 0) { \ + gf_log ("glusterfs-fuse", \ + GF_LOG_WARNING, \ + "%s Failed adding mode " \ + "to request", op); \ + dict_destroy (state->xdata); \ + send_fuse_err (this, finh, ENOMEM); \ + free_fuse_state (state); \ + return; \ + } \ + } \ + } while (0) + +#define fuse_log_eh_fop(this, state, frame, op_ret, op_errno) \ + do { \ + if (this->history) { \ + if (state->fd) \ + gf_log_eh ("op_ret: %d, op_errno: %d, " \ + "%"PRIu64", %s () => %p, gfid: %s", \ + op_ret, op_errno, \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->fd, \ + uuid_utoa (state->fd->inode->gfid)); \ + else \ + gf_log_eh ("op_ret: %d, op_errno: %d, " \ + "%"PRIu64", %s () => %s, gfid: %s", \ + op_ret, op_errno, \ + frame->root->unique, \ + gf_fop_list[frame->root->op], \ + state->loc.path, \ + uuid_utoa (state->loc.gfid)); \ + } \ + } while(0) + +#define fuse_log_eh(this, args...) 
\ + do { \ + if (this->history) \ + gf_log_eh(args); \ + } while (0) + +static inline xlator_t * +fuse_active_subvol (xlator_t *fuse) +{ + fuse_private_t *priv = NULL; + + priv = fuse->private; + + return priv->active_subvol; +} + + +typedef enum { + RESOLVE_MUST = 1, + RESOLVE_NOT, + RESOLVE_MAY, + RESOLVE_DONTCARE, + RESOLVE_EXACT +} fuse_resolve_type_t; + + +typedef struct { + fuse_resolve_type_t type; + fd_t *fd; + char *path; + char *bname; + u_char gfid[16]; + inode_t *hint; + u_char pargfid[16]; + inode_t *parhint; + char *resolved; + int op_ret; + int op_errno; + loc_t resolve_loc; +} fuse_resolve_t; + + +typedef struct { + void *pool; + xlator_t *this; + xlator_t *active_subvol; + inode_table_t *itable; + loc_t loc; + loc_t loc2; + fuse_in_header_t *finh; + int32_t flags; + off_t off; + size_t size; + unsigned long nlookup; + fd_t *fd; + dict_t *xattr; + dict_t *xdata; + char *name; + char is_revalidate; + gf_boolean_t truncate_needed; + gf_lock_t lock; + uint64_t lk_owner; + + /* used within resolve_and_resume */ + /* */ + fuse_resolve_t resolve; + fuse_resolve_t resolve2; + + loc_t *loc_now; + fuse_resolve_t *resolve_now; + + void *resume_fn; + + int valid; + int mask; + dev_t rdev; + mode_t mode; + mode_t umask; + struct iatt attr; + struct gf_flock lk_lock; + struct iovec vector; + + uuid_t gfid; + uint32_t io_flags; + int32_t fd_no; +} fuse_state_t; + +typedef struct { + uint32_t open_flags; + char migration_failed; + fd_t *activefd; +} fuse_fd_ctx_t; + +typedef void (*fuse_resume_fn_t) (fuse_state_t *state); + +GF_MUST_CHECK int32_t +fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino, + ino_t par, const char *name); +call_frame_t *get_call_frame_for_req (fuse_state_t *state); +fuse_state_t *get_fuse_state (xlator_t *this, fuse_in_header_t *finh); +void free_fuse_state (fuse_state_t *state); +void gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa, + gf_boolean_t enable_ino32); +void gf_fuse_fill_dirent (gf_dirent_t *entry, struct 
fuse_dirent *fde, + gf_boolean_t enable_ino32); +uint64_t inode_to_fuse_nodeid (inode_t *inode); +xlator_t *fuse_active_subvol (xlator_t *fuse); +inode_t *fuse_ino_to_inode (uint64_t ino, xlator_t *fuse); +int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error); +int fuse_gfid_set (fuse_state_t *state); +int fuse_flip_xattr_ns (struct fuse_private *priv, char *okey, char **nkey); +fuse_fd_ctx_t * __fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd); +fuse_fd_ctx_t * fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd); + +int fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn); +int fuse_resolve_inode_init (fuse_state_t *state, fuse_resolve_t *resolve, + ino_t ino); +int fuse_resolve_entry_init (fuse_state_t *state, fuse_resolve_t *resolve, + ino_t par, char *name); +int fuse_resolve_fd_init (fuse_state_t *state, fuse_resolve_t *resolve, + fd_t *fd); +int fuse_ignore_xattr_set (fuse_private_t *priv, char *key); +int dump_history_fuse (circular_buffer_t *cb, void *data); +#endif /* _GF_FUSE_BRIDGE_H_ */ diff --git a/xlators/mount/fuse/src/fuse-extra.c b/xlators/mount/fuse/src/fuse-extra.c deleted file mode 100644 index 95bd0f3ad..000000000 --- a/xlators/mount/fuse/src/fuse-extra.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. 
If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif /* _CONFIG_H */ - -#include "fuse-extra.h" -#include "common-utils.h" -#include <stdio.h> -#include <pthread.h> -#include <stdlib.h> -#include <string.h> -#include "common-utils.h" - -struct fuse_req; -struct fuse_ll; - -struct fuse_req { - struct fuse_ll *f; - uint64_t unique; - int ctr; - pthread_mutex_t lock; - struct fuse_ctx ctx; - struct fuse_chan *ch; - int interrupted; - union { - struct { - uint64_t unique; - } i; - struct { - fuse_interrupt_func_t func; - void *data; - } ni; - } u; - struct fuse_req *next; - struct fuse_req *prev; -}; - -struct fuse_ll { - int debug; - int allow_root; - struct fuse_lowlevel_ops op; - int got_init; - void *userdata; - uid_t owner; - struct fuse_conn_info conn; - struct fuse_req list; - struct fuse_req interrupts; - pthread_mutex_t lock; - int got_destroy; -}; - -struct fuse_out_header { - uint32_t len; - int32_t error; - uint64_t unique; -}; - -uint64_t req_callid (fuse_req_t req) -{ - return req->unique; -} - -static void destroy_req(fuse_req_t req) -{ - pthread_mutex_destroy (&req->lock); - FREE (req); -} - -static void list_del_req(struct fuse_req *req) -{ - struct fuse_req *prev = req->prev; - struct fuse_req *next = req->next; - prev->next = next; - next->prev = prev; -} - -static void -free_req (fuse_req_t req) -{ - int ctr; - struct fuse_ll *f = req->f; - - pthread_mutex_lock(&req->lock); - req->u.ni.func = NULL; - req->u.ni.data = NULL; - pthread_mutex_unlock(&req->lock); - - pthread_mutex_lock(&f->lock); - list_del_req(req); - ctr = --req->ctr; - pthread_mutex_unlock(&f->lock); - if (!ctr) - destroy_req(req); -} - -int32_t -fuse_reply_vec (fuse_req_t req, - struct iovec *vector, - int32_t count) -{ - int32_t error = 0; - struct fuse_out_header out; - struct iovec *iov; - int res; - - iov = alloca ((count + 1) * sizeof (*vector)); - out.unique = req->unique; - out.error = error; - iov[0].iov_base = 
&out; - iov[0].iov_len = sizeof(struct fuse_out_header); - memcpy (&iov[1], vector, count * sizeof (*vector)); - count++; - out.len = iov_length(iov, count); - res = fuse_chan_send(req->ch, iov, count); - free_req(req); - - return res; -} diff --git a/xlators/mount/fuse/src/fuse-extra.h b/xlators/mount/fuse/src/fuse-extra.h deleted file mode 100644 index 5688e34c7..000000000 --- a/xlators/mount/fuse/src/fuse-extra.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _FUSE_EXTRA_H -#define _FUSE_EXTRA_H - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif /* _CONFIG_H */ - -#include <stdlib.h> -#include <fuse/fuse_lowlevel.h> - -#define GLUSTERFS_XATTR_LEN_MAX 65536 - -uint64_t req_callid (fuse_req_t req); - -size_t fuse_dirent_size (size_t dname_len); - -int32_t -fuse_reply_vec (fuse_req_t req, - struct iovec *vector, - int32_t count); - -#endif /* _FUSE_EXTRA_H */ diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c new file mode 100644 index 000000000..4d478b919 --- /dev/null +++ b/xlators/mount/fuse/src/fuse-helpers.c @@ -0,0 +1,605 @@ +/* + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifdef __NetBSD__ +#define _KMEMUSER +#endif + +#include "fuse-bridge.h" +#if defined(GF_SOLARIS_HOST_OS) +#include <sys/procfs.h> +#else +#include <sys/sysctl.h> +#endif + + +static void +fuse_resolve_wipe (fuse_resolve_t *resolve) +{ + GF_FREE ((void *)resolve->path); + + GF_FREE ((void *)resolve->bname); + + GF_FREE ((void *)resolve->resolved); + + if (resolve->fd) + fd_unref (resolve->fd); + + loc_wipe (&resolve->resolve_loc); + + if (resolve->hint) { + inode_unref (resolve->hint); + resolve->hint = 0; + } + + if (resolve->parhint) { + inode_unref (resolve->parhint); + resolve->parhint = 0; + } +} + + +void +free_fuse_state (fuse_state_t *state) +{ + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + uint64_t winds = 0; + char switched = 0; + + this = state->this; + + priv = this->private; + + loc_wipe (&state->loc); + + loc_wipe (&state->loc2); + + if (state->xdata) { + dict_unref (state->xdata); + state->xdata = (void *)0xaaaaeeee; + } + if (state->xattr) + dict_unref (state->xattr); + + if (state->name) { + GF_FREE (state->name); + state->name = NULL; + } + if (state->fd) { + fd_unref (state->fd); + state->fd = (void *)0xfdfdfdfd; + } + if (state->finh) { + GF_FREE (state->finh); + state->finh = NULL; + } + + fuse_resolve_wipe (&state->resolve); + fuse_resolve_wipe (&state->resolve2); + + pthread_mutex_lock (&priv->sync_mutex); + { + winds = --state->active_subvol->winds; + switched = state->active_subvol->switched; + } + pthread_mutex_unlock (&priv->sync_mutex); + + if ((winds == 0) && (switched)) { + xlator_notify (state->active_subvol, GF_EVENT_PARENT_DOWN, + state->active_subvol, NULL); + } + +#ifdef DEBUG + memset (state, 0x90, sizeof (*state)); +#endif + GF_FREE (state); + state = 
NULL; +} + + +fuse_state_t * +get_fuse_state (xlator_t *this, fuse_in_header_t *finh) +{ + fuse_state_t *state = NULL; + xlator_t *active_subvol = NULL; + fuse_private_t *priv = NULL; + + state = (void *)GF_CALLOC (1, sizeof (*state), + gf_fuse_mt_fuse_state_t); + if (!state) + return NULL; + + state->this = THIS; + priv = this->private; + + pthread_mutex_lock (&priv->sync_mutex); + { + active_subvol = fuse_active_subvol (state->this); + active_subvol->winds++; + } + pthread_mutex_unlock (&priv->sync_mutex); + + state->active_subvol = active_subvol; + state->itable = active_subvol->itable; + + state->pool = this->ctx->pool; + state->finh = finh; + state->this = this; + + LOCK_INIT (&state->lock); + + return state; +} + + +#define FUSE_MAX_AUX_GROUPS 32 /* We can get only up to 32 aux groups from /proc */ +void +frame_fill_groups (call_frame_t *frame) +{ +#if defined(GF_LINUX_HOST_OS) + char filename[32]; + char line[4096]; + char *ptr = NULL; + FILE *fp = NULL; + int idx = 0; + long int id = 0; + char *saveptr = NULL; + char *endptr = NULL; + int ret = 0; + + ret = snprintf (filename, sizeof filename, "/proc/%d/status", frame->root->pid); + if (ret >= sizeof filename) + goto out; + + fp = fopen (filename, "r"); + if (!fp) + goto out; + + if (call_stack_alloc_groups (frame->root, FUSE_MAX_AUX_GROUPS) != 0) + goto out; + + while ((ptr = fgets (line, sizeof line, fp))) { + if (strncmp (ptr, "Groups:", 7) != 0) + continue; + + ptr = line + 8; + + for (ptr = strtok_r (ptr, " \t\r\n", &saveptr); + ptr; + ptr = strtok_r (NULL, " \t\r\n", &saveptr)) { + errno = 0; + id = strtol (ptr, &endptr, 0); + if (errno == ERANGE) + break; + if (!endptr || *endptr) + break; + frame->root->groups[idx++] = id; + if (idx == FUSE_MAX_AUX_GROUPS) + break; + } + + frame->root->ngrps = idx; + break; + } +out: + if (fp) + fclose (fp); +#elif defined(GF_SOLARIS_HOST_OS) + char filename[32]; + char scratch[128]; + prcred_t *prcred = (prcred_t *) scratch; + FILE *fp = NULL; + int ret = 0; + int 
ngrps; + + ret = snprintf (filename, sizeof filename, + "/proc/%d/cred", frame->root->pid); + + if (ret < sizeof filename) { + fp = fopen (filename, "r"); + if (fp != NULL) { + if (fgets (scratch, sizeof scratch, fp) != NULL) { + ngrps = MIN(prcred->pr_ngroups, + GF_MAX_AUX_GROUPS); + if (call_stack_alloc_groups (frame->root, + ngrps) != 0) + return; + } + fclose (fp); + } + } +#elif defined(CTL_KERN) /* DARWIN and *BSD */ + /* + N.B. CTL_KERN is an enum on Linux. (Meaning, if it's not + obvious, that it's not subject to preprocessor directives + like '#if defined'.) + Unlike Linux, on Mac OS and the BSDs it is a #define. We + could test to see that KERN_PROC is defined, but, barring any + evidence to the contrary, I think that's overkill. + We might also test that GF_DARWIN_HOST_OS is defined, why + limit this to just Mac OS. It's equally valid for the BSDs + and we do have people building on NetBSD and FreeBSD. + */ + int name[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, frame->root->pid }; + size_t namelen = sizeof name / sizeof name[0]; + struct kinfo_proc kp; + size_t kplen = sizeof(kp); + int i, ngroups; + + if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0) + return; + ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, GF_MAX_AUX_GROUPS); + if (call_stack_alloc_groups (frame->root, ngroups) != 0) + return; + for (i = 0; i < ngroups; i++) + frame->root->groups[i] = kp.kp_eproc.e_ucred.cr_groups[i]; + frame->root->ngrps = ngroups; +#else + frame->root->ngrps = 0; +#endif /* GF_LINUX_HOST_OS */ +} + +/* + * Get the groups for the PID associated with this frame. If enabled, + * use the gid cache to reduce group list collection. 
+ */ +static void get_groups(fuse_private_t *priv, call_frame_t *frame) +{ + int i; + const gid_list_t *gl; + gid_list_t agl; + + if (-1 == priv->gid_cache_timeout) { + frame->root->ngrps = 0; + return; + } + + if (!priv->gid_cache_timeout) { + frame_fill_groups(frame); + return; + } + + gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid); + if (gl) { + if (call_stack_alloc_groups (frame->root, gl->gl_count) != 0) + return; + frame->root->ngrps = gl->gl_count; + for (i = 0; i < gl->gl_count; i++) + frame->root->groups[i] = gl->gl_list[i]; + gid_cache_release(&priv->gid_cache, gl); + return; + } + + frame_fill_groups (frame); + + agl.gl_id = frame->root->pid; + agl.gl_count = frame->root->ngrps; + agl.gl_list = GF_CALLOC(frame->root->ngrps, sizeof(gid_t), + gf_fuse_mt_gids_t); + if (!agl.gl_list) + return; + + for (i = 0; i < frame->root->ngrps; i++) + agl.gl_list[i] = frame->root->groups[i]; + + if (gid_cache_add(&priv->gid_cache, &agl) != 1) + GF_FREE(agl.gl_list); +} + +call_frame_t * +get_call_frame_for_req (fuse_state_t *state) +{ + call_pool_t *pool = NULL; + fuse_in_header_t *finh = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + + pool = state->pool; + finh = state->finh; + this = state->this; + priv = this->private; + + frame = create_frame (this, pool); + if (!frame) + return NULL; + + if (finh) { + frame->root->uid = finh->uid; + frame->root->gid = finh->gid; + frame->root->pid = finh->pid; + frame->root->unique = finh->unique; + set_lk_owner_from_uint64 (&frame->root->lk_owner, + state->lk_owner); + } + + get_groups(priv, frame); + + if (priv && priv->client_pid_set) + frame->root->pid = priv->client_pid; + + frame->root->type = GF_OP_TYPE_FOP; + + return frame; +} + + +inode_t * +fuse_ino_to_inode (uint64_t ino, xlator_t *fuse) +{ + inode_t *inode = NULL; + xlator_t *active_subvol = NULL; + + if (ino == 1) { + active_subvol = fuse_active_subvol (fuse); + if (active_subvol) + inode = 
active_subvol->itable->root; + } else { + inode = (inode_t *) (unsigned long) ino; + inode_ref (inode); + } + + return inode; +} + +uint64_t +inode_to_fuse_nodeid (inode_t *inode) +{ + if (!inode) + return 0; + if (__is_root_gfid (inode->gfid)) + return 1; + + return (unsigned long) inode; +} + + +GF_MUST_CHECK int32_t +fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino, + ino_t par, const char *name) +{ + inode_t *inode = NULL; + inode_t *parent = NULL; + int32_t ret = -1; + char *path = NULL; + uuid_t null_gfid = {0,}; + + /* resistance against multiple invocation of loc_fill not to get + reference leaks via inode_search() */ + + if (name) { + parent = loc->parent; + if (!parent) { + parent = fuse_ino_to_inode (par, state->this); + loc->parent = parent; + if (parent) + uuid_copy (loc->pargfid, parent->gfid); + } + + inode = loc->inode; + if (!inode) { + inode = inode_grep (parent->table, parent, name); + loc->inode = inode; + } + + ret = inode_path (parent, name, &path); + if (ret <= 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "inode_path failed for %s/%s", + (parent)?uuid_utoa (parent->gfid):"0", name); + goto fail; + } + loc->path = path; + } else { + inode = loc->inode; + if (!inode) { + inode = fuse_ino_to_inode (ino, state->this); + loc->inode = inode; + if (inode) + uuid_copy (loc->gfid, inode->gfid); + } + + parent = loc->parent; + if (!parent) { + parent = inode_parent (inode, null_gfid, NULL); + loc->parent = parent; + if (parent) + uuid_copy (loc->pargfid, parent->gfid); + + } + + ret = inode_path (inode, NULL, &path); + if (ret <= 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "inode_path failed for %s", + (inode) ? 
uuid_utoa (inode->gfid) : "0"); + goto fail; + } + loc->path = path; + } + + if (loc->path) { + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + else + loc->name = ""; + } + + if ((ino != 1) && (parent == NULL)) { + gf_log ("fuse-bridge", GF_LOG_DEBUG, + "failed to search parent for %"PRId64"/%s (%"PRId64")", + (ino_t)par, name, (ino_t)ino); + ret = -1; + goto fail; + } + ret = 0; +fail: + /* this should not happen as inode_path returns -1 when buf is NULL + for sure */ + if (path && !loc->path) + GF_FREE (path); + return ret; +} + +/* Use the same logic as the Linux NFS-client */ +#define GF_FUSE_SQUASH_INO(ino) ((uint32_t) ino) ^ (ino >> 32) + +/* courtesy of folly */ +void +gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa, gf_boolean_t enable_ino32) +{ + if (enable_ino32) + fa->ino = GF_FUSE_SQUASH_INO(st->ia_ino); + else + fa->ino = st->ia_ino; + + fa->size = st->ia_size; + fa->blocks = st->ia_blocks; + fa->atime = st->ia_atime; + fa->mtime = st->ia_mtime; + fa->ctime = st->ia_ctime; + fa->atimensec = st->ia_atime_nsec; + fa->mtimensec = st->ia_mtime_nsec; + fa->ctimensec = st->ia_ctime_nsec; + fa->mode = st_mode_from_ia (st->ia_prot, st->ia_type); + fa->nlink = st->ia_nlink; + fa->uid = st->ia_uid; + fa->gid = st->ia_gid; + fa->rdev = makedev (ia_major (st->ia_rdev), + ia_minor (st->ia_rdev)); +#if FUSE_KERNEL_MINOR_VERSION >= 9 + fa->blksize = st->ia_blksize; +#endif +#ifdef GF_DARWIN_HOST_OS + fa->crtime = (uint64_t)-1; + fa->crtimensec = (uint32_t)-1; + fa->flags = 0; +#endif +} + +void +gf_fuse_fill_dirent (gf_dirent_t *entry, struct fuse_dirent *fde, gf_boolean_t enable_ino32) +{ + if (enable_ino32) + fde->ino = GF_FUSE_SQUASH_INO(entry->d_ino); + else + fde->ino = entry->d_ino; + + fde->off = entry->d_off; + fde->type = entry->d_type; + fde->namelen = strlen (entry->d_name); + strncpy (fde->name, entry->d_name, fde->namelen); +} + +static int +fuse_do_flip_xattr_ns (char *okey, const char *nns, char **nkey) +{ + int ret = 
0; + char *key = NULL; + + okey = strchr (okey, '.'); + GF_ASSERT (okey); + + key = GF_CALLOC (1, strlen (nns) + strlen(okey) + 1, + gf_common_mt_char); + if (!key) { + ret = -1; + goto out; + } + + strcpy (key, nns); + strcat (key, okey); + + *nkey = key; + + out: + return ret; +} + +static int +fuse_xattr_alloc_default (char *okey, char **nkey) +{ + int ret = 0; + + *nkey = gf_strdup (okey); + if (!*nkey) + ret = -1; + return ret; +} + +#define PRIV_XA_NS "trusted" +#define UNPRIV_XA_NS "system" + +int +fuse_flip_xattr_ns (fuse_private_t *priv, char *okey, char **nkey) +{ + int ret = 0; + gf_boolean_t need_flip = _gf_false; + + switch (priv->client_pid) { + case GF_CLIENT_PID_GSYNCD: + /* valid xattr(s): *xtime, volume-mark* */ + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): " + "volume-mark*, *xtime", priv->client_pid); + if ( (strcmp (okey, UNPRIV_XA_NS".glusterfs.volume-mark") == 0) + || (fnmatch (UNPRIV_XA_NS".glusterfs.volume-mark.*", okey, FNM_PERIOD) == 0) + || (fnmatch (UNPRIV_XA_NS".glusterfs.*.xtime", okey, FNM_PERIOD) == 0) ) + need_flip = _gf_true; + break; + + case GF_CLIENT_PID_HADOOP: + /* valid xattr(s): pathinfo */ + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): " + "pathinfo", priv->client_pid); + if (strcmp (okey, UNPRIV_XA_NS".glusterfs.pathinfo") == 0) + need_flip = _gf_true; + break; + } + + if (need_flip) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "flipping %s to "PRIV_XA_NS" equivalent", + okey); + ret = fuse_do_flip_xattr_ns (okey, PRIV_XA_NS, nkey); + } else { + /* if we cannot match, continue with what we got */ + ret = fuse_xattr_alloc_default (okey, nkey); + } + + return ret; +} + +int +fuse_ignore_xattr_set (fuse_private_t *priv, char *key) +{ + int ret = 0; + + /* don't mess with user namespace */ + if (fnmatch ("user.*", key, FNM_PERIOD) == 0) + goto out; + + if (priv->client_pid != GF_CLIENT_PID_GSYNCD) + goto out; + + /* trusted NS check */ + if (!((fnmatch ("*.glusterfs.*.xtime", key, 
FNM_PERIOD) == 0) + || (fnmatch ("*.glusterfs.volume-mark", + key, FNM_PERIOD) == 0) + || (fnmatch ("*.glusterfs.volume-mark.*", + key, FNM_PERIOD) == 0))) + ret = -1; + + out: + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "%s setxattr: key [%s], " + " client pid [%d]", (ret ? "disallowing" : "allowing"), key, + priv->client_pid); + + return ret; +} diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h new file mode 100644 index 000000000..28b4dfbdd --- /dev/null +++ b/xlators/mount/fuse/src/fuse-mem-types.h @@ -0,0 +1,28 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __FUSE_MEM_TYPES_H__ +#define __FUSE_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_fuse_mem_types_ { + gf_fuse_mt_iovec = gf_common_mt_end + 1, + gf_fuse_mt_fuse_private_t, + gf_fuse_mt_char, + gf_fuse_mt_iov_base, + gf_fuse_mt_fuse_state_t, + gf_fuse_mt_fd_ctx_t, + gf_fuse_mt_graph_switch_args_t, + gf_fuse_mt_gids_t, + gf_fuse_mt_end +}; +#endif + diff --git a/xlators/mount/fuse/src/fuse-resolve.c b/xlators/mount/fuse/src/fuse-resolve.c new file mode 100644 index 000000000..8565ce0e4 --- /dev/null +++ b/xlators/mount/fuse/src/fuse-resolve.c @@ -0,0 +1,724 @@ +/* + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "fuse-bridge.h"
+
+static int
+fuse_resolve_all (fuse_state_t *state);
+
+int fuse_resolve_continue (fuse_state_t *state);
+int fuse_resolve_entry_simple (fuse_state_t *state);
+int fuse_resolve_inode_simple (fuse_state_t *state);
+int fuse_migrate_fd (xlator_t *this, fd_t *fd, xlator_t *old_subvol,
+                     xlator_t *new_subvol);
+
+fuse_fd_ctx_t *
+fuse_fd_ctx_get (xlator_t *this, fd_t *fd);
+
+gf_boolean_t fuse_inode_needs_lookup (inode_t *inode, xlator_t *this);
+
+/*
+ * Fill in the missing fields of state->loc_now once a resolution step is
+ * over.  Only acts when loc->path is not set yet:
+ *  - parent + basename known: build the path from them and record the
+ *    parent gfid and the name in the loc;
+ *  - otherwise, inode known: build the path from the inode and record its
+ *    gfid.
+ * The return value of inode_path() is only traced, never acted upon.
+ * Always returns 0.
+ */
+static int
+fuse_resolve_loc_touchup (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve = NULL;
+        loc_t          *loc     = NULL;
+        char           *path    = NULL;
+        int             ret     = 0;
+
+        resolve = state->resolve_now;
+        loc     = state->loc_now;
+
+        if (!loc->path) {
+                if (loc->parent && resolve->bname) {
+                        ret = inode_path (loc->parent, resolve->bname, &path);
+                        uuid_copy (loc->pargfid, loc->parent->gfid);
+                        loc->name = resolve->bname;
+                } else if (loc->inode) {
+                        ret = inode_path (loc->inode, NULL, &path);
+                        uuid_copy (loc->gfid, loc->inode->gfid);
+                }
+                if (ret)
+                        gf_log (THIS->name, GF_LOG_TRACE,
+                                "return value inode_path %d", ret);
+                loc->path = path;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback of the LOOKUP issued by fuse_resolve_entry().
+ * On failure the error is recorded in resolve->op_ret / op_errno; on
+ * success the inode is linked under the parent and stored in
+ * state->loc_now->inode.  In both cases the temporary resolve_loc is wiped
+ * and the resolver state machine is resumed.
+ */
+int
+fuse_resolve_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, inode_t *inode,
+                        struct iatt *buf, dict_t *xattr,
+                        struct iatt *postparent)
+{
+        fuse_state_t   *state       = NULL;
+        fuse_resolve_t *resolve     = NULL;
+        inode_t        *link_inode  = NULL;
+        loc_t          *resolve_loc = NULL;
+
+        state = frame->root->state;
+        resolve = state->resolve_now;
+        resolve_loc = &resolve->resolve_loc;
+
+        STACK_DESTROY (frame->root);
+
+        if (op_ret == -1) {
+                /* a missing entry (ENOENT) is only logged at DEBUG level */
+                gf_log (this->name, (op_errno == ENOENT)
+                        ? GF_LOG_DEBUG : GF_LOG_WARNING,
+                        "%s/%s: failed to resolve (%s)",
+                        uuid_utoa (resolve_loc->pargfid), resolve_loc->name,
+                        strerror (op_errno));
+                resolve->op_ret = -1;
+                resolve->op_errno = op_errno;
+                goto out;
+        }
+
+        link_inode = inode_link (inode, resolve_loc->parent,
+                                 resolve_loc->name, buf);
+
+        state->loc_now->inode = link_inode;
+
+out:
+        loc_wipe (resolve_loc);
+
+        fuse_resolve_continue (state);
+        return 0;
+}
+
+
+/*
+ * Resolve <parent, basename> with a LOOKUP on the active graph.
+ * The lookup loc is built in resolve->resolve_loc from the parent already
+ * present in state->loc_now and a fresh inode; the result is handled by
+ * fuse_resolve_entry_cbk().  Always returns 0.
+ */
+int
+fuse_resolve_entry (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve     = NULL;
+        loc_t          *resolve_loc = NULL;
+
+        resolve = state->resolve_now;
+        resolve_loc = &resolve->resolve_loc;
+
+        resolve_loc->parent = inode_ref (state->loc_now->parent);
+        uuid_copy (resolve_loc->pargfid, state->loc_now->pargfid);
+        resolve_loc->name = resolve->bname;
+        resolve_loc->inode = inode_new (state->itable);
+
+        inode_path (resolve_loc->parent, resolve_loc->name,
+                    (char **) &resolve_loc->path);
+
+        FUSE_FOP (state, fuse_resolve_entry_cbk, GF_FOP_LOOKUP,
+                  lookup, resolve_loc, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Callback of the gfid based LOOKUP issued by fuse_resolve_gfid().
+ * On success the inode is linked into the table.  If resolve->gfid is set
+ * the inode itself was wanted and resolution of this loc is complete;
+ * otherwise the inode is the parent and the entry is resolved next through
+ * fuse_resolve_entry().
+ */
+int
+fuse_resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno, inode_t *inode,
+                       struct iatt *buf, dict_t *xattr, struct iatt *postparent)
+{
+        fuse_state_t   *state      = NULL;
+        fuse_resolve_t *resolve    = NULL;
+        inode_t        *link_inode = NULL;
+        loc_t          *loc_now    = NULL;
+
+        state = frame->root->state;
+        resolve = state->resolve_now;
+        loc_now = state->loc_now;
+
+        STACK_DESTROY (frame->root);
+
+        if (op_ret == -1) {
+                gf_log (this->name, (op_errno == ENOENT)
+                        ? GF_LOG_DEBUG : GF_LOG_WARNING,
+                        "%s: failed to resolve (%s)",
+                        uuid_utoa (resolve->resolve_loc.gfid),
+                        strerror (op_errno));
+                loc_wipe (&resolve->resolve_loc);
+
+                /* resolve->op_ret can have 3 values: 0, -1, -2.
+                 * 0 : resolution was successful.
+                 * -1: parent inode could not be resolved.
+                 * -2: entry (inode corresponding to path) could not be resolved
+                 */
+
+                if (uuid_is_null (resolve->gfid)) {
+                        resolve->op_ret = -1;
+                } else {
+                        resolve->op_ret = -2;
+                }
+
+                resolve->op_errno = op_errno;
+                goto out;
+        }
+
+        loc_wipe (&resolve->resolve_loc);
+
+        link_inode = inode_link (inode, NULL, NULL, buf);
+
+        if (!link_inode)
+                goto out;
+
+        if (!uuid_is_null (resolve->gfid)) {
+                loc_now->inode = link_inode;
+                goto out;
+        }
+
+        /* the gfid looked up was the parent's: go on with the entry */
+        loc_now->parent = link_inode;
+        uuid_copy (loc_now->pargfid, link_inode->gfid);
+
+        fuse_resolve_entry (state);
+
+        return 0;
+out:
+        fuse_resolve_continue (state);
+        return 0;
+}
+
+
+/*
+ * Resolve an inode by gfid with a nameless LOOKUP.
+ * The gfid looked up is resolve->pargfid when set, else resolve->gfid.
+ * The result is handled by fuse_resolve_gfid_cbk().  Always returns 0.
+ */
+int
+fuse_resolve_gfid (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve     = NULL;
+        loc_t          *resolve_loc = NULL;
+        int             ret         = 0;
+
+        resolve = state->resolve_now;
+        resolve_loc = &resolve->resolve_loc;
+
+        if (!uuid_is_null (resolve->pargfid)) {
+                uuid_copy (resolve_loc->gfid, resolve->pargfid);
+        } else if (!uuid_is_null (resolve->gfid)) {
+                uuid_copy (resolve_loc->gfid, resolve->gfid);
+        }
+
+        /* inode may already exist in case we are looking up an inode which was
+           linked through readdirplus */
+        resolve_loc->inode = inode_find (state->itable, resolve_loc->gfid);
+        if (!resolve_loc->inode)
+                resolve_loc->inode = inode_new (state->itable);
+        ret = loc_path (resolve_loc, NULL);
+
+        if (ret <= 0) {
+                /* log the gfid that is actually being looked up: when the
+                 * parent is being resolved resolve->gfid is not the one
+                 * copied into resolve_loc above */
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "failed to get the path for inode %s",
+                        uuid_utoa (resolve_loc->gfid));
+        }
+
+        FUSE_FOP (state, fuse_resolve_gfid_cbk, GF_FOP_LOOKUP,
+                  lookup, resolve_loc, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Try to resolve parent and entry from the inode table only.
+ *
+ * Return value:
+ * 0 - resolved parent and entry (as necessary)
+ * -1 - resolved parent but not entry (though necessary)
+ * 1 - resolved neither parent nor entry
+ */
+
+int
+fuse_resolve_parent_simple (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve = NULL;
+        loc_t          *loc     = NULL;
+        inode_t        *parent  = NULL;
+        inode_t        *inode   = NULL;
+
+        resolve = state->resolve_now;
+        loc = state->loc_now;
+
+        loc->name = resolve->bname;
+
+        parent = resolve->parhint;
+        if (parent->table == state->itable) {
+                if (fuse_inode_needs_lookup (parent, THIS))
+                        return 1;
+
+                /* no graph switches since */
+                loc->parent = inode_ref (parent);
+                uuid_copy (loc->pargfid, parent->gfid);
+                loc->inode = inode_grep (state->itable, parent, loc->name);
+
+                /* nodeid for root is 1 and we blindly take the latest graph's
+                 * table->root as the parhint and because of this there is
+                 * ambiguity whether the entry should have existed or not, and
+                 * we took the conservative approach of assuming entry should
+                 * have been there even though it need not have (bug #804592).
+                 */
+                if ((loc->inode == NULL)
+                    && __is_root_gfid (parent->gfid)) {
+                        /* non decisive result - entry missing */
+                        return -1;
+                }
+
+                /* decisive result - resolution success */
+                return 0;
+        }
+
+        /* the hint belongs to another inode table: search by gfid */
+        parent = inode_find (state->itable, resolve->pargfid);
+        if (!parent) {
+                /* non decisive result - parent missing */
+                return 1;
+        }
+        if (fuse_inode_needs_lookup (parent, THIS)) {
+                inode_unref (parent);
+                return 1;
+        }
+
+        loc->parent = parent;
+        uuid_copy (loc->pargfid, resolve->pargfid);
+
+        inode = inode_grep (state->itable, parent, loc->name);
+        if (inode) {
+                loc->inode = inode;
+                /* decisive result - resolution success */
+                return 0;
+        }
+
+        /* non decisive result - entry missing */
+        return -1;
+}
+
+
+/*
+ * Resolve a <parent gfid, basename> location.  Falls back to a gfid lookup
+ * of the parent (simple result > 0) or to a lookup of the entry (simple
+ * result < 0) when the inode table alone is not enough.  Always returns 0.
+ */
+int
+fuse_resolve_parent (fuse_state_t *state)
+{
+        int ret = 0;
+
+        ret = fuse_resolve_parent_simple (state);
+        if (ret > 0) {
+                fuse_resolve_gfid (state);
+                return 0;
+        }
+
+        if (ret < 0) {
+                fuse_resolve_entry (state);
+                return 0;
+        }
+
+        fuse_resolve_continue (state);
+
+        return 0;
+}
+
+
+/*
+ * Try to resolve an inode from the inode table only.
+ * Returns 0 and sets loc->inode (holding a reference) on success, 1 when
+ * the inode is not in the table or still needs a lookup.
+ */
+int
+fuse_resolve_inode_simple (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve = NULL;
+        loc_t          *loc     = NULL;
+        inode_t        *inode   = NULL;
+
+        resolve = state->resolve_now;
+        loc = state->loc_now;
+
+        inode = resolve->hint;
+        if (inode->table == state->itable)
+                inode_ref (inode);
+        else
+                inode = inode_find (state->itable, resolve->gfid);
+
+        if (inode) {
+                if (!fuse_inode_needs_lookup (inode, THIS))
+                        goto found;
+                /* inode was linked through readdirplus */
+                inode_unref (inode);
+        }
+
+        return 1;
+found:
+        loc->inode = inode;
+        return 0;
+}
+
+
+/*
+ * Resolve a gfid-only location: from the inode table when possible, else
+ * through a gfid lookup.  Always returns 0.
+ */
+int
+fuse_resolve_inode (fuse_state_t *state)
+{
+        int ret = 0;
+
+        ret = fuse_resolve_inode_simple (state);
+
+        if (ret > 0) {
+                fuse_resolve_gfid (state);
+                return 0;
+        }
+
+        fuse_resolve_continue (state);
+
+        return 0;
+}
+
+
+/*
+ * Synctask body: migrate state->fd to state->active_subvol.
+ * data is the fuse_state_t of the request.  The outcome of the migration is
+ * recorded in the fd context (migration_failed).
+ * Returns 0 once a migration was attempted, -1 when state or the fd context
+ * is missing.
+ */
+int
+fuse_migrate_fd_task (void *data)
+{
+        int            ret        = -1;
+        fuse_state_t  *state      = NULL;
+        fd_t          *basefd     = NULL, *oldfd = NULL;
+        fuse_fd_ctx_t *basefd_ctx = NULL;
+        xlator_t      *old_subvol = NULL;
+
+        state = data;
+        if (state == NULL) {
+                goto out;
+        }
+
+        basefd = state->fd;
+
+        basefd_ctx = fuse_fd_ctx_get (state->this, basefd);
+        if (basefd_ctx == NULL) {
+                /* the other users of fuse_fd_ctx_get() in this file handle
+                 * a NULL context; do not dereference it here either */
+                goto out;
+        }
+
+        LOCK (&basefd->lock);
+        {
+                oldfd = basefd_ctx->activefd ? basefd_ctx->activefd : basefd;
+                fd_ref (oldfd);
+        }
+        UNLOCK (&basefd->lock);
+
+        old_subvol = oldfd->inode->table->xl;
+
+        ret = fuse_migrate_fd (state->this, basefd, old_subvol,
+                               state->active_subvol);
+
+        LOCK (&basefd->lock);
+        {
+                if (ret < 0) {
+                        basefd_ctx->migration_failed = 1;
+                } else {
+                        basefd_ctx->migration_failed = 0;
+                }
+        }
+        UNLOCK (&basefd->lock);
+
+        ret = 0;
+
+out:
+        if (oldfd)
+                fd_unref (oldfd);
+
+        return ret;
+}
+
+
+/*
+ * Returns 1 when the fd context of fd has migration_failed set, 0 otherwise
+ * (including when fd has no context).
+ */
+static inline int
+fuse_migrate_fd_error (xlator_t *this, fd_t *fd)
+{
+        fuse_fd_ctx_t *fdctx = NULL;
+        char           error = 0;
+
+        fdctx = fuse_fd_ctx_get (this, fd);
+        if (fdctx != NULL) {
+                if (fdctx->migration_failed) {
+                        error = 1;
+                }
+        }
+
+        return error;
+}
+
+/*
+ * Set activefd to basefd_ctx->activefd when there is one, else to basefd,
+ * and take a reference on it.  Expects a variable named basefd_ctx in the
+ * scope of the caller.  The reference on basefd itself is taken after the
+ * lock is released.
+ * No trailing semicolon after while (0): the caller supplies it, which
+ * keeps the macro usable as a single statement (e.g. in if/else).
+ */
+#define FUSE_FD_GET_ACTIVE_FD(activefd, basefd)                 \
+        do {                                                    \
+                LOCK (&(basefd)->lock);                         \
+                {                                               \
+                        (activefd) = basefd_ctx->activefd ?     \
+                                basefd_ctx->activefd : (basefd);\
+                        if ((activefd) != (basefd)) {           \
+                                fd_ref (activefd);              \
+                        }                                       \
+                }                                               \
+                UNLOCK (&(basefd)->lock);                       \
+                                                                \
+                if ((activefd) == (basefd)) {                   \
+                        fd_ref (activefd);                      \
+                }                                               \
+        } while (0)
+
+
+/*
+ * Resolve the fd of the request to the fd usable on the active graph.
+ * When the active fd does not belong to state->active_subvol a migration is
+ * started through fuse_migrate_fd_task().  If the migration fails, or had
+ * already failed earlier, the fop is failed with EBADF.  On return
+ * state->fd refers to the active fd.  Always returns 0.
+ */
+static int
+fuse_resolve_fd (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve            = NULL;
+        fd_t           *basefd             = NULL, *activefd = NULL;
+        xlator_t       *active_subvol      = NULL, *this = NULL;
+        int             ret                = 0;
+        char            fd_migration_error = 0;
+        fuse_fd_ctx_t  *basefd_ctx         = NULL;
+
+        resolve = state->resolve_now;
+
+        this = state->this;
+
+        basefd = resolve->fd;
+        basefd_ctx = fuse_fd_ctx_get (this, basefd);
+        if (basefd_ctx == NULL) {
+                gf_log (state->this->name, GF_LOG_WARNING,
+                        "fdctx is NULL for basefd (ptr:%p inode-gfid:%s), "
+                        "resolver erroring out with errno EINVAL",
+                        basefd, uuid_utoa (basefd->inode->gfid));
+                resolve->op_ret = -1;
+                resolve->op_errno = EINVAL;
+                goto resolve_continue;
+        }
+
+        FUSE_FD_GET_ACTIVE_FD (activefd, basefd);
+
+        active_subvol = activefd->inode->table->xl;
+
+        fd_migration_error = fuse_migrate_fd_error (state->this, basefd);
+        if (fd_migration_error) {
+                resolve->op_ret = -1;
+                resolve->op_errno = EBADF;
+        } else if (state->active_subvol != active_subvol) {
+                ret = synctask_new (state->this->ctx->env, fuse_migrate_fd_task,
+                                    NULL, NULL, state);
+
+                fd_migration_error = fuse_migrate_fd_error (state->this,
+                                                            basefd);
+                /* drop the reference on the old active fd and fetch the
+                 * active fd again, it may have changed */
+                fd_unref (activefd);
+
+                FUSE_FD_GET_ACTIVE_FD (activefd, basefd);
+                active_subvol = activefd->inode->table->xl;
+
+                if ((ret == -1) || fd_migration_error
+                    || (state->active_subvol != active_subvol)) {
+                        if (ret == -1) {
+                                gf_log (state->this->name, GF_LOG_WARNING,
+                                        "starting sync-task to migrate "
+                                        "basefd (ptr:%p inode-gfid:%s) failed "
+                                        "(old-subvolume:%s-%d "
+                                        "new-subvolume:%s-%d)",
+                                        basefd,
+                                        uuid_utoa (basefd->inode->gfid),
+                                        active_subvol->name,
+                                        active_subvol->graph->id,
+                                        state->active_subvol->name,
+                                        state->active_subvol->graph->id);
+                        } else {
+                                gf_log (state->this->name, GF_LOG_WARNING,
+                                        "fd migration of basefd "
+                                        "(ptr:%p inode-gfid:%s) failed "
+                                        "(old-subvolume:%s-%d "
+                                        "new-subvolume:%s-%d)",
+                                        basefd,
+                                        uuid_utoa (basefd->inode->gfid),
+                                        active_subvol->name,
+                                        active_subvol->graph->id,
+                                        state->active_subvol->name,
+                                        state->active_subvol->graph->id);
+                        }
+
+                        resolve->op_ret = -1;
+                        resolve->op_errno = EBADF;
+                } else {
+                        gf_log (state->this->name, GF_LOG_DEBUG,
+                                "basefd (ptr:%p inode-gfid:%s) migrated "
+                                "successfully in resolver "
+                                "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+                                basefd, uuid_utoa (basefd->inode->gfid),
+                                active_subvol->name, active_subvol->graph->id,
+                                state->active_subvol->name,
+                                state->active_subvol->graph->id);
+                }
+        }
+
+        if ((resolve->op_ret == -1) && (resolve->op_errno == EBADF)) {
+                gf_log ("fuse-resolve", GF_LOG_WARNING,
+                        "migration of basefd (ptr:%p inode-gfid:%s) "
+                        "did not complete, failing fop with EBADF "
+                        "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+                        uuid_utoa (basefd->inode->gfid),
+                        active_subvol->name, active_subvol->graph->id,
+                        state->active_subvol->name,
+                        state->active_subvol->graph->id);
+        }
+
+        if (activefd != basefd) {
+                /* the request continues on the active fd */
+                state->fd = fd_ref (activefd);
+                fd_unref (basefd);
+        }
+
+        /* state->active_subvol = active_subvol; */
+
+resolve_continue:
+        if (activefd != NULL) {
+                fd_unref (activefd);
+        }
+
+        fuse_resolve_continue (state);
+
+        return 0;
+}
+
+
+/*
+ * Store state->gfid under the key "gfid-req" in state->xdata, creating the
+ * dictionary when needed.  Nothing is done when state->gfid is null.
+ * Returns 0 on success (or nothing to do), -1 when the dictionary cannot be
+ * allocated, else the result of dict_set_static_bin().
+ */
+int
+fuse_gfid_set (fuse_state_t *state)
+{
+        int ret = 0;
+
+        if (uuid_is_null (state->gfid))
+                goto out;
+
+        if (!state->xdata)
+                state->xdata = dict_new ();
+
+        if (!state->xdata) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_static_bin (state->xdata, "gfid-req",
+                                   state->gfid, sizeof (state->gfid));
+out:
+        return ret;
+}
+
+
+/*
+ * Prepare resolve for a <parent, basename> resolution: the parent inode
+ * obtained from the fuse nodeid par gives the parent gfid and the parent
+ * hint, name is duplicated into resolve->bname.  Always returns 0.
+ * NOTE(review): the results of fuse_ino_to_inode() and gf_strdup() are not
+ * checked here - confirm that callers cannot hit a NULL.
+ */
+int
+fuse_resolve_entry_init (fuse_state_t *state, fuse_resolve_t *resolve,
+                         ino_t par, char *name)
+{
+        inode_t *parent = NULL;
+
+        parent = fuse_ino_to_inode (par, state->this);
+        uuid_copy (resolve->pargfid, parent->gfid);
+        resolve->parhint = parent;
+        resolve->bname = gf_strdup (name);
+
+        return 0;
+}
+
+
+/*
+ * Prepare resolve for an inode resolution: the inode obtained from the fuse
+ * nodeid ino gives the gfid and the hint.  Always returns 0.
+ */
+int
+fuse_resolve_inode_init (fuse_state_t *state, fuse_resolve_t *resolve,
+                         ino_t ino)
+{
+        inode_t *inode = NULL;
+
+        inode = fuse_ino_to_inode (ino, state->this);
+        uuid_copy (resolve->gfid, inode->gfid);
+        resolve->hint = inode;
+
+        return 0;
+}
+
+
+/*
+ * Prepare resolve for an fd resolution; a reference is taken on fd.
+ * Always returns 0.
+ */
+int
+fuse_resolve_fd_init (fuse_state_t *state, fuse_resolve_t *resolve,
+                      fd_t *fd)
+{
+        resolve->fd = fd_ref (fd);
+
+        return 0;
+}
+
+
+/*
+ * Dispatch the resolution of state->resolve_now on what was initialised in
+ * it: an fd, a parent gfid (entry resolution) or a gfid (inode resolution).
+ * When none is set there is nothing to resolve and the next step of
+ * fuse_resolve_all() is taken.  Always returns 0.
+ */
+static int
+fuse_resolve (fuse_state_t *state)
+{
+        fuse_resolve_t *resolve = NULL;
+
+        resolve = state->resolve_now;
+
+        if (resolve->fd) {
+
+                fuse_resolve_fd (state);
+
+        } else if (!uuid_is_null (resolve->pargfid)) {
+
+                fuse_resolve_parent (state);
+
+        } else if (!uuid_is_null (resolve->gfid)) {
+
+                fuse_resolve_inode (state);
+
+        } else {
+                fuse_resolve_all (state);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Resolution is over: call the resume function registered by
+ * fuse_resolve_and_resume().  Always returns 0.
+ */
+static int
+fuse_resolve_done (fuse_state_t *state)
+{
+        fuse_resume_fn_t fn = NULL;
+
+        fn = state->resume_fn;
+
+        fn (state);
+
+        return 0;
+}
+
+
+/*
+ * This function is called multiple times, once per resolving one location/fd.
+ * state->resolve_now is used to decide which location/fd is to be resolved now
+ */
+static int
+fuse_resolve_all (fuse_state_t *state)
+{
+        if (state->resolve_now == NULL) {
+
+                state->resolve_now = &state->resolve;
+                state->loc_now     = &state->loc;
+
+                fuse_resolve (state);
+
+        } else if (state->resolve_now == &state->resolve) {
+
+                state->resolve_now = &state->resolve2;
+                state->loc_now     = &state->loc2;
+
+                fuse_resolve (state);
+
+        } else if (state->resolve_now == &state->resolve2) {
+
+                fuse_resolve_done (state);
+
+        } else {
+                gf_log ("fuse-resolve", GF_LOG_ERROR,
+                        "Invalid pointer for state->resolve_now");
+        }
+
+        return 0;
+}
+
+
+/*
+ * Called when the resolution of the current location/fd is over: complete
+ * the loc and move on to the next step.  Always returns 0.
+ */
+int
+fuse_resolve_continue (fuse_state_t *state)
+{
+        fuse_resolve_loc_touchup (state);
+
+        fuse_resolve_all (state);
+
+        return 0;
+}
+
+
+/*
+ * Entry point of the resolver: resolve everything initialised in state and
+ * then call fn (state).  Always returns 0; the result of fuse_gfid_set() is
+ * not checked.
+ */
+int
+fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn)
+{
+        fuse_gfid_set (state);
+
+        state->resume_fn = fn;
+
+        fuse_resolve_all (state);
+
+        return 0;
+}
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 3f9778c65..a192d6059 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -1,23 +1,21 @@ #!/bin/sh -# (C) 2006, 2007, 2008 Z RESEARCH Inc. <http://www.zresearch.com> -# +# (C) 2006, 2007, 2008 Gluster Inc. <http://www.gluster.com> +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
-# +# # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free # Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301 USA - - _init () { # log level definitions @@ -25,138 +23,444 @@ _init () LOG_CRITICAL=CRITICAL; LOG_ERROR=ERROR; LOG_WARNING=WARNING; - LOG_NORMAL=NORMAL + LOG_INFO=INFO LOG_DEBUG=DEBUG; + LOG_TRACE=TRACE; - # set default log level to ERROR - log_level=$LOG_NORMAL; -} + HOST_NAME_MAX=64; -start_glusterfs () -{ prefix="@prefix@"; exec_prefix=@exec_prefix@; cmd_line=$(echo "@sbindir@/glusterfs"); - + + case `uname -s` in + NetBSD) + getinode="stat -f %i" + getdev="stat -f %d" + lgetinode="${getinode} -L" + lgetdev="${getdev} -L" + + mounttab=/proc/mounts + ;; + Linux) + getinode="stat -c %i $i" + getdev="stat -c %d $d" + lgetinode="${getinode} -L" + lgetdev="${getdev} -L" + + mounttab=/etc/mtab + ;; + esac + + UPDATEDBCONF=/etc/updatedb.conf +} + +parse_backup_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/\:/ /g') + for server in ${servers}; do + length=$(echo $server | wc -c) + if [ ${length} -gt ${HOST_NAME_MAX} ]; then + echo "Hostname:${server} provided is too long.. 
skipping" + continue + fi + new_servers=$(echo "$new_servers $server") + done + echo ${new_servers} +} + +start_glusterfs () +{ + # let the comparison be case insensitive for all strings + if [ -n "$log_level_str" ]; then - case "$log_level_str" in - "ERROR") - log_level=$LOG_ERROR; - ;; - "NORMAL") - log_level=$LOG_NORMAL + case "$( echo $log_level_str | tr '[a-z]' '[A-Z]')" in + "ERROR") + log_level=$LOG_ERROR; ;; - "DEBUG") - log_level=$LOG_DEBUG; - ;; - "CRITICAL") - log_level=$LOG_CRITICAL; - ;; - "WARNING") - log_level=$LOG_WARNING; - ;; - "NONE") - log_level=$LOG_NONE; - ;; - *) - echo "invalid log level $log_level_str, using NORMAL"; - log_level=$LOG_NORMAL; - ;; - esac - fi - cmd_line=$(echo "$cmd_line --log-level=$log_level"); - - if [ -n "$log_file" ]; then - cmd_line=$(echo "$cmd_line --log-file=$log_file"); + "INFO") + log_level=$LOG_INFO + ;; + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "TRACE") + log_level=$LOG_TRACE; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + echo "invalid log level $log_level_str, using INFO"; + log_level=$LOG_INFO; + ;; + esac + fi + +#options without values start here + if [ -n "$read_only" ]; then + cmd_line=$(echo "$cmd_line --read-only"); + fi + + if [ -n "$acl" ]; then + cmd_line=$(echo "$cmd_line --acl"); + fi + + if [ -n "$selinux" ]; then + cmd_line=$(echo "$cmd_line --selinux"); + fi + + if [ -n "$enable_ino32" ]; then + cmd_line=$(echo "$cmd_line --enable-ino32"); + fi + + if [ -n "$worm" ]; then + cmd_line=$(echo "$cmd_line --worm"); + fi + + if [ -n "$fopen_keep_cache" ]; then + cmd_line=$(echo "$cmd_line --fopen-keep-cache"); fi if [ -n "$volfile_check" ]; then - cmd_line=$(echo "$cmd_line --volfile-check"); + cmd_line=$(echo "$cmd_line --volfile-check"); + fi + + if [ -n "$mem_accounting" ]; then + cmd_line=$(echo "$cmd_line --mem-accounting"); + fi + + if [ -n "$aux_gfid_mount" ]; then + cmd_line=$(echo "$cmd_line 
--aux-gfid-mount"); + fi + +#options with values start here + if [ -n "$log_level" ]; then + cmd_line=$(echo "$cmd_line --log-level=$log_level"); + fi + + if [ -n "$log_file" ]; then + cmd_line=$(echo "$cmd_line --log-file=$log_file"); fi if [ -n "$direct_io_mode" ]; then - cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); fi - - if [ -z "$volfile_loc" ]; then - if [ -n "$transport" ]; then - cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip \ ---volfile-server-port=$server_port \ ---volfile-server-transport=$transport"); - else - cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip \ ---volfile-server-port=$server_port"); - fi - else - cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + + if [ -n "$use_readdirp" ]; then + cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp"); fi if [ -n "$volume_name" ]; then cmd_line=$(echo "$cmd_line --volume-name=$volume_name"); fi - - if [ -n "$volume_id" ]; then - cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + + if [ -n "$attribute_timeout" ]; then + cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout"); + fi + + if [ -n "$entry_timeout" ]; then + cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout"); + fi + + if [ -n "$negative_timeout" ]; then + cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout"); + fi + + if [ -n "$gid_timeout" ]; then + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + fi + + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi + + if [ -n "$cong_threshold" ]; then + cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold"); + fi + + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + fi + + if [ -n "$xlator_option" ]; then + xlator_option=$(echo $xlator_option | sed s/"xlator-option="/"--xlator-option "/g) + cmd_line=$(echo "$cmd_line $xlator_option"); + fi + + 
# for rdma volume, we have to fetch volfile with '.rdma' added + # to volume name, so that it fetches the right client vol file + volume_id_rdma=""; + + if [ -z "$volfile_loc" ]; then + if [ -n "$server_ip" ]; then + + cmd_line=$(echo "$cmd_line --volfile-server=$server_ip"); + + if [ -n "$backup_volfile_servers" ]; then + servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) + for i in $(echo ${servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + fi + + if [ -n "$server_port" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port"); + fi + + if [ -n "$transport" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport"); + if [ "$transport" = "rdma" ]; then + volume_id_rdma=".rdma"; + fi + fi + + if [ -n "$volume_id" ]; then + if [ -n "$volume_id_rdma" ]; then + volume_id="$volume_id$volume_id_rdma"; + fi + cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + fi + fi + else + cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + fi + + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); fi cmd_line=$(echo "$cmd_line $mount_point"); - exec $cmd_line; + err=0; + $cmd_line; + + + inode=$( ${getinode} $mount_point 2>/dev/null); + + # this is required if the stat returns error + if [ -z "$inode" ]; then + inode="0"; + fi + + if [ $inode -ne 1 ]; then + err=1; + fi + + if [ $err -eq "1" ]; then + echo "Mount failed. Please check the log file for more details." + umount $mount_point > /dev/null 2>&1; + exit 1; + fi } +usage () +{ + +echo "Usage: mount.glusterfs <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint> +Options: +man 8 mount.glusterfs -main () +To display the version number of the mount helper: +mount.glusterfs --version" + +} + +# check for recursive mounts. 
i.e, mounting over an existing brick +check_recursive_mount () { - options=$(echo "$@" | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); - new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p'); - - [ -n "$new_log_level" ] && { - log_level_str="$new_log_level"; - } - log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p'); + if [ $1 = "/" ]; then + echo Cannot mount over root; + exit 2; + fi + # GFID check first + # remove trailing / from mount point + mnt_dir=${1%/}; - transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p'); + export PATH; + # check whether getfattr exists + which getfattr > /dev/null 2>&1; + if [ $? -ne 0 ]; then + return; + fi - direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p'); + getfattr -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid="; + if [ $? -eq 0 ]; then + echo "ERROR: $mnt_dir is in use as a brick of a gluster volume"; + exit 2; + fi - volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p'); + # check if the mount point is a brick's parent directory + GLUSTERD_WORKDIR="/var/lib/glusterd"; - volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p'); + ls -L "$GLUSTERD_WORKDIR"/vols/*/bricks/* > /dev/null 2>&1; + if [ $? -ne 0 ]; then + return; + fi - volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p'); + brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* | cut -d "=" -f 2`; + root_inode=`${lgetinode} /`; + root_dev=`${lgetdev} /`; + mnt_inode=`${lgetinode} $mnt_dir`; + mnt_dev=`${lgetdev} $mnt_dir`; + for brick in "$brick_path"; + do + # evaluate brick path to see if this is local, if non-local, skip iteration + ls $brick > /dev/null 2>&1; + if [ $? -ne 0 ]; then + continue; + fi + getfattr -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid="; + if [ $? 
-ne 0 ]; then + continue; + else + # brick is local + while [ 1 ]; + do + tmp_brick="$brick"; + brick="$brick"/..; + brick_dev=`${lgetdev} $brick`; + brick_inode=`${lgetinode} $brick`; + if [ "$mnt_inode" -eq "$brick_inode" -a "$mnt_dev" -eq "$brick_dev" ]; then + echo ERROR: $mnt_dir is a parent of the brick $tmp_brick; + exit 2; + fi + [ "$root_inode" -ne "$brick_inode" -o "$root_dev" -ne "$brick_dev" ] || break; + done; + fi + done; +} + +main () +{ + helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p'); + in_opt="no" + pos_args=0 + for opt in "$@"; do + if [ "$in_opt" = "yes" ]; then + for pair in $(echo "$opt" | tr "," " "); do + # Handle options without values. + case "$pair" in + "ro") read_only=1 ;; + "acl") acl=1 ;; + "selinux") selinux=1 ;; + "worm") worm=1 ;; + "fopen-keep-cache") fopen_keep_cache=1 ;; + "enable-ino32") enable_ino32=1 ;; + "mem-accounting") mem_accounting=1;; + "aux-gfid-mount") + if [ `uname -s` = "Linux" ]; then + aux_gfid_mount=1 + fi + ;; + # "mount -t glusterfs" sends this, but it's useless. + "rw") ;; + # these ones are interpreted during system initialization + "noauto") ;; + "_netdev") ;; + *) + key=$(echo "$pair" | cut -f1 -d'='); + value=$(echo "$pair" | cut -f2- -d'='); + + # Handle options with values. 
+ case "$key" in + "log-level") log_level_str=$value ;; + "log-file") log_file=$value ;; + "transport") transport=$value ;; + "direct-io-mode") direct_io_mode=$value ;; + "volume-name") volume_name=$value ;; + "volume-id") volume_id=$value ;; + "volfile-check") volfile_check=$value ;; + "server-port") server_port=$value ;; + "attribute-timeout") + attribute_timeout=$value ;; + "entry-timeout") entry_timeout=$value ;; + "negative-timeout") negative_timeout=$value ;; + "gid-timeout") gid_timeout=$value ;; + "background-qlen") bg_qlen=$value ;; + "backup-volfile-servers") backup_volfile_servers=$value ;; + "congestion-threshold") cong_threshold=$value ;; + "xlator-option") xlator_option=$xlator_option" "$pair ;; + "fuse-mountopts") fuse_mountopts=$value ;; + "use-readdirp") use_readdirp=$value ;; + *) + # Passthru + [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," + fuse_mountopts="$fuse_mountopts$pair" + ;; + esac + esac + done + in_opt="no" + elif [ "$opt" = "-o" ]; then + in_opt="yes" + else + case $pos_args in + 0) volfile_loc=$opt ;; + 1) mount_point=$opt ;; + *) echo "extra arguments at end (ignored)" ;; + esac + pos_args=$((pos_args+1)) + fi + done + if [ $in_opt = "yes" -o $pos_args -lt 2 ]; then + usage + exit 1 + fi - volfile_loc="$1"; - [ -r "$volfile_loc" ] || { - server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p'); - server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); - [ -n "$server_port" ] || { - server_port="6996"; - } + server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p'); + test_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$test_str" ] && { + volume_id="$test_str"; + } + volfile_loc=""; + } - volfile_loc=""; + # + [ -n "$helper" ] && { + cmd_line=$(echo "$cmd_line --$helper"); + exec $cmd_line; + exit 0; + } + + # No need to do a ! -d test, it is taken care while initializing the + # variable mount_point + [ -z "$mount_point" -o ! 
-d "$mount_point" ] && { + echo "ERROR: Mount point does not exist." + usage; + exit 0; } - new_fs_options=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \ - -e 's/[,]*log-level=[^,]*//' \ - -e 's/[,]*volume-name=[^,]*//' \ - -e 's/[,]*direct-io-mode=[^,]*//' \ - -e 's/[,]*volfile-check=[^,]*//' \ - -e 's/[,]*transport=[^,]*//' \ - -e 's/[,]*volume-id=[^,]*//'); - # following line is product of love towards sed - # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p'); - - mount_point="$2"; # Simple check to avoid multiple identical mounts - if grep -q "glusterfs $mount_point fuse" /etc/mtab; then - echo "$0: according to mtab, GlusterFS is already mounted on $mount_point" - exit 1 + if grep -q "[[:space:]+]${mount_point}[[:space:]+]fuse" $mounttab; then + echo -n "$0: according to mtab, GlusterFS is already mounted on " + echo "$mount_point" + exit 0; fi - - fs_options=$(echo "$fs_options,$new_fs_options"); - + + check_recursive_mount "$mount_point"; + + # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5). updatedb(8) + # should not index files under GlusterFS, indexing will slow down GlusterFS + # if the filesystem is several TB in size. + test -f $UPDATEDBCONF && { + if ! grep -q 'glusterfs' $UPDATEDBCONF; then + sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \ + > ${UPDATEDBCONF}.bak + mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF + fi + } + start_glusterfs; } diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in index b6a3d1857..b12b4e04e 100755 --- a/xlators/mount/fuse/utils/mount_glusterfs.in +++ b/xlators/mount/fuse/utils/mount_glusterfs.in @@ -1,5 +1,5 @@ #!/bin/sh -# (C) 2008 Z RESEARCH Inc. <http://www.zresearch.com> +# (C) 2008 Gluster Inc. 
<http://www.gluster.com> # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -25,11 +25,11 @@ _init () LOG_CRITICAL=CRITICAL; LOG_ERROR=ERROR; LOG_WARNING=WARNING; - LOG_NORMAL=NORMAL; + LOG_INFO=INFO; LOG_DEBUG=DEBUG; # set default log level to ERROR - log_level=$LOG_NORMAL; + log_level=$LOG_INFO; } start_glusterfs () @@ -43,8 +43,8 @@ start_glusterfs () "ERROR") log_level=$LOG_ERROR; ;; - "NORMAL") - log_level=$LOG_NORMAL; + "INFO") + log_level=$LOG_INFO; ;; "DEBUG") log_level=$LOG_DEBUG; @@ -59,8 +59,8 @@ start_glusterfs () log_level=$LOG_NONE; ;; *) - echo "invalid log level $log_level_str, using NORMAL"; - log_level=$LOG_NORMAL; + echo "invalid log level $log_level_str, using INFO"; + log_level=$LOG_INFO; ;; esac fi @@ -82,12 +82,10 @@ start_glusterfs () if [ -n "$transport" ]; then cmd_line=$(echo "$cmd_line \ --volfile-server=$server_ip \ ---volfile-server-port=$server_port \ --volfile-server-transport=$transport"); else cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip \ ---volfile-server-port=$server_port"); +--volfile-server=$server_ip"); fi else cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); @@ -167,19 +165,15 @@ main () # TODO: use getopt. This is very much darwin specific volfile_loc="$1"; - while [ "$volfile_loc" == "-o" ] ; do + while [ "$volfile_loc" = "-o" ] ; do shift ; shift ; volfile_loc="$1"; done [ -r "$volfile_loc" ] || { - server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p'); - server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); - [ -n "$server_port" ] || { - server_port="6996"; - } - + server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p'); + volume_id=$(echo "$volfile_loc" | sed -n 's/[a-zA-Z0-9:.\-]*:\(.*\)/\1/p'); volfile_loc=""; } # following line is product of love towards sed |
