diff options
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 5900 |
1 files changed, 3501 insertions, 2399 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index e92f01cd6..fb45c7a67 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -24,17 +14,28 @@ #define __XOPEN_SOURCE 500 +#include <openssl/md5.h> #include <stdint.h> #include <sys/time.h> #include <sys/resource.h> #include <errno.h> +#include <libgen.h> +#include <pthread.h> #include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> #endif /* GF_BSD_HOST_OS */ +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + #include "glusterfs.h" +#include "checksum.h" #include "dict.h" #include "logging.h" #include "posix.h" @@ -45,18 +46,28 @@ #include "compat.h" #include "byte-order.h" #include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 #undef HAVE_SET_FSID #ifdef HAVE_SET_FSID #define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; -#define SET_FS_ID(uid, gid) do { \ +#define SET_FS_ID(uid, gid) do { \ old_fsuid = setfsuid (uid); \ old_fsgid = setfsgid (gid); \ } while (0) -#define SET_TO_OLD_FS_ID() do { \ +#define SET_TO_OLD_FS_ID() do { \ setfsuid (old_fsuid); \ setfsgid (old_fsgid); \ } while (0) @@ -69,245 +80,106 @@ #endif -typedef struct { - xlator_t *this; - const char *real_path; - dict_t *xattr; - struct stat *stbuf; - loc_t *loc; -} posix_xattr_filler_t; - int posix_forget (xlator_t *this, inode_t *inode) { - uint64_t tmp_cache = 0; - if (!inode_ctx_del (inode, this, &tmp_cache)) - dict_destroy ((dict_t *)(long)tmp_cache); - - return 0; -} - -static void -_posix_xattr_get_set (dict_t *xattr_req, - char *key, - data_t *data, - void *xattrargs) -{ - posix_xattr_filler_t *filler = xattrargs; - char *value = NULL; - ssize_t xattr_size = -1; - int ret = -1; - char *databuf = NULL; - int _fd = -1; - loc_t *loc = NULL; - ssize_t req_size = 0; - - - /* should size be put into the data_t ? */ - if (!strcmp (key, "glusterfs.content")) { - /* file content request */ - req_size = data_to_uint64 (data); - if (req_size >= filler->stbuf->st_size) { - _fd = open (filler->real_path, O_RDONLY); - - if (_fd == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "opening file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - databuf = calloc (1, filler->stbuf->st_size); - - if (!databuf) { - gf_log (filler->this->name, GF_LOG_ERROR, - "out of memory :("); - goto err; - } - - ret = read (_fd, databuf, filler->stbuf->st_size); - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "read on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = close (_fd); - _fd = -1; - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "close on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = dict_set_bin (filler->xattr, key, - databuf, filler->stbuf->st_size); - if (ret < 0) { - goto err; - } - - /* To avoid double free in cleanup below */ - databuf = NULL; - err: - if (_fd != -1) - close (_fd); - if (databuf) - FREE (databuf); - } - } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { - loc = filler->loc; - if (!list_empty (&loc->inode->fd_list)) { - ret = dict_set_uint32 (filler->xattr, key, 1); - } else { - ret = dict_set_uint32 (filler->xattr, key, 0); - } - } else { - xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); - - if (xattr_size > 0) { - value = calloc (1, xattr_size + 1); - - sys_lgetxattr (filler->real_path, key, value, - xattr_size); - - value[xattr_size] = '\0'; - ret = dict_set_bin (filler->xattr, key, - value, xattr_size); - if (ret < 0) - gf_log (filler->this->name, GF_LOG_ERROR, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - } - } -} - - -dict_t * -posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc, - dict_t *xattr_req, struct stat *buf) -{ - dict_t *xattr = NULL; - posix_xattr_filler_t filler = {0, }; - - xattr = get_new_dict(); - if (!xattr) { - gf_log (this->name, GF_LOG_ERROR, - "memory allocation failed :("); - goto out; - } - - filler.this = this; - filler.real_path = real_path; - filler.xattr = xattr; - filler.stbuf = buf; - filler.loc = loc; - - dict_foreach (xattr_req, _posix_xattr_get_set, &filler); -out: - return xattr; -} - - -static int -posix_scale_st_ino (struct posix_private *priv, struct stat *buf) -{ - int i = 0; - int ret = -1; - ino_t temp_ino = 0; - - for (i = 0; i < priv->num_devices_to_span; i++) { - if (buf->st_dev == priv->st_device[i]) - break; - if (priv->st_device[i] == 0) { - priv->st_device[i] = buf->st_dev; - break; - } - } - - if (i == priv->num_devices_to_span) - goto out; - - temp_ino = (buf->st_ino * priv->num_devices_to_span) + i; - - buf->st_ino = temp_ino; + uint64_t tmp_cache = 0; + if (!inode_ctx_del (inode, this, &tmp_cache)) + dict_destroy ((dict_t *)(long)tmp_cache); - ret = 0; - out: - return ret; + return 0; } +/* Regular fops */ int32_t posix_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) + loc_t *loc, dict_t *xdata) { - struct stat buf = {0, }; - char * real_path = NULL; + struct iatt buf = {0, }; int32_t op_ret = -1; + int32_t entry_ret = 0; int32_t op_errno = 0; dict_t * xattr = NULL; - - struct posix_private *priv = NULL; + char * real_path = NULL; + char * par_path = NULL; + struct iatt postparent = {0,}; + int32_t gfidless = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); - MAKE_REAL_PATH (real_path, this, loc->path); + /* The Hidden directory should be for housekeeping purpose and it + should not get any gfid on it */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "Lookup issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } - priv = this->private; + op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless); + op_ret = -1; + if (uuid_is_null (loc->pargfid)) { + /* nameless lookup */ + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + } else { + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); + + if (uuid_is_null (loc->inode->gfid)) { + posix_gfid_heal (this, real_path, loc, xdata); + MAKE_ENTRY_HANDLE (real_path, par_path, this, + loc, &buf); + } + } - op_ret = lstat (real_path, &buf); op_errno = errno; if (op_ret == -1) { - if (op_errno != ENOENT) { - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s failed: %s", - loc->path, strerror (op_errno)); - } - goto out; + if (op_errno != ENOENT) { + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", + real_path, strerror (op_errno)); + } + + entry_ret = -1; + goto parent; } - /* Make sure we don't access another mountpoint inside export dir. - * It may cause inode number to repeat from single export point, - * which leads to severe problems.. - */ - if (!priv->span_devices) { - if (priv->st_device[0] != buf.st_dev) { - op_errno = ENOENT; - gf_log (this->name, GF_LOG_WARNING, - "%s: different mountpoint/device, returning " - "ENOENT", loc->path); - goto out; - } - } else { - op_ret = posix_scale_st_ino (priv, &buf); - if (-1 == op_ret) { - op_errno = ENOENT; - gf_log (this->name, GF_LOG_WARNING, - "%s: from different mountpoint", - loc->path); - goto out; - } + if (xdata && (op_ret == 0)) { + xattr = posix_lookup_xattr_fill (this, real_path, loc, + xdata, &buf); } - if (xattr_req && (op_ret == 0)) { - xattr = posix_lookup_xattr_fill (this, real_path, loc, - xattr_req, &buf); +parent: + if (par_path) { + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } } - op_ret = 0; + op_ret = entry_ret; out: - frame->root->rsp_refs = NULL; - if (xattr) dict_ref (xattr); - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &buf, xattr); + if (!op_ret && !gfidless && uuid_is_null (buf.ia_gfid)) { + gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for " + "%s", (real_path) ? real_path: ""); + op_ret = -1; + op_errno = ENODATA; + } + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &buf, xattr, &postparent); if (xattr) dict_unref (xattr); @@ -317,14 +189,13 @@ out: int32_t -posix_stat (call_frame_t *frame, - xlator_t *this, - loc_t *loc) +posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - struct stat buf = {0,}; - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; + struct iatt buf = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_private *priv = NULL; + char *real_path = NULL; DECLARE_OLD_FS_ID_VAR; @@ -332,404 +203,829 @@ posix_stat (call_frame_t *frame, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = lstat (real_path, &buf); + MAKE_INODE_HANDLE (real_path, this, loc, &buf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s: %s", loc->path, strerror (op_errno)); + gf_log (this->name, (op_errno == ENOENT)? + GF_LOG_DEBUG:GF_LOG_ERROR, + "lstat on %s failed: %s", real_path, + strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID(); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL); return 0; } -int32_t -posix_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd) +static int +posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf) { - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - DIR * dir = NULL; - struct posix_fd * pfd = NULL; + int32_t ret = -1; + mode_t mode = 0; + struct stat stat; + int is_symlink = 0; + + ret = sys_lstat (path, &stat); + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, + "lstat failed: %s (%s)", path, strerror (errno)); + goto out; + } + + if (S_ISLNK (stat.st_mode)) + is_symlink = 1; + + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + ret = lchmod (path, mode); + if ((ret == -1) && (errno == ENOSYS)) { + /* in Linux symlinks are always in mode 0777 and no + such call as lchmod exists. + */ + gf_log (this->name, GF_LOG_DEBUG, + "%s (%s)", path, strerror (errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = chmod (path, mode); + } +out: + return ret; +} + +static int +posix_do_chown (xlator_t *this, + const char *path, + struct iatt *stbuf, + int32_t valid) +{ + int32_t ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = lchown (path, uid, gid); + + return ret; +} + +static int +posix_do_utimes (xlator_t *this, + const char *path, + struct iatt *stbuf) +{ + int32_t ret = -1; + struct timeval tv[2] = {{0,},{0,}}; + struct stat stat; + int is_symlink = 0; + + ret = sys_lstat (path, &stat); + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, + "%s (%s)", path, strerror (errno)); + goto out; + } + + if (S_ISLNK (stat.st_mode)) + is_symlink = 1; + + tv[0].tv_sec = stbuf->ia_atime; + tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; + tv[1].tv_sec = stbuf->ia_mtime; + tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; + + ret = lutimes (path, tv); + if ((ret == -1) && (errno == ENOSYS)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (%s)", path, strerror (errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = utimes (path, tv); + } + +out: + return ret; +} + +int +posix_setattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); - VALIDATE_OR_GOTO (fd, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - - dir = opendir (real_path); + MAKE_INODE_HANDLE (real_path, this, loc, &statpre); - if (dir == NULL) { + if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "opendir failed on %s (%s)", - loc->path, strerror (op_errno)); + "setattr (lstat) on %s failed: %s", real_path, + strerror (op_errno)); goto out; } - op_ret = dirfd (dir); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "dirfd() failed on %s (%s)", - loc->path, strerror (op_errno)); - goto out; - } - - pfd = CALLOC (1, sizeof (*fd)); - if (!pfd) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - goto out; + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_chmod (this, real_path, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "setattr (chmod) on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } } - pfd->dir = dir; - pfd->fd = dirfd (dir); - pfd->path = strdup (real_path); - if (!pfd->path) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - goto out; + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){ + op_ret = posix_do_chown (this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "setattr (chown) on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } } - fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_utimes (this, real_path, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "setattr (utimes) on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } + } - frame->root->rsp_refs = NULL; + if (!valid) { + op_ret = lchown (real_path, -1, -1); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lchown (%s, -1, -1) failed => (%s)", + real_path, strerror (op_errno)); - op_ret = 0; + goto out; + } + } - out: + op_ret = posix_pstat (this, loc->gfid, real_path, &statpost); if (op_ret == -1) { - if (dir) { - closedir (dir); - dir = NULL; - } - if (pfd) { - if (pfd->path) - FREE (pfd->path); - FREE (pfd); - pfd = NULL; - } + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "setattr (lstat) on %s failed: %s", real_path, + strerror (op_errno)); + goto out; } + op_ret = 0; + +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND (frame, op_ret, op_errno, fd); + + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, + &statpre, &statpost, NULL); + return 0; } +int32_t +posix_do_fchown (xlator_t *this, + int fd, + struct iatt *stbuf, + int32_t valid) +{ + int ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = fchown (fd, uid, gid); + + return ret; +} + int32_t -posix_getdents (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, int32_t flag) +posix_do_fchmod (xlator_t *this, + int fd, struct iatt *stbuf) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - dir_entry_t entries = {0, }; - dir_entry_t * tmp = NULL; - DIR * dir = NULL; - struct dirent * dirent = NULL; - int real_path_len = -1; - int entry_path_len = -1; - char * entry_path = NULL; - int count = 0; - struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; - struct stat buf = {0,}; - int ret = -1; - char tmp_real_path[ZR_PATH_MAX]; - char linkpath[ZR_PATH_MAX]; + mode_t mode = 0; - DECLARE_OLD_FS_ID_VAR ; + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + return fchmod (fd, mode); +} + +static int +posix_do_futimes (xlator_t *this, + int fd, + struct iatt *stbuf) +{ + gf_log (this->name, GF_LOG_WARNING, "function not implemented fd(%d)", fd); + + errno = ENOSYS; + return -1; +} + +int +posix_fsetattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, - "fd %p does not have context in %s", - fd, this->name); + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - if (!pfd->path) { - op_errno = EBADFD; + + op_ret = posix_fdstat (this, pfd->fd, &statpre); + if (op_ret == -1) { + op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pfd does not have path set (possibly file " - "fd, fd=%p)", fd); + "fsetattr (fstat) failed on fd=%p: %s", fd, + strerror (op_errno)); goto out; } - real_path = pfd->path; - real_path_len = strlen (real_path); + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_fchmod (this, pfd->fd, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "fsetattr (fchmod) failed on fd=%p: %s", + fd, strerror (op_errno)); + goto out; + } + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "fsetattr (fchown) failed on fd=%p: %s", + fd, strerror (op_errno)); + goto out; + } + + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_futimes (this, pfd->fd, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "fsetattr (futimes) on failed fd=%p: %s", fd, + strerror (op_errno)); + goto out; + } + } - entry_path_len = real_path_len + NAME_MAX; - entry_path = CALLOC (1, entry_path_len); + if (!valid) { + op_ret = fchown (pfd->fd, -1, -1); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "fchown (%d, -1, -1) failed => (%s)", + pfd->fd, strerror (op_errno)); - if (!entry_path) { + goto out; + } + } + + op_ret = posix_fdstat (this, pfd->fd, &statpost); + if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); + "fsetattr (fstat) failed on fd=%p: %s", fd, + strerror (op_errno)); goto out; } - strncpy (entry_path, real_path, entry_path_len); - entry_path[real_path_len] = '/'; + op_ret = 0; - dir = pfd->dir; +out: + SET_TO_OLD_FS_ID (); - if (!dir) { - op_errno = EBADFD; + STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, + &statpre, &statpost, NULL); + + return 0; +} + +static int32_t +posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; gf_log (this->name, GF_LOG_ERROR, - "pfd does not have dir set (possibly file fd, " - "fd=%p, path=`%s'", - fd, real_path); + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); goto out; } - /* TODO: check for all the type of flag, and behave appropriately */ + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == -1) { + ret = -errno; + goto out; + } - while ((dirent = readdir (dir))) { - if (!dirent) - break; + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } - /* This helps in self-heal, when only directories - needs to be replicated */ +out: + SET_TO_OLD_FS_ID (); - /* This is to reduce the network traffic, in case only - directory is needed from posix */ + return ret; +} - strncpy (tmp_real_path, real_path, ZR_PATH_MAX); - strncat (tmp_real_path, "/", - ZR_PATH_MAX - strlen (tmp_real_path)); +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; - strncat (tmp_real_path, dirent->d_name, - ZR_PATH_MAX - strlen (tmp_real_path)); - ret = lstat (tmp_real_path, &buf); + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} - if ((flag == GF_GET_DIR_ONLY) - && (ret != -1 && !S_ISDIR(buf.st_mode))) { - continue; +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ + size_t num_vect = 0; + int32_t num_loop = 1; + int32_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + size_t remain = 0; + size_t extra = 0; + struct iovec *vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size ; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC (num_vect, sizeof(struct iovec), + gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, + "memory alloc failed, vect_size %d: %s", + vect_size, strerror(errno)); + GF_FREE(vector); + return -1; } + } else { + iov_base = GF_CALLOC (vect_size, sizeof(char), + gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + lseek(fd, offset, SEEK_SET); + for (idx = 0; idx < num_loop; idx++) { + op_ret = writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + } + if (extra) { + op_ret = writev(fd, vector, extra); + if (op_ret < 0) + goto err; + } + if (remain) { + vector[0].iov_len = remain; + op_ret = writev(fd, vector , 1); + if (op_ret < 0) + goto err; + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} - tmp = CALLOC (1, sizeof (*tmp)); +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; - if (!tmp) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - goto out; - } + DECLARE_OLD_FS_ID_VAR; - tmp->name = strdup (dirent->d_name); - if (!tmp->name) { - op_errno = errno; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation fstat failed on fd = %p: %s", fd, + strerror (errno)); + goto out; + } + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_log(this->name, GF_LOG_ERROR, + "zerofill failed on fd %d length %ld %s", + pfd->fd, len, strerror(errno)); + goto out; + } + if (pfd->flags & (O_SYNC|O_DSYNC)) { + ret = fsync (pfd->fd); + if (ret) { gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); + "fsync() in writev on fd %d failed: %s", + pfd->fd, strerror (errno)); + ret = -errno; goto out; } + } - if (entry_path_len < - (real_path_len + 1 + strlen (tmp->name) + 1)) { - entry_path_len = (real_path_len + - strlen (tmp->name) + 1024); + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "post operation fstat failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } - entry_path = realloc (entry_path, entry_path_len); - } +out: + SET_TO_OLD_FS_ID (); - strcpy (&entry_path[real_path_len+1], tmp->name); + return ret; +} - ret = lstat (entry_path, &tmp->buf); +static int32_t +_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - entry_path, strerror (op_errno)); - goto out; - } + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; - if (S_ISLNK(tmp->buf.st_mode)) { + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; - ret = readlink (entry_path, linkpath, ZR_PATH_MAX); - if (ret != -1) { - linkpath[ret] = '\0'; - tmp->link = strdup (linkpath); - } - } else { - tmp->link = ""; - } + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + return 0; - count++; +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} - tmp->next = entries.next; - entries.next = tmp; +static int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; - /* if size is 0, count can never be = size, so entire - dir is read */ - if (count == size) - break; - } + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; - FREE (entry_path); + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); + return 0; - op_ret = 0; +err: + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); + return 0; - out: - SET_TO_OLD_FS_ID (); +} - if (op_ret == -1) { - if (entry_path) - FREE (entry_path); +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_zerofill(frame, this, fd, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); + return 0; + +} + +int32_t +posix_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd, dict_t *xdata) +{ + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + DIR * dir = NULL; + struct posix_fd * pfd = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (loc->path, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + op_ret = -1; + dir = opendir (real_path); + + if (dir == NULL) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "opendir failed on %s: %s", + real_path, strerror (op_errno)); + goto out; } - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &entries, count); + op_ret = dirfd (dir); + if (op_ret < 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "dirfd() failed on %s: %s", + real_path, strerror (op_errno)); + goto out; + } - if (op_ret == 0) { - while (entries.next) { - tmp = entries.next; - entries.next = entries.next->next; - FREE (tmp->name); - FREE (tmp); + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->dir = dir; + pfd->fd = dirfd (dir); + + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set the fd context path=%s fd=%p", + real_path, fd); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (dir) { + closedir (dir); + dir = NULL; + } + if (pfd) { + GF_FREE (pfd); + pfd = NULL; } } + SET_TO_OLD_FS_ID (); + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); return 0; } - int32_t posix_releasedir (xlator_t *this, - fd_t *fd) + fd_t *fd) { - int32_t op_ret = -1; - int32_t op_errno = 0; struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; + uint64_t tmp_pfd = 0; int ret = 0; + struct posix_private *priv = NULL; + VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); ret = fd_ctx_del (fd, this, &tmp_pfd); if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_DEBUG, "pfd from fd=%p is NULL", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; + pfd = (struct posix_fd *)(long)tmp_pfd; if (!pfd->dir) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "pfd->dir is NULL for fd=%p path=%s", - fd, pfd->path ? pfd->path : "<NULL>"); - goto out; - } - - ret = closedir (pfd->dir); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "closedir on %p failed", pfd->dir); - goto out; - } - pfd->dir = NULL; - - if (!pfd->path) { - op_errno = EBADFD; - gf_log (this->name, GF_LOG_ERROR, - "pfd->path was NULL. fd=%p pfd=%p", - fd, pfd); + gf_log (this->name, GF_LOG_WARNING, + "pfd->dir is NULL for fd=%p", fd); goto out; } - op_ret = 0; + priv = this->private; - out: - if (pfd) { - if (pfd->path) - FREE (pfd->path); - FREE (pfd); + pthread_mutex_lock (&priv->janitor_lock); + { + INIT_LIST_HEAD (&pfd->list); + list_add_tail (&pfd->list, &priv->janitor_fds); + pthread_cond_signal (&priv->janitor_cond); } + pthread_mutex_unlock (&priv->janitor_lock); +out: return 0; } int32_t posix_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) + loc_t *loc, size_t size, dict_t *xdata) { char * dest = NULL; int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; + struct iatt stbuf = {0,}; DECLARE_OLD_FS_ID_VAR; - VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame, out); SET_FS_ID (frame->root->uid, frame->root->gid); dest = alloca (size + 1); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } op_ret = readlink (real_path, dest, size); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "readlink on %s: %s", loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "readlink on %s failed: %s", real_path, + strerror (op_errno)); goto out; } dest[op_ret] = 0; - - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, dest); + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); return 0; } -int32_t + +int posix_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev) + loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) { - int tmp_fd = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct stat stbuf = { 0, }; + int tmp_fd = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + char *par_path = 0; + struct iatt stbuf = { 0, }; + char was_present = 1; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + void * uuid_req = NULL; DECLARE_OLD_FS_ID_VAR; @@ -737,67 +1033,157 @@ posix_mknod (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); + + gid = frame->root->gid; + + SET_FS_ID (frame->root->uid, gid); + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent of %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + /* Check if the 'gfid' already exists, because this mknod may be an + internal call from distribute for creating 'linkfile', and that + linkfile may be for a hardlinked file */ + if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); + op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get the gfid from dict for %s", + loc->path); + goto real_op; + } + op_ret = posix_create_link_if_gfid_exists (this, uuid_req, + real_path); + if (!op_ret) + goto post_op; + } + +real_op: +#ifdef __NetBSD__ + if (S_ISFIFO(mode)) + op_ret = mkfifo (real_path, mode); + else +#endif /* __NetBSD__ */ op_ret = mknod (real_path, mode, dev); if (op_ret == -1) { op_errno = errno; - if ((op_errno == EINVAL) && S_ISREG (mode)) { - /* Over Darwin, mknod with (S_IFREG|mode) - doesn't work */ - tmp_fd = creat (real_path, mode); - if (tmp_fd == -1) - goto out; - close (tmp_fd); - } else { + if ((op_errno == EINVAL) && S_ISREG (mode)) { + /* Over Darwin, mknod with (S_IFREG|mode) + doesn't work */ + tmp_fd = creat (real_path, mode); + if (tmp_fd == -1) { + gf_log (this->name, GF_LOG_ERROR, + "create failed on %s: %s", + real_path, strerror (errno)); + goto out; + } + close (tmp_fd); + } else { - gf_log (this->name, GF_LOG_ERROR, - "mknod on %s: %s", loc->path, - strerror (op_errno)); - goto out; - } + gf_log (this->name, GF_LOG_ERROR, + "mknod on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } + } + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); } #ifndef HAVE_SET_FSID - op_ret = lchown (real_path, frame->root->uid, frame->root->gid); + op_ret = lchown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lchown on %s: %s", loc->path, strerror (op_errno)); + "lchown on %s failed: %s", real_path, + strerror (op_errno)); goto out; } #endif - op_ret = lstat (real_path, &stbuf); +post_op: + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "mknod on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "mknod on %s: %s", loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); + + if ((op_ret == -1) && (!was_present)) { + unlink (real_path); + } return 0; } -int32_t + +int posix_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode) + loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - struct stat stbuf = {0, }; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + struct iatt stbuf = {0, }; + char was_present = 1; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -805,42 +1191,116 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + /* The Hidden directory should be for housekeeping purpose and it + should not get created from a user request */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "mkdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); + + gid = frame->root->gid; + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + if ((op_ret == -1) && (errno == ENOENT)) { + was_present = 0; + } + + SET_FS_ID (frame->root->uid, gid); + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + mode |= S_ISGID; + } op_ret = mkdir (real_path, mode); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "mkdir of %s: %s", loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "mkdir of %s failed: %s", real_path, + strerror (op_errno)); goto out; } + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); + } + #ifndef HAVE_SET_FSID - op_ret = chown (real_path, frame->root->uid, frame->root->gid); + op_ret = chown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "chown on %s: %s", loc->path, strerror (op_errno)); + "chown on %s failed: %s", real_path, + strerror (op_errno)); goto out; } #endif - op_ret = lstat (real_path, &stbuf); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lstat on %s: %s", loc->path, strerror (op_errno)); + "lstat on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent of %s failed: %s", + real_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); + + if ((op_ret == -1) && (!was_present)) { + unlink (real_path); + } return 0; } @@ -848,12 +1308,17 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, int32_t posix_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, int xflag, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - int32_t fd = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + struct iatt stbuf = {0,}; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -862,35 +1327,60 @@ posix_unlink (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - if (S_ISREG (loc->inode->st_mode)) { - fd = open (real_path, O_RDONLY); - if (fd == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "open of %s failed: %s", loc->path, - strerror (op_errno)); - goto out; + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } + + if (stbuf.ia_nlink == 1) + posix_handle_unset (this, stbuf.ia_gfid, NULL); + + priv = this->private; + if (priv->background_unlink) { + if (IA_ISREG (loc->inode->ia_type)) { + fd = open (real_path, O_RDONLY); + if (fd == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "open of %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } } } - op_ret = unlink (real_path); + op_ret = sys_unlink (real_path); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "unlink of %s: %s", loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "unlink of %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, + &preparent, &postparent, NULL); if (fd != -1) { close (fd); @@ -899,13 +1389,20 @@ posix_unlink (call_frame_t *frame, xlator_t *this, return 0; } -int32_t + +int posix_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - char * real_path = 0; + char * real_path = NULL; + char * par_path = NULL; + char * gfid_str = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + struct iatt stbuf; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -914,37 +1411,102 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = rmdir (real_path); + /* The Hidden directory should be for housekeeping purpose and it + should not get deleted from inside process */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "rmdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + priv = this->private; + + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } + + if (flags) { + gfid_str = uuid_utoa (stbuf.ia_gfid); + char *tmp_path = alloca (strlen (priv->trash_path) + + strlen ("/") + + strlen (gfid_str) + 1); + + mkdir (priv->trash_path, 0755); + sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str); + op_ret = rename (real_path, tmp_path); + } else { + op_ret = rmdir (real_path); + } op_errno = errno; - if (op_errno == EEXIST) - /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ - op_errno = ENOTEMPTY; + if (op_ret == 0) { + posix_handle_unset (this, stbuf.ia_gfid, NULL); + } + + if (op_errno == EEXIST) + /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ + op_errno = ENOTEMPTY; + /* No need to log a common error as ENOTEMPTY */ if (op_ret == -1 && op_errno != ENOTEMPTY) { - gf_log (this->name, GF_LOG_WARNING, - "rmdir of %s: %s", loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "rmdir of %s failed: %s", real_path, + strerror (op_errno)); + } + + if (op_ret == -1) { + gf_log (this->name, + (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "%s on %s failed", (flags) ? "rename" : "rmdir", + real_path); goto out; } - out: + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent of %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } + +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + + STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, + &preparent, &postparent, NULL); return 0; } -int32_t + +int posix_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc) + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct stat stbuf = { 0, }; + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + char * par_path = 0; + struct iatt stbuf = { 0, }; + struct posix_private *priv = NULL; + gid_t gid = 0; + char was_present = 1; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -953,112 +1515,130 @@ posix_symlink (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (linkname, out); VALIDATE_OR_GOTO (loc, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); - op_ret = symlink (linkname, real_path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + + if ((op_ret == -1) && (errno == ENOENT)){ + was_present = 0; + } + + SET_FS_ID (frame->root->uid, gid); + gid = frame->root->gid; + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "symlink of %s --> %s: %s", - loc->path, linkname, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } -#ifndef HAVE_SET_FSID - op_ret = lchown (real_path, frame->root->uid, frame->root->gid); + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + op_ret = symlink (linkname, real_path); + if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lchown failed on %s: %s", - loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "symlink of %s --> %s failed: %s", + real_path, linkname, strerror (op_errno)); goto out; } -#endif - op_ret = lstat (real_path, &stbuf); + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); + } + +#ifndef HAVE_SET_FSID + op_ret = lchown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lstat failed on %s: %s", - loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "lchown failed on %s: %s", + real_path, strerror (op_errno)); goto out; } +#endif - op_ret = 0; - - out: - SET_TO_OLD_FS_ID (); - - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); - - return 0; -} - - -int -posix_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_oldpath = NULL; - char * real_newpath = NULL; - struct stat stbuf = {0, }; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (oldloc, out); - VALIDATE_OR_GOTO (newloc, out); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_oldpath, this, oldloc->path); - MAKE_REAL_PATH (real_newpath, this, newloc->path); + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } - op_ret = rename (real_oldpath, real_newpath); + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, - (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR), - "rename of %s to %s failed: %s", - oldloc->path, newloc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "lstat failed on %s: %s", + real_path, strerror (op_errno)); goto out; } - op_ret = lstat (real_newpath, &stbuf); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s failed: %s", - real_newpath, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); + + if ((op_ret == -1) && (!was_present)) { + unlink (real_path); + } return 0; } int -posix_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) +posix_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_oldpath = 0; - char * real_newpath = 0; - struct stat stbuf = {0, }; - + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_oldpath = NULL; + char *real_newpath = NULL; + char *par_oldpath = NULL; + char *par_newpath = NULL; + struct iatt stbuf = {0, }; + struct posix_private *priv = NULL; + char was_present = 1; + struct iatt preoldparent = {0, }; + struct iatt postoldparent = {0, }; + struct iatt prenewparent = {0, }; + struct iatt postnewparent = {0, }; + char olddirid[64]; + char newdirid[64]; + uuid_t victim = {0}; + int was_dir = 0; + int nlink = 0; DECLARE_OLD_FS_ID_VAR; @@ -1067,211 +1647,237 @@ posix_link (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (oldloc, out); VALIDATE_OR_GOTO (newloc, out); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_oldpath, this, oldloc->path); - MAKE_REAL_PATH (real_newpath, this, newloc->path); + MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - op_ret = link (real_oldpath, real_newpath); + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "link %s to %s failed: %s", - oldloc->path, newloc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent %s failed: %s", + par_oldpath, strerror (op_errno)); goto out; } - op_ret = lstat (real_newpath, &stbuf); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s failed: %s", - real_newpath, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent of %s failed: %s", + par_newpath, strerror (op_errno)); goto out; } - op_ret = 0; - - out: - SET_TO_OLD_FS_ID (); - - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, oldloc->inode, &stbuf); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); + if ((op_ret == -1) && (errno == ENOENT)){ + was_present = 0; + } else { + uuid_copy (victim, stbuf.ia_gfid); + if (IA_ISDIR (stbuf.ia_type)) + was_dir = 1; + nlink = stbuf.ia_nlink; + } - return 0; -} + if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { + gf_log (this->name, GF_LOG_WARNING, + "found directory at %s while expecting ENOENT", + real_newpath); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + if (was_present && IA_ISDIR(stbuf.ia_type) && + uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "found directory %s at %s while renaming %s", + uuid_utoa_r (newloc->inode->gfid, olddirid), + real_newpath, + uuid_utoa_r (stbuf.ia_gfid, newdirid)); + op_ret = -1; + op_errno = EEXIST; + goto out; + } -int -posix_chmod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct stat stbuf = {0,}; + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_unset (this, oldloc->inode->gfid, NULL); + } - DECLARE_OLD_FS_ID_VAR; + op_ret = sys_rename (real_oldpath, real_newpath); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, + (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR), + "rename of %s to %s failed: %s", + real_oldpath, real_newpath, strerror (op_errno)); + goto out; + } - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); + if (was_dir) + posix_handle_unset (this, victim, NULL); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + if (was_present && !was_dir && nlink == 1) + posix_handle_unset (this, victim, NULL); - if (S_ISLNK (loc->inode->st_mode)) { - /* chmod on a link should always succeed */ - op_ret = lstat (real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - op_ret = 0; - goto out; + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_soft (this, real_newpath, newloc, + oldloc->inode->gfid, NULL); } - op_ret = lchmod (real_path, mode); - if ((op_ret == -1) && (errno == ENOSYS)) { - gf_log (this->name, GF_LOG_DEBUG, - "lchmod not implemented, falling back to chmod"); - op_ret = chmod (real_path, mode); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", + real_newpath, strerror (op_errno)); + goto out; } + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "chmod on %s failed: %s", - loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent %s failed: %s", + par_oldpath, strerror (op_errno)); goto out; } - op_ret = lstat (real_path, &stbuf); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", - real_path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent %s failed: %s", + par_newpath, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: + SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, + &preoldparent, &postoldparent, + &prenewparent, &postnewparent, NULL); + + if ((op_ret == -1) && !was_present) { + unlink (real_newpath); + } return 0; } int -posix_chown (call_frame_t *frame, xlator_t *this, - loc_t *loc, uid_t uid, gid_t gid) +posix_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct stat stbuf = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_oldpath = 0; + char *real_newpath = 0; + char *par_newpath = 0; + struct iatt stbuf = {0, }; + struct posix_private *priv = NULL; + char was_present = 1; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (oldloc, out); + VALIDATE_OR_GOTO (newloc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); + + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); + if ((op_ret == -1) && (errno == ENOENT)) { + was_present = 0; + } - op_ret = lchown (real_path, uid, gid); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lchown on %s failed: %s", - loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", + par_newpath, strerror (op_errno)); goto out; } - op_ret = lstat (real_path, &stbuf); +#ifdef HAVE_LINKAT + /* + * On most systems (Linux being the notable exception), link(2) + * first resolves symlinks. If the target is a directory or + * is nonexistent, it will fail. linkat(2) operates on the + * symlink instead of its target when the AT_SYMLINK_FOLLOW + * flag is not supplied. + */ + op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0); +#else + op_ret = link (real_oldpath, real_newpath); +#endif if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s failed: %s", - real_path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "link %s to %s failed: %s", + real_oldpath, real_newpath, strerror (op_errno)); goto out; } - op_ret = 0; - - out: - SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); - - return 0; -} - - -int32_t -posix_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct stat stbuf = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = truncate (real_path, offset); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "truncate on %s failed: %s", - loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", + real_newpath, strerror (op_errno)); goto out; } - op_ret = lstat (real_path, &stbuf); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "lstat on %s failed: %s", - real_path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", + par_newpath, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, + (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); + + if ((op_ret == -1) && (!was_present)) { + unlink (real_newpath); + } return 0; } -int -posix_utimens (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct timespec ts[2]) +int32_t +posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct stat stbuf = {0,}; - struct timeval tv[2] = {{0,},{0,}}; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + struct posix_private *priv = NULL; + struct iatt prebuf = {0,}; + struct iatt postbuf = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1279,58 +1885,68 @@ posix_utimens (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); - tv[0].tv_sec = ts[0].tv_sec; - tv[0].tv_usec = ts[0].tv_nsec / 1000; - tv[1].tv_sec = ts[1].tv_sec; - tv[1].tv_usec = ts[1].tv_nsec / 1000; + SET_FS_ID (frame->root->uid, frame->root->gid); - op_ret = lutimes (real_path, tv); - if ((op_ret == -1) && (errno == ENOSYS)) { - op_ret = utimes (real_path, tv); + MAKE_INODE_HANDLE (real_path, this, loc, &prebuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on %s failed: %s", + real_path, strerror (op_errno)); + goto out; } + op_ret = truncate (real_path, offset); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "utimes on %s: %s", real_path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "truncate on %s failed: %s", + real_path, strerror (op_errno)); goto out; } - op_ret = lstat (real_path, &stbuf); + op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "lstat on %s: %s", real_path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", + real_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, + &prebuf, &postbuf, NULL); return 0; } -int32_t + +int posix_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, - fd_t *fd) + mode_t umask, fd_t *fd, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t _fd = -1; - int _flags = 0; - char * real_path = NULL; - struct stat stbuf = {0, }; - struct posix_fd * pfd = NULL; - struct posix_private * priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t _fd = -1; + int _flags = 0; + char * real_path = NULL; + char * par_path = NULL; + struct iatt stbuf = {0, }; + struct posix_fd * pfd = NULL; + struct posix_private * priv = NULL; + char was_present = 1; + + gid_t gid = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1341,9 +1957,26 @@ posix_create (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); priv = this->private; + VALIDATE_OR_GOTO (priv, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + + gid = frame->root->gid; + + SET_FS_ID (frame->root->uid, gid); + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } if (!flags) { _flags = O_CREAT | O_RDWR | O_EXCL; @@ -1352,76 +1985,129 @@ posix_create (call_frame_t *frame, xlator_t *this, _flags = flags | O_CREAT; } + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + if ((op_ret == -1) && (errno == ENOENT)) { + was_present = 0; + } + if (priv->o_direct) - flags |= O_DIRECT; + _flags |= O_DIRECT; _fd = open (real_path, _flags, mode); if (_fd == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "open on %s: %s", loc->path, strerror (op_errno)); + op_ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "open on %s failed: %s", real_path, + strerror (op_errno)); goto out; } + if (was_present) + goto fill_stat; + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); + } + #ifndef HAVE_SET_FSID - op_ret = chown (real_path, frame->root->uid, frame->root->gid); + op_ret = chown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_ERROR, "chown on %s failed: %s", - real_path, strerror (op_errno)); - goto out; + real_path, strerror (op_errno)); } #endif - op_ret = fstat (_fd, &stbuf); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + +fill_stat: + op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_ERROR, "fstat on %d failed: %s", _fd, strerror (op_errno)); goto out; } - op_ret = -1; - pfd = CALLOC (1, sizeof (*pfd)); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); + goto out; + } + op_ret = -1; + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); if (!pfd) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - close (_fd); goto out; } pfd->flags = flags; pfd->fd = _fd; - fd_ctx_set (fd, this, (uint64_t)(long)pfd); + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set the fd context path=%s fd=%p", + real_path, fd); - ((struct posix_private *)this->private)->stats.nr_files++; + LOCK (&priv->lock); + { + priv->nr_files++; + } + UNLOCK (&priv->lock); op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf); + if ((-1 == op_ret) && (_fd != -1)) { + close (_fd); + + if (!was_present) { + unlink (real_path); + } + } + + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, + fd, (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, xdata); return 0; } int32_t posix_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd) + loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - int32_t _fd = -1; - struct posix_fd * pfd = NULL; - struct posix_private * priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + int32_t _fd = -1; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; + struct iatt stbuf = {0, }; DECLARE_OLD_FS_ID_VAR; @@ -1432,89 +2118,76 @@ posix_open (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); + op_ret = -1; SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); if (priv->o_direct) flags |= O_DIRECT; _fd = open (real_path, flags, 0); if (_fd == -1) { + op_ret = -1; op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_ERROR, "open on %s: %s", real_path, strerror (op_errno)); goto out; } - pfd = CALLOC (1, sizeof (*pfd)); - + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); if (!pfd) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); goto out; } pfd->flags = flags; pfd->fd = _fd; - fd_ctx_set (fd, this, (uint64_t)(long)pfd); - - ((struct posix_private *)this->private)->stats.nr_files++; + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set the fd context path=%s fd=%p", + real_path, fd); -#ifndef HAVE_SET_FSID - if (flags & O_CREAT) { - op_ret = chown (real_path, frame->root->uid, frame->root->gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "chown on %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } + LOCK (&priv->lock); + { + priv->nr_files++; } -#endif + UNLOCK (&priv->lock); op_ret = 0; - out: +out: if (op_ret == -1) { if (_fd != -1) { close (_fd); - _fd = -1; } } SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); return 0; } -#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \ - (unsigned long)(~(bound - 1)))) - int posix_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - uint64_t tmp_pfd = 0; int32_t op_ret = -1; int32_t op_errno = 0; - char * buf = NULL; - char * alloc_buf = NULL; int _fd = -1; struct posix_private * priv = NULL; - dict_t * reply_dict = NULL; + struct iobuf * iobuf = NULL; + struct iobref * iobref = NULL; struct iovec vec = {0,}; struct posix_fd * pfd = NULL; - struct stat stbuf = {0,}; - int align = 1; + struct iatt stbuf = {0,}; int ret = -1; - int dict_ret = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -1522,137 +2195,218 @@ posix_readv (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this->private, out); priv = this->private; + VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; if (!size) { op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, "size == 0"); + gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); goto out; } - if (pfd->flags & O_DIRECT) { - align = 4096; /* align to page boundary */ - } - - alloc_buf = MALLOC (1 * (size + align)); - if (!alloc_buf) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); + if (!iobuf) { + op_errno = ENOMEM; goto out; } - /* page aligned buffer */ - buf = ALIGN_BUF (alloc_buf, align); - _fd = pfd->fd; - - op_ret = lseek (_fd, offset, SEEK_SET); + op_ret = pread (_fd, iobuf->ptr, size, offset); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lseek(%"PRId64") failed: %s", - offset, strerror (op_errno)); + "read failed on fd=%p: %s", fd, + strerror (op_errno)); goto out; } - op_ret = read (_fd, buf, size); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "read failed: %s", strerror (op_errno)); - goto out; + LOCK (&priv->lock); + { + priv->read_value += op_ret; } + UNLOCK (&priv->lock); - priv->read_value += op_ret; - priv->interval_read += op_ret; - - vec.iov_base = buf; + vec.iov_base = iobuf->ptr; vec.iov_len = op_ret; - op_ret = -1; - reply_dict = get_new_dict (); - if (!reply_dict) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - goto out; - } - dict_ref (reply_dict); + iobref = iobref_new (); - dict_ret = dict_set_ptr (reply_dict, NULL, alloc_buf); - if (dict_ret < 0) { - op_errno = -dict_ret; - gf_log (this->name, GF_LOG_ERROR, "could not dict_set: (%s)", - strerror (op_errno)); - goto out; - } + iobref_add (iobref, iobuf); /* * readv successful, and we need to get the stat of the file * we read from */ - op_ret = fstat (_fd, &stbuf); + op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "fstat failed: %s", strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "fstat failed on fd=%p: %s", fd, + strerror (op_errno)); goto out; } - - op_ret = vec.iov_len; - out: - if (op_ret == -1) { - frame->root->rsp_refs = NULL; - if (reply_dict) { - dict_unref (reply_dict); - reply_dict = NULL; + /* Hack to notify higher layers of EOF. */ + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) + op_errno = ENOENT; + + op_ret = vec.iov_len; +out: + + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, + &vec, 1, &stbuf, iobref, NULL); + + if (iobref) + iobref_unref (iobref); + if (iobuf) + iobuf_unref (iobuf); + + return 0; +} + + +int32_t +__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset) +{ + int32_t op_ret = 0; + int idx = 0; + int retval = 0; + off_t internal_off = 0; + + if (!vector) + return -EFAULT; + + internal_off = offset; + for (idx = 0; idx < count; idx++) { + retval = pwrite (fd, vector[idx].iov_base, vector[idx].iov_len, + internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; } + op_ret += retval; + internal_off += retval; + } + +err: + return op_ret; +} - if ((alloc_buf != NULL) && (dict_ret != -1)) - FREE (alloc_buf); +int32_t +__posix_writev (int fd, struct iovec *vector, int count, off_t startoff, + int odirect) +{ + int32_t op_ret = 0; + int idx = 0; + int max_buf_size = 0; + int retval = 0; + char *buf = NULL; + char *alloc_buf = NULL; + off_t internal_off = 0; + + /* Check for the O_DIRECT flag during open() */ + if (!odirect) + return __posix_pwritev (fd, vector, count, startoff); + + for (idx = 0; idx < count; idx++) { + if (max_buf_size < vector[idx].iov_len) + max_buf_size = vector[idx].iov_len; } - if (reply_dict) - frame->root->rsp_refs = reply_dict; + alloc_buf = _page_aligned_alloc (max_buf_size, &buf); + if (!alloc_buf) { + op_ret = -errno; + goto err; + } - STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf); + internal_off = startoff; + for (idx = 0; idx < count; idx++) { + memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); - if (reply_dict) - dict_unref (reply_dict); + /* not sure whether writev works on O_DIRECT'd fd */ + retval = pwrite (fd, buf, vector[idx].iov_len, internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } - return 0; + op_ret += retval; + internal_off += retval; + } + +err: + GF_FREE (alloc_buf); + + return op_ret; } +dict_t* +_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: " + "fd: %p inode: %p gfid:%s", fd, inode?inode:0, + inode?uuid_utoa(inode->gfid):"N/A"); + goto out; + } + + if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_OPEN_FD_COUNT); + } + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, + is_append); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_WRITE_IS_APPEND); + } +out: + return rsp_xdata; +} int32_t -posix_writev (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iovec *vector, int32_t count, off_t offset) +posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_private * priv = NULL; struct posix_fd * pfd = NULL; - struct stat stbuf = {0,}; + struct iatt preop = {0,}; + struct iatt postop = {0,}; int ret = -1; - - int idx = 0; - int align = 4096; - int max_buf_size = 0; - int retval = 0; - char * buf = NULL; - char * alloc_buf = NULL; - uint64_t tmp_pfd = 0; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -1664,115 +2418,111 @@ posix_writev (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = lseek (_fd, offset, SEEK_SET); + if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { + /* The write_is_append check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + locked = _gf_true; + LOCK(&fd->inode->lock); + } + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lseek(%"PRId64") failed: %s", - offset, strerror (op_errno)); + "pre-operation fstat failed on fd=%p: %s", fd, + strerror (op_errno)); goto out; } - /* Check for the O_DIRECT flag during open() */ - if (pfd->flags & O_DIRECT) { - /* This is O_DIRECT'd file */ - op_ret = -1; - for (idx = 0; idx < count; idx++) { - if (max_buf_size < vector[idx].iov_len) - max_buf_size = vector[idx].iov_len; - } - - alloc_buf = MALLOC (1 * (max_buf_size + align)); - if (!alloc_buf) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - goto out; - } - - for (idx = 0; idx < count; idx++) { - /* page aligned buffer */ - buf = ALIGN_BUF (alloc_buf, align); - - memcpy (buf, vector[idx].iov_base, - vector[idx].iov_len); - - /* not sure whether writev works on O_DIRECT'd fd */ - retval = write (_fd, buf, vector[idx].iov_len); - - if (retval == -1) { - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "O_DIRECT enabled: %s", - strerror (op_errno)); - goto out; - } + if (locked) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } - break; - } - if (op_ret == -1) - op_ret = 0; - op_ret += retval; - } + op_ret = __posix_writev (_fd, vector, count, offset, + (pfd->flags & O_DIRECT)); - } else /* if (O_DIRECT) */ { + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } - /* This is not O_DIRECT'd fd */ - op_ret = writev (_fd, vector, count); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "writev failed: %s", - strerror (op_errno)); - goto out; - } + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64 + ", %s", offset, strerror (op_errno)); + goto out; } - priv->write_value += op_ret; - priv->interval_write += op_ret; + LOCK (&priv->lock); + { + priv->write_value += op_ret; + } + UNLOCK (&priv->lock); if (op_ret >= 0) { + rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); /* wiretv successful, we also need to get the stat of * the file we wrote to */ - ret = fstat (_fd, &stbuf); + + if (flags & (O_SYNC|O_DSYNC)) { + ret = fsync (_fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + _fd, strerror (errno)); + op_ret = -1; + op_errno = errno; + goto out; + } + } + + ret = posix_fdstat (this, _fd, &postop); if (ret == -1) { - op_ret = -1; + op_ret = -1; op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "fstat failed: %s", - strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "post-operation fstat failed on fd=%p: %s", + fd, strerror (op_errno)); goto out; } } - out: - if (alloc_buf) { - FREE (alloc_buf); - } +out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + + if (rsp_xdata) + dict_unref (rsp_xdata); return 0; } int32_t posix_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, dict_t *xdata) { char * real_path = NULL; int32_t op_ret = -1; @@ -1785,7 +2535,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (this->private, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); priv = this->private; @@ -1793,8 +2543,9 @@ posix_statfs (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s", - strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "statvfs failed on %s: %s", + real_path, strerror (op_errno)); goto out; } @@ -1809,118 +2560,127 @@ posix_statfs (call_frame_t *frame, xlator_t *this, op_ret = 0; - out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &buf); +out: + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); return 0; } int32_t posix_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - int _fd = -1; - struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL on fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - _fd = pfd->fd; - - /* do nothing */ op_ret = 0; - out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); +out: + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); return 0; } int32_t -posix_release (xlator_t *this, - fd_t *fd) +posix_release (xlator_t *this, fd_t *fd) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; struct posix_private * priv = NULL; struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + uint64_t tmp_pfd = 0; VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); priv = this->private; - priv->stats.nr_files--; - - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = fd_ctx_del (fd, this, &tmp_pfd); if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - _fd = pfd->fd; - - op_ret = close (_fd); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "close(): %s", strerror (op_errno)); - goto out; - } + pfd = (struct posix_fd *)(long)tmp_pfd; if (pfd->dir) { - op_ret = -1; - op_errno = EBADF; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); - goto out; } - op_ret = 0; + pthread_mutex_lock (&priv->janitor_lock); + { + INIT_LIST_HEAD (&pfd->list); + list_add_tail (&pfd->list, &priv->janitor_fds); + pthread_cond_signal (&priv->janitor_cond); + } + pthread_mutex_unlock (&priv->janitor_lock); - out: - if (pfd) - FREE (pfd); + LOCK (&priv->lock); + { + priv->nr_files--; + } + UNLOCK (&priv->lock); +out: return 0; } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync) + fd_t *fd, int32_t datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + struct iatt preop = {0,}; + struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -1936,182 +2696,94 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif - ret = fd_ctx_get (fd, this, &tmp_pfd); + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, "pfd not found in fd's ctx"); + gf_log (this->name, GF_LOG_WARNING, + "pfd not found in fd's ctx"); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; + op_ret = posix_fdstat (this, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "pre-operation fstat failed on fd=%p: %s", fd, + strerror (op_errno)); + goto out; + } + if (datasync) { ; #ifdef HAVE_FDATASYNC op_ret = fdatasync (_fd); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "fdatasync on fd=%p failed: %s", + fd, strerror (errno)); + } #endif } else { op_ret = fsync (_fd); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "fsync: %s", - strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, + "fsync on fd=%p failed: %s", + fd, strerror (op_errno)); + goto out; } } + op_ret = posix_fdstat (this, _fd, &postop); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "post-operation fstat failed on fd=%p: %s", fd, + strerror (op_errno)); + goto out; + } + op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, + NULL); return 0; } static int gf_posix_xattr_enotsup_log; - -int -set_file_contents (xlator_t *this, char *real_path, - data_pair_t *trav, int flags) -{ - char * key = NULL; - char real_filepath[ZR_PATH_MAX] = {0,}; - int32_t file_fd = -1; - int op_ret = 0; - int ret = -1; - - key = &(trav->key[15]); - sprintf (real_filepath, "%s/%s", real_path, key); - - if (flags & XATTR_REPLACE) { - /* if file exists, replace it - * else, error out */ - file_fd = open (real_filepath, O_TRUNC|O_WRONLY); - - if (file_fd == -1) { - goto create; - } - - if (trav->value->len) { - ret = write (file_fd, trav->value->data, - trav->value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed while doing setxattr " - "for key %s on path %s: %s", - key, real_filepath, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - } - - create: /* we know file doesn't exist, create it */ - - file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "failed to open file %s with O_CREAT: %s", - key, strerror (errno)); - goto out; - } - - ret = write (file_fd, trav->value->data, trav->value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed on %s while setxattr with " - "key %s: %s", - real_filepath, key, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s while setxattr with " - "key %s: %s", - real_filepath, key, strerror (errno)); - goto out; - } - } - - out: - return op_ret; -} - -int -handle_pair (xlator_t *this, char *real_path, - data_pair_t *trav, int flags) +static int +_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) { - int sys_ret = -1; - int ret = 0; + posix_xattr_filler_t *filler = NULL; - if (ZR_FILE_CONTENT_REQUEST(trav->key)) { - ret = set_file_contents (this, real_path, trav, flags); - } else { - sys_ret = sys_lsetxattr (real_path, trav->key, - trav->value->data, - trav->value->len, flags); - - if (sys_ret < 0) { - if (errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported"); - } else if (errno == ENOENT) { - gf_log (this->name, GF_LOG_DEBUG, - "setxattr on %s failed: %s", real_path, - strerror (errno)); - } else { - -#ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_WARNING), - "%s: key:%s error:%s", - real_path, trav->key, - strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_WARNING, - "%s: key:%s error:%s", - real_path, trav->key, - strerror (errno)); -#endif /* DARWIN */ - } + filler = tmp; - ret = -errno; - goto out; - } - } - out: - return ret; + return posix_handle_pair (filler->this, filler->real_path, k, v, + filler->flags); } int32_t posix_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags) + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - data_pair_t * trav = NULL; - int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2121,96 +2793,72 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (dict, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); - trav = dict->members_list; + op_ret = -1; + dict_del (dict, GFID_XATTR_KEY); - while (trav) { - ret = handle_pair (this, real_path, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.real_path = real_path; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); return 0; } + int -get_file_contents (xlator_t *this, char *real_path, - const char *name, char **contents) +posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) { - char real_filepath[ZR_PATH_MAX] = {0,}; - char * key = NULL; - int32_t file_fd = -1; - struct stat stbuf = {0,}; - int op_ret = 0; - int ret = -1; - - key = (char *) &(name[15]); - sprintf (real_filepath, "%s/%s", real_path, key); - - op_ret = lstat (real_filepath, &stbuf); - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - - file_fd = open (real_filepath, O_RDONLY); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - - *contents = CALLOC (stbuf.st_size + 1, sizeof(char)); - - if (! *contents) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); - goto out; - } + char *real_path = NULL; + struct dirent *dirent = NULL; + DIR *fd = NULL; + const char *fname = NULL; + char *found = NULL; + int ret = -1; + int op_ret = -1; + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + fd = opendir (real_path); + if (!fd) + return -errno; + + fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); + + while ((dirent = readdir (fd))) { + if (strcasecmp (dirent->d_name, fname) == 0) { + found = gf_strdup (dirent->d_name); + if (!found) { + closedir (fd); + return -ENOMEM; + } + break; + } + } - ret = read (file_fd, *contents, stbuf.st_size); - if (ret <= 0) { - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, "read on %s failed", - real_filepath); - goto out; - } + closedir (fd); - *contents[stbuf.st_size] = '\0'; + if (!found) + return -ENOENT; - op_ret = close (file_fd); - file_fd = -1; - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s", - real_filepath, strerror (errno)); - goto out; - } - - out: - if (op_ret < 0) { - if (*contents) - FREE (*contents); - if (file_fd != -1) - close (file_fd); - } + ret = dict_set_dynstr (dict, (char *)key, found); + if (ret) { + GF_FREE (found); + return -ENOMEM; + } + ret = strlen (found) + 1; - return op_ret; + return ret; } /** @@ -2220,20 +2868,25 @@ get_file_contents (xlator_t *this, char *real_path, */ int32_t posix_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) -{ - int32_t op_ret = -1; - int32_t op_errno = ENOENT; - int32_t list_offset = 0; - size_t size = 0; - size_t remaining_size = 0; - char key[1024] = {0,}; - char * value = NULL; - char * list = NULL; - char * real_path = NULL; - dict_t * dict = NULL; - char * file_contents = NULL; - int ret = -1; + loc_t *loc, const char *name, dict_t *xdata) +{ + struct posix_private *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t list_offset = 0; + ssize_t size = 0; + size_t remaining_size = 0; + char key[4096] = {0,}; + char host_buf[1024] = {0,}; + char *value = NULL; + char *list = NULL; + char *real_path = NULL; + dict_t *dict = NULL; + char *file_contents = NULL; + int ret = -1; + char *path = NULL; + char *rpath = NULL; + char *dyn_rpath = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2242,28 +2895,194 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + op_ret = -1; + priv = this->private; - if (loc->inode && S_ISDIR(loc->inode->st_mode) && name && - ZR_FILE_CONTENT_REQUEST(name)) { - ret = get_file_contents (this, real_path, name, - &file_contents); + if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && + ZR_FILE_CONTENT_REQUEST(name)) { + ret = posix_get_file_contents (this, loc->gfid, &name[15], + &file_contents); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_ERROR, - "getting file contents failed: %s", + "getting file contents failed: %s", strerror (op_errno)); goto out; } } - /* Get the total size */ - dict = get_new_dict (); + dict = dict_new (); if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); + op_errno = ENOMEM; goto out; } + if (loc->inode && name && + (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename (frame, this, loc, + name, dict, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + gf_log (this->name, (op_errno == ENOENT) ? + GF_LOG_DEBUG : GF_LOG_WARNING, + "Failed to get real filename (%s, %s): %s", + loc->path, name, strerror (op_errno)); + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { + if (!list_empty (&loc->inode->fd_list)) { + ret = dict_set_uint32 (dict, (char *)name, 1); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to set dictionary value for %s", + name); + } else { + ret = dict_set_uint32 (dict, (char *)name, 0); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to set dictionary value for %s", + name); + } + goto done; + } + if (loc->inode && name && + (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) { + if (LOC_HAS_ABSPATH (loc)) + MAKE_REAL_PATH (rpath, this, loc->path); + else + rpath = real_path; + + (void) snprintf (host_buf, 1024, + "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo + && !uuid_is_null(priv->glusterd_uuid)) + ? uuid_utoa (priv->glusterd_uuid) + : priv->hostname), + rpath); + + dyn_rpath = gf_strdup (host_buf); + if (!dyn_rpath) { + ret = -1; + goto done; + } + size = strlen (dyn_rpath) + 1; + ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY, + dyn_rpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + dyn_rpath); + GF_FREE (dyn_rpath); + } + + goto done; + } + + if (loc->inode && name && + (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0) + && !uuid_is_null (priv->glusterd_uuid)) { + (void) snprintf (host_buf, 1024, "%s", + uuid_utoa (priv->glusterd_uuid)); + + dyn_rpath = gf_strdup (host_buf); + if (!dyn_rpath) { + ret = -1; + goto done; + } + + size = strlen (dyn_rpath) + 1; + ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY, + dyn_rpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + dyn_rpath); + GF_FREE (dyn_rpath); + } + goto done; + } + + if (loc->inode && name && + (strcmp (name, GFID_TO_PATH_KEY) == 0)) { + ret = inode_path (loc->inode, NULL, &path); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: could not get " + "inode path", uuid_utoa (loc->inode->gfid)); + goto done; + } + + ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + host_buf); + GF_FREE (path); + } + goto done; + } + + if (name) { + strcpy (key, name); + + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else if (op_errno == ENOATTR || + op_errno == ENODATA) { + gf_log (this->name, GF_LOG_DEBUG, + "No such attribute:%s for file %s", + key, real_path); + } else { + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s: %s (%s)", + real_path, key, strerror (op_errno)); + } + + goto done; + } + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + goto out; + } + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); + goto out; + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } + + goto done; + } + size = sys_llistxattr (real_path, NULL, 0); if (size == -1) { op_errno = errno; @@ -2271,11 +3090,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); } else { gf_log (this->name, GF_LOG_ERROR, - "listxattr failed on %s: %s", + "listxattr failed on %s: %s", real_path, strerror (op_errno)); } goto out; @@ -2287,7 +3108,6 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, list = alloca (size + 1); if (!list) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); goto out; } @@ -2296,43 +3116,63 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, remaining_size = size; list_offset = 0; while (remaining_size > 0) { - if(*(list + list_offset) == '\0') + if (*(list + list_offset) == '\0') break; strcpy (key, list + list_offset); - op_ret = sys_lgetxattr (real_path, key, NULL, 0); - if (op_ret == -1) + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); break; + } - value = CALLOC (op_ret + 1, sizeof(char)); + value = GF_CALLOC (size + 1, sizeof(char), + gf_posix_mt_char); if (!value) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); goto out; } - op_ret = sys_lgetxattr (real_path, key, value, op_ret); - if (op_ret == -1) + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); break; + } + + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } - value [op_ret] = '\0'; - dict_set (dict, key, data_from_dynptr (value, op_ret)); remaining_size -= strlen (key) + 1; list_offset += strlen (key) + 1; } /* while (remaining_size > 0) */ - done: +done: op_ret = size; if (dict) { - dict_ref (dict); + dict_del (dict, GFID_XATTR_KEY); } - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, dict); + + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -2343,17 +3183,16 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, int32_t posix_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name) + fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = ENOENT; - uint64_t tmp_pfd = 0; struct posix_fd * pfd = NULL; int _fd = -1; int32_t list_offset = 0; - size_t size = 0; + ssize_t size = 0; size_t remaining_size = 0; - char key[1024] = {0,}; + char key[4096] = {0,}; char * value = NULL; char * list = NULL; dict_t * dict = NULL; @@ -2367,24 +3206,68 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, SET_FS_ID (frame->root->uid, frame->root->gid); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; /* Get the total size */ dict = get_new_dict (); if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); goto out; } + if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32 (dict, (char *)name, 1); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to set dictionary value for %s", + name); + goto done; + } + + if (name) { + strcpy (key, name); + + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "key %s (%s)", key, strerror (op_errno)); + goto done; + } + + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + goto out; + } + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); + goto out; + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on key %s failed", key); + GF_FREE (value); + goto out; + } + goto done; + } + size = sys_flistxattr (_fd, NULL, 0); if (size == -1) { op_errno = errno; @@ -2392,11 +3275,12 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting " + "brick with 'user_xattr' flag)"); } else { gf_log (this->name, GF_LOG_ERROR, - "listxattr failed on %p: %s", + "listxattr failed on %p: %s", fd, strerror (op_errno)); } goto out; @@ -2408,7 +3292,6 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, list = alloca (size + 1); if (!list) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); goto out; } @@ -2421,39 +3304,60 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, break; strcpy (key, list + list_offset); - op_ret = sys_fgetxattr (_fd, key, NULL, 0); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); break; + } - value = CALLOC (op_ret + 1, sizeof(char)); + value = GF_CALLOC (size + 1, sizeof(char), + gf_posix_mt_char); if (!value) { + op_ret = -1; op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); goto out; } - op_ret = sys_fgetxattr (_fd, key, value, op_ret); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "the fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); break; + } - value [op_ret] = '\0'; - dict_set (dict, key, data_from_dynptr (value, op_ret)); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "failed on key %s", key); + GF_FREE (value); + goto out; + } remaining_size -= strlen (key) + 1; list_offset += strlen (key) + 1; } /* while (remaining_size > 0) */ - done: +done: op_ret = size; if (dict) { + dict_del (dict, GFID_XATTR_KEY); dict_ref (dict); } - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, dict); + + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -2461,64 +3365,29 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, return 0; } +static int +_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; -int -fhandle_pair (xlator_t *this, int fd, - data_pair_t *trav, int flags) -{ - int sys_ret = -1; - int ret = 0; - - sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data, - trav->value->len, flags); - - if (sys_ret < 0) { - if (errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported"); - } else if (errno == ENOENT) { - gf_log (this->name, GF_LOG_DEBUG, - "fsetxattr on fd=%d failed: %s", fd, - strerror (errno)); - } else { - -#ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_WARNING), - "fd=%d: key:%s error:%s", - fd, trav->key, - strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_WARNING, - "fd=%d: key:%s error:%s", - fd, trav->key, - strerror (errno)); -#endif /* DARWIN */ - } - - ret = -errno; - goto out; - } + filler = tmp; -out: - return ret; + return posix_fhandle_pair (filler->this, filler->fd, k, v, + filler->flags); } - int32_t posix_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags) + fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; int _fd = -1; - data_pair_t * trav = NULL; - int ret = -1; + int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2528,106 +3397,190 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); VALIDATE_OR_GOTO (dict, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - trav = dict->members_list; - - while (trav) { - ret = fhandle_pair (this, _fd, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } + dict_del (dict, GFID_XATTR_KEY); - op_ret = 0; + filler.fd = _fd; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); return 0; } +int +_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *) data; + this = filler->this; + + op_ret = sys_lremovexattr (filler->real_path, key); + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr failed on %s (for %s): %s", + filler->real_path, key, strerror (errno)); + } + + return op_ret; +} + int32_t posix_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + if (!strcmp (GFID_XATTR_KEY, name)) { + gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" + " on gfid for file %s", real_path); + op_ret = -1; + goto out; + } + SET_FS_ID (frame->root->uid, frame->root->gid); + /** + * sending an empty key name with xdata containing the + * list of key(s) to be removed implies "bulk remove request" + * for removexattr. + */ + if (name && (strcmp (name, "") == 0) && xdata) { + filler.real_path = real_path; + filler.this = this; + op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); + if (op_ret) { + op_errno = filler.op_errno; + } + + goto out; + } + op_ret = sys_lremovexattr (real_path, name); + if (op_ret == -1) { + op_errno = errno; + if (op_errno != ENOATTR && op_errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr on %s (for %s): %s", real_path, + name, strerror (op_errno)); + goto out; + } + + op_ret = 0; +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +posix_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd * pfd = NULL; + int _fd = -1; + int ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + if (!strcmp (GFID_XATTR_KEY, name)) { + gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" + " on gfid for file"); + goto out; + } + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + + + + SET_FS_ID (frame->root->uid, frame->root->gid); + + op_ret = sys_fremovexattr (_fd, name); if (op_ret == -1) { op_errno = errno; - if (op_errno != ENOATTR && op_errno != EPERM) - gf_log (this->name, GF_LOG_WARNING, - "removexattr on %s: %s", loc->path, - strerror (op_errno)); + if (op_errno != ENOATTR && op_errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "fremovexattr (for %s): %s", + name, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_fsyncdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync) + fd_t *fd, int datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - struct posix_fd * pfd = NULL; - int _fd = -1; int ret = -1; - uint64_t tmp_pfd = 0; + struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - _fd = pfd->fd; op_ret = 0; - out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); +out: + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); return 0; } @@ -2635,12 +3588,12 @@ posix_fsyncdir (call_frame_t *frame, xlator_t *this, void posix_print_xattr (dict_t *this, - char *key, - data_t *value, - void *data) + char *key, + data_t *value, + void *data) { - gf_log ("posix", GF_LOG_TRACE, - "(key/val) = (%s/%d)", key, data_to_int32 (value)); + gf_log ("posix", GF_LOG_DEBUG, + "(key/val) = (%s/%d)", key, data_to_int32 (value)); } @@ -2654,236 +3607,281 @@ posix_print_xattr (dict_t *this, static void __add_array (int32_t *dest, int32_t *src, int count) { - int i = 0; - for (i = 0; i < count; i++) { - dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i])); - } + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32 (dest[i]); + if (destval == 0xffffffff) + continue; + dest[i] = hton32 (destval + ntoh32 (src[i])); + } } +static void +__or_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i])); + } +} -/** - * xattrop - xattr operations - for internal use by GlusterFS - * @optype: ADD_ARRAY: - * dict should contain: - * "key" ==> array of 32-bit numbers - */ +static void +__and_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i])); + } +} -int -posix_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) +static void +__add_long_array (int64_t *dest, int64_t *src, int count) { - char *real_path = NULL; - int32_t *array = NULL; - int size = 0; - int count = 0; + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i])); + } +} - int op_ret = 0; - int op_errno = 0; +static int +_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + + count = v->len; + array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); + + LOCK (&inode->lock); + { + if (filler->real_path) { + size = sys_lgetxattr (filler->real_path, k, + (char *)array, v->len); + } else { + size = sys_fgetxattr (filler->fd, k, (char *)array, + v->len); + } - data_pair_t *trav = NULL; + op_errno = errno; + if ((size == -1) && (op_errno != ENODATA) && + (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr (marker_xattrs, + k)) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s while doing " + "xattrop: Key:%s (%s)", + filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fgetxattr failed on fd=%d while doing " + "xattrop: Key:%s (%s)", + filler->fd, + k, strerror (op_errno)); + } - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (xattr, out); - VALIDATE_OR_GOTO (this, out); + op_ret = -1; + goto unlock; + } - trav = xattr->members_list; + switch (optype) { - if (loc->path) - MAKE_REAL_PATH (real_path, this, loc->path); + case GF_XATTROP_ADD_ARRAY: + __add_array ((int32_t *) array, (int32_t *) v->data, + v->len / 4); + break; - while (trav) { - count = trav->value->len / sizeof (int32_t); - array = CALLOC (count, sizeof (int32_t)); - - size = sys_lgetxattr (real_path, trav->key, (char *)array, - trav->value->len); + case GF_XATTROP_ADD_ARRAY64: + __add_long_array ((int64_t *) array, (int64_t *) v->data, + v->len / 8); + break; - op_errno = errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "extended attributes not " - "supported by filesystem"); - } else { - gf_log (this->name, GF_LOG_ERROR, - "%s: %s", loc->path, - strerror (op_errno)); - } - goto out; - } + case GF_XATTROP_OR_ARRAY: + __or_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; - switch (optype) { + case GF_XATTROP_AND_ARRAY: + __and_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; - case GF_XATTROP_ADD_ARRAY: - __add_array (array, (int32_t *) trav->value->data, - trav->value->len / 4); - break; + default: + gf_log (this->name, GF_LOG_ERROR, + "Unknown xattrop type (%d) on %s. Please send " + "a bug report to gluster-devel@nongnu.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } - default: - gf_log (this->name, GF_LOG_ERROR, - "unknown xattrop type %d. path=%s", - optype, loc->path); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + if (filler->real_path) { + size = sys_lsetxattr (filler->real_path, k, array, + v->len, 0); + } else { + size = sys_fsetxattr (filler->fd, k, (char *)array, + v->len, 0); + } + } +unlock: + UNLOCK (&inode->lock); - size = sys_lsetxattr (real_path, trav->key, array, - trav->value->len, 0); + if (op_ret == -1) + goto out; - op_errno = errno; - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, - "%s: key=%s (%s)", loc->path, - trav->key, strerror (op_errno)); - op_ret = -1; - goto out; - } else { - size = dict_set_bin (xattr, trav->key, array, - trav->value->len); + op_errno = errno; + if (size == -1) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "setxattr failed on %s while doing xattrop: " + "key=%s (%s)", filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fsetxattr failed on fd=%d while doing xattrop: " + "key=%s (%s)", filler->fd, + k, strerror (op_errno)); + + op_ret = -1; + goto out; + } else { + size = dict_set_bin (d, k, array, v->len); + + if (size != 0) { + if (filler->real_path) + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", filler->real_path, + k, strerror (-size)); + else + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (fd=%d): " + "key=%s (%s)", filler->fd, + k, strerror (-size)); + + op_ret = -1; + op_errno = EINVAL; + goto out; + } + array = NULL; + } + + array = NULL; - if (size != 0) { - gf_log (this->name, GF_LOG_ERROR, - "%s: key=%s (%s)", loc->path, - trav->key, strerror (-size)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } - - array = NULL; - trav = trav->next; - } - out: - if (array) - FREE (array); - STACK_UNWIND (frame, op_ret, op_errno, xattr); - return 0; + return op_ret; } +/** + * xattrop - xattr operations - for internal use by GlusterFS + * @optype: ADD_ARRAY: + * dict should contain: + * "key" ==> array of 32-bit numbers + */ int -posix_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr) { - int32_t *array = NULL; - int size = 0; - int count = 0; + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0,}; - int op_ret = 0; - int op_errno = 0; + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (xattr, out); + VALIDATE_OR_GOTO (this, out); - int _fd = -1; - struct posix_fd *pfd = NULL; + if (fd) { + op_ret = posix_fd_ctx_get (fd, this, &pfd); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get pfd from fd=%p", + fd); + op_errno = EBADFD; + goto out; + } + _fd = pfd->fd; + } - data_pair_t *trav = NULL; - int32_t ret = -1; + if (loc && !uuid_is_null (loc->gfid)) + MAKE_INODE_HANDLE (real_path, this, loc, NULL); - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (xattr, out); - VALIDATE_OR_GOTO (this, out); + if (real_path) { + inode = loc->inode; + } else if (fd) { + inode = fd->inode; + } - trav = xattr->members_list; + filler.this = this; + filler.fd = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; - if (fd) { - ret = fd_ctx_get (fd, this, (uint64_t *)&pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to get pfd from fd=%p", - fd); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - _fd = pfd->fd; - } + op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, + &filler); - while (trav) { - count = trav->value->len / sizeof (int32_t); - array = CALLOC (count, sizeof (int32_t)); - - size = sys_fgetxattr (_fd, trav->key, (char *)array, - trav->value->len); - - op_errno = errno; - if ((size == -1) && ((op_errno != ENODATA) && - (op_errno != ENOATTR))) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "extended attributes not " - "supported by filesystem"); - } else { - gf_log (this->name, GF_LOG_ERROR, - "%d: %s", _fd, - strerror (op_errno)); - } - goto out; - } +out: - switch (optype) { - case GF_XATTROP_ADD_ARRAY: - __add_array (array, (int32_t *) trav->value->data, - trav->value->len / 4); - break; - default: - gf_log (this->name, GF_LOG_ERROR, - "unknown xattrop type %d. fd=%d", - optype, _fd); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL); + return 0; +} - size = sys_fsetxattr (_fd, trav->key, (char *)array, - trav->value->len, 0); - op_errno = errno; - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, - "%d: key=%s (%s)", _fd, - trav->key, strerror (op_errno)); - op_ret = -1; - goto out; - } else { - size = dict_set_bin (xattr, trav->key, array, - trav->value->len); +int +posix_xattrop (call_frame_t *frame, xlator_t *this, + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop (frame, this, loc, NULL, optype, xattr); + return 0; +} - if (size != 0) { - gf_log (this->name, GF_LOG_ERROR, - "%d: key=%s (%s)", _fd, - trav->key, strerror (-size)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } - - array = NULL; - trav = trav->next; - } - -out: - if (array) - FREE (array); - STACK_UNWIND (frame, op_ret, op_errno, xattr); - return 0; + +int +posix_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop (frame, this, NULL, fd, optype, xattr); + return 0; } int posix_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) + loc_t *loc, int32_t mask, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2892,38 +3890,37 @@ posix_access (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); op_ret = access (real_path, mask & 07); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "access failed on %s: %s", - loc->path, strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s", + real_path, strerror (op_errno)); goto out; } - op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, off_t offset, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct stat buf = {0,}; - struct posix_fd * pfd = NULL; - int ret = -1; - uint64_t tmp_pfd = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct iatt preop = {0,}; + struct iatt postop = {0,}; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2932,892 +3929,716 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; + op_ret = posix_fdstat (this, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation fstat failed on fd=%p: %s", fd, + strerror (op_errno)); + goto out; + } + op_ret = ftruncate (_fd, offset); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "ftruncate failed: %s", - strerror (errno)); + gf_log (this->name, GF_LOG_ERROR, + "ftruncate failed on fd=%p (%"PRId64": %s", + fd, offset, strerror (errno)); goto out; } - op_ret = fstat (_fd, &buf); + op_ret = posix_fdstat (this, _fd, &postop); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s", - strerror (errno)); + gf_log (this->name, GF_LOG_ERROR, + "post-operation fstat failed on fd=%p: %s", + fd, strerror (errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, + &postop, NULL); return 0; } + int32_t -posix_fchown (call_frame_t *frame, xlator_t *this, - fd_t *fd, uid_t uid, gid_t gid) +posix_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct stat buf = {0,}; - struct posix_fd * pfd = NULL; - int ret = -1; - uint64_t tmp_pfd = 0; + int _fd = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt buf = {0,}; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = fchown (_fd, uid, gid); + op_ret = posix_fdstat (this, _fd, &buf); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "fchown failed: %s", - strerror (op_errno)); - goto out; - } - - op_ret = fstat (_fd, &buf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s", - strerror (op_errno)); + gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s", + fd, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, NULL); return 0; } +static int gf_posix_lk_log; int32_t -posix_fchmod (call_frame_t *frame, xlator_t *this, - fd_t *fd, mode_t mode) +posix_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct stat buf = {0,}; - struct posix_fd * pfd = NULL; - int ret = -1; - uint64_t tmp_pfd = 0; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); + struct gf_flock nullock = {0, }; - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "pfd is NULL fd=%p", fd); - op_errno = -ret; - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - _fd = pfd->fd; + STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL); + return 0; +} - op_ret = fchmod (_fd, mode); +int32_t +posix_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "fchmod failed: %s", strerror (errno)); - goto out; - } + STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL); + return 0; +} - op_ret = fstat (_fd, &buf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "fstat failed: %s", strerror (errno)); - goto out; - } +int32_t +posix_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - op_ret = 0; + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL); + return 0; +} - out: - SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &buf); +int32_t +posix_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); return 0; } - -static int -same_file_type (mode_t m1, mode_t m2) +int32_t +posix_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - return ((S_IFMT & (m1 ^ m2)) == 0); + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); + return 0; } -static int -ensure_file_type (xlator_t *this, char *pathname, mode_t mode) +int +posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { - struct stat stbuf = {0,}; - int op_ret = 0; - int ret = -1; - - ret = lstat (pathname, &stbuf); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_CRITICAL, - "stat failed while trying to make sure entry %s " - "is a directory: %s", pathname, strerror (errno)); - goto out; + off_t in_case = -1; + size_t filled = 0; + int count = 0; + char entrybuf[sizeof(struct dirent) + 256 + 8]; + struct dirent *entry = NULL; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; + + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; } - if (!same_file_type (mode, stbuf.st_mode)) { - op_ret = -EEXIST; - gf_log (this->name, GF_LOG_CRITICAL, - "entry %s is a different type of file " - "than expected", pathname); - goto out; + if (!off) { + rewinddir (dir); + } else { + seekdir (dir, off); } - out: - return op_ret; -} -static int -create_entry (xlator_t *this, int32_t flags, - dir_entry_t *entry, char *pathname) -{ - int op_ret = 0; - int ret = -1; - struct timeval tv[2] = {{0,0},{0,0}}; - - if (S_ISDIR (entry->buf.st_mode)) { - /* - * If the entry is directory, create it by - * calling 'mkdir'. If the entry is already - * present, check if it is a directory, - * and issue a warning if otherwise. - */ + while (filled <= size) { + in_case = telldir (dir); - ret = mkdir (pathname, entry->buf.st_mode); - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, pathname, - entry->buf.st_mode); - } - else { - op_ret = -errno; - gf_log (this->name, GF_LOG_DEBUG, - "mkdir %s with mode (0%o) failed: %s", - pathname, entry->buf.st_mode, - strerror (errno)); - goto out; - } + if (in_case == -1) { + gf_log (THIS->name, GF_LOG_ERROR, + "telldir failed on dir=%p: %s", + dir, strerror (errno)); + goto out; } - } else if ((flags & GF_SET_IF_NOT_PRESENT) - || !(flags & GF_SET_DIR_ONLY)) { - - /* create a 0-byte file here */ - - if (S_ISREG (entry->buf.st_mode)) { - ret = open (pathname, O_CREAT|O_EXCL, - entry->buf.st_mode); + errno = 0; + entry = NULL; + readdir_r (dir, (struct dirent *)entrybuf, &entry); - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, - pathname, - entry->buf.st_mode); - } - else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "Error creating file %s with " - "mode (0%o): %s", - pathname, entry->buf.st_mode, - strerror (errno)); - goto out; - } + if (!entry) { + if (errno == EBADF) { + gf_log (THIS->name, GF_LOG_WARNING, + "readdir failed on dir=%p: %s", + dir, strerror (errno)); + goto out; } + break; + } - close (ret); - - } else if (S_ISLNK (entry->buf.st_mode)) { - ret = symlink (entry->link, pathname); - - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, - pathname, - entry->buf.st_mode); - } - else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "error creating symlink %s: %s" - , pathname, strerror (errno)); - goto out; - } - } +#ifdef __NetBSD__ + /* + * NetBSD with UFS1 backend uses backing files for + * extended attributes. They can be found in a + * .attribute file located at the root of the filesystem + * We hide it to glusterfs clients, since chaos will occur + * when the cluster/dht xlator decides to distribute + * exended attribute backing file accross storage servers. + */ + if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) + && (!strcmp(entry->d_name, ".attribute"))) + continue; +#endif /* __NetBSD__ */ + + if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) + && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) { + continue; + } - } else if (S_ISBLK (entry->buf.st_mode) || - S_ISCHR (entry->buf.st_mode) || - S_ISFIFO (entry->buf.st_mode) || - S_ISSOCK (entry->buf.st_mode)) { - - ret = mknod (pathname, entry->buf.st_mode, - entry->buf.st_dev); - - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, - pathname, - entry->buf.st_mode); - } else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "error creating device file " - "%s: %s", - pathname, strerror (errno)); - goto out; - } + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; } - } else { - gf_log (this->name, GF_LOG_ERROR, - "invalid mode 0%o for %s", entry->buf.st_mode, - pathname); - op_ret = -EINVAL; - goto out; - } - } - - /* - * Preserve atime and mtime - */ - - if (!S_ISLNK (entry->buf.st_mode)) { - tv[0].tv_sec = entry->buf.st_atime; - tv[1].tv_sec = entry->buf.st_mtime; - ret = utimes (pathname, tv); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "utimes %s failed: %s", - pathname, strerror (errno)); - goto out; - } - } - -out: - return op_ret; - -} - - -int -posix_setdents (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t flags, dir_entry_t *entries, - int32_t count) -{ - char * real_path = NULL; - char * entry_path = NULL; - int32_t real_path_len = -1; - int32_t entry_path_len = -1; - int32_t ret = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_fd * pfd = {0, }; - struct timeval tv[2] = {{0, }, {0, }}; - uint64_t tmp_pfd = 0; - char pathname[ZR_PATH_MAX] = {0,}; - dir_entry_t * trav = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (entries, out); + } - tv[0].tv_sec = tv[0].tv_usec = 0; - tv[1].tv_sec = tv[1].tv_usec = 0; + this_size = max (sizeof (gf_dirent_t), + sizeof (gfs3_dirplist)) + + strlen (entry->d_name) + 1; - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_ERROR, - "fd's ctx not found on fd=%p for %s", - fd, this->name); - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; + if (this_size + filled > size) { + seekdir (dir, in_case); + break; + } - real_path = pfd->path; + this_entry = gf_dirent_for_name (entry->d_name); - if (!real_path) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "path is NULL on pfd=%p fd=%p", pfd, fd); - goto out; - } + if (!this_entry) { + gf_log (THIS->name, GF_LOG_ERROR, + "could not create gf_dirent for entry %s: (%s)", + entry->d_name, strerror (errno)); + goto out; + } + this_entry->d_off = telldir (dir); + this_entry->d_ino = entry->d_ino; + this_entry->d_type = entry->d_type; - real_path_len = strlen (real_path); - entry_path_len = real_path_len + 256; - entry_path = CALLOC (1, entry_path_len); + list_add_tail (&this_entry->list, &entries->list); - if (!entry_path) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "out of memory :("); - goto out; + filled += this_size; + count ++; } - strcpy (entry_path, real_path); - entry_path[real_path_len] = '/'; + if ((!readdir (dir) && (errno == 0))) + /* Indicate EOF */ + errno = ENOENT; +out: + return count; +} - /* fd exists, and everything looks fine */ - /** - * create an entry for each one present in '@entries' - * - if flag is set (ie, if its namespace), create both directories - * and files - * - if not set, create only directories. - * - * after the entry is created, change the mode and ownership of the - * entry according to the stat present in entries->buf. - */ +dict_t * +posix_entry_xattr_fill (xlator_t *this, inode_t *inode, + fd_t *fd, char *name, dict_t *dict, + struct iatt *stbuf) +{ + loc_t tmp_loc = {0,}; + char *entry_path = NULL; - trav = entries->next; - while (trav) { - strcpy (pathname, entry_path); - strcat (pathname, trav->name); + /* if we don't send the 'loc', open-fd-count be a problem. */ + tmp_loc.inode = inode; - ret = create_entry (this, flags, trav, pathname); - if (ret < 0) { - op_errno = -ret; - goto out; - } + MAKE_HANDLE_PATH (entry_path, this, fd->inode->gfid, name); - /* TODO: handle another flag, GF_SET_OVERWRITE */ + return posix_lookup_xattr_fill (this, entry_path, + &tmp_loc, dict, stbuf); - /* Change the mode */ - if (!S_ISLNK (trav->buf.st_mode)) { - ret = chmod (pathname, trav->buf.st_mode); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chmod on %s failed: %s", pathname, - strerror (op_errno)); - goto out; - } - } +} - /* change the ownership */ - ret = lchown (pathname, trav->buf.st_uid, trav->buf.st_gid); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chmod on %s failed: %s", pathname, - strerror (op_errno)); - goto out; - } - if (flags & GF_SET_EPOCH_TIME) { - ret = utimes (pathname, tv); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "utimes on %s failed: %s", pathname, - strerror (op_errno)); - goto out; - } +int +posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) +{ + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = {0, }; + uuid_t gfid; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + + list_for_each_entry (entry, &entries->list, list) { + memset (gfid, 0, 16); + inode = inode_grep (fd->inode->table, fd->inode, + entry->d_name); + if (inode) + uuid_copy (gfid, inode->gfid); + + strcpy (&hpath[len+1], entry->d_name); + + posix_pstat (this, gfid, hpath, &stbuf); + + if (!inode) + inode = inode_find (itable, stbuf.ia_gfid); + + if (!inode) + inode = inode_new (itable); + + entry->inode = inode; + + if (dict) { + entry->dict = + posix_entry_xattr_fill (this, entry->inode, + fd, entry->d_name, + dict, &stbuf); + dict_ref (entry->dict); } - /* consider the next entry */ - trav = trav->next; + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + inode = NULL; } - op_ret = 0; - out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno); - if (entry_path) - FREE (entry_path); - - return 0; + return 0; } + int32_t -posix_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) +posix_do_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) { - int _fd = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct stat buf = {0,}; - struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; - int ret = -1; + struct posix_fd *pfd = NULL; + DIR *dir = NULL; + int ret = -1; + int count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + gf_dirent_t entries; + int32_t skip_dirs = 0; - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + INIT_LIST_HEAD (&entries.list); + + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - _fd = pfd->fd; - - op_ret = fstat (_fd, &buf); + dir = pfd->dir; - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s", - strerror (op_errno)); + if (!dir) { + gf_log (this->name, GF_LOG_WARNING, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; goto out; - } + } - op_ret = 0; + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK (&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. + */ + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK (&fd->lock); - out: - SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &buf); - return 0; -} + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; -static int gf_posix_lk_log; + if (whichop != GF_FOP_READDIRP) + goto out; -int32_t -posix_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct flock *lock) -{ - struct flock nullock = {0, }; - frame->root->rsp_refs = NULL; + posix_readdirp_fill (this, fd, &entries, dict); - gf_posix_lk_log++; +out: + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR, - "\"features/posix-locks\" translator is " - "not loaded, you need to use it"); + gf_dirent_free (&entries); - STACK_UNWIND (frame, -1, ENOSYS, &nullock); return 0; } + int32_t -posix_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *lock) +posix_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) { - frame->root->rsp_refs = NULL; + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); + return 0; +} - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/posix-locks\" translator is not loaded. " - "You need to use it for proper functioning of GlusterFS"); - STACK_UNWIND (frame, -1, ENOSYS); +int32_t +posix_readdirp (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *dict) +{ + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict); return 0; } int32_t -posix_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *lock) +posix_priv (xlator_t *this) { - frame->root->rsp_refs = NULL; + struct posix_private *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/posix-locks\" translator is not loaded. " - "You need to use it for proper functioning of GlusterFS"); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); + gf_proc_dump_add_section(key_prefix); - STACK_UNWIND (frame, -1, ENOSYS); - return 0; -} + if (!this) + return 0; + priv = this->private; -int32_t -posix_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) -{ - frame->root->rsp_refs = NULL; + if (!priv) + return 0; - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/posix-locks\" translator is not loaded. " - "You need to use it for proper functioning of GlusterFS"); + gf_proc_dump_write("base_path","%s", priv->base_path); + gf_proc_dump_write("base_path_length","%d", priv->base_path_length); + gf_proc_dump_write("max_read","%d", priv->read_value); + gf_proc_dump_write("max_write","%d", priv->write_value); + gf_proc_dump_write("nr_files","%ld", priv->nr_files); - STACK_UNWIND (frame, -1, ENOSYS); return 0; } int32_t -posix_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) +posix_inode (xlator_t *this) { - frame->root->rsp_refs = NULL; - - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/posix-locks\" translator is not loaded. " - " You need to use it for proper functioning of GlusterFS"); - - STACK_UNWIND (frame, -1, ENOSYS); return 0; } int32_t -posix_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) +posix_rchecksum (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset, int32_t len, dict_t *xdata) { - uint64_t tmp_pfd = 0; - struct posix_fd * pfd = NULL; - DIR * dir = NULL; - int ret = -1; - size_t filled = 0; - int count = 0; - - int32_t op_ret = -1; - int32_t op_errno = 0; - - gf_dirent_t * this_entry = NULL; - gf_dirent_t entries; - struct dirent * entry = NULL; - off_t in_case = -1; - int32_t this_size = -1; - + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + int32_t weak_checksum = 0; + unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - INIT_LIST_HEAD (&entries.list); + priv = this->private; + memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { + op_errno = ENOMEM; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - dir = pfd->dir; - if (!dir) { - gf_log (this->name, GF_LOG_ERROR, - "dir is NULL for fd=%p", fd); - op_errno = EINVAL; + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; goto out; } + _fd = pfd->fd; - if (!off) { - rewinddir (dir); - } else { - seekdir (dir, off); - } + LOCK (&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect (fd, pfd, 0, offset, len); - while (filled <= size) { - in_case = telldir (dir); + ret = pread (_fd, buf, len, offset); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pread of %d bytes returned %d (%s)", + len, ret, strerror (errno)); - if (in_case == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "telldir failed: %s", - strerror (errno)); - goto out; } - errno = 0; - entry = readdir (dir); - - if (!entry) { - if (errno == EBADF) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "readdir failed: %s", - strerror (op_errno)); - goto out; - } - break; - } - - this_size = dirent_size (entry); - - if (this_size + filled > size) { - seekdir (dir, in_case); - break; - } - - - this_entry = gf_dirent_for_name (entry->d_name); - - if (!this_entry) { - gf_log (this->name, GF_LOG_ERROR, - "could not create gf_dirent for entry %s (%s)", - entry->d_name, strerror (errno)); - goto out; - } - this_entry->d_off = telldir (dir); - this_entry->d_ino = entry->d_ino; - - list_add_tail (&this_entry->list, &entries.list); - - filled += this_size; - count ++; } + UNLOCK (&fd->lock); - op_ret = count; + if (ret < 0) + goto out; - out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, &entries); + weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len); + gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum); - gf_dirent_free (&entries); + op_ret = 0; +out: + STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, + weak_checksum, strong_checksum, NULL); + + GF_FREE (alloc_buf); return 0; } +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ int32_t -posix_stats (call_frame_t *frame, xlator_t *this, - int32_t flags) - +notify (xlator_t *this, + int32_t event, + void *data, + ...) { - int32_t op_ret = -1; - int32_t op_errno = 0; - - struct xlator_stats xlstats = {0, }; - struct xlator_stats * stats = NULL; - struct statvfs buf = {0,}; - struct timeval tv = {0,}; - struct posix_private * priv = (struct posix_private *)this->private; - - int64_t avg_read = 0; - int64_t avg_write = 0; - int64_t _time_ms = 0; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - - stats = &xlstats; - - op_ret = statvfs (priv->base_path, &buf); - - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s", - strerror (op_errno)); - goto out; + switch (event) + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that posix xlator is up */ + default_notify (this, GF_EVENT_CHILD_UP, data); } - - /* client info is maintained at FSd */ - stats->nr_clients = priv->stats.nr_clients; - stats->nr_files = priv->stats.nr_files; - - /* number of free block in the filesystem. */ - stats->free_disk = buf.f_bfree * buf.f_bsize; - - stats->total_disk_size = buf.f_blocks * buf.f_bsize; - stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize; - - /* Calculate read and write usage */ - op_ret = gettimeofday (&tv, NULL); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "gettimeofday failed: %s", strerror (errno)); - goto out; + break; + default: + /* */ + break; } + return 0; +} - /* Read */ - _time_ms = (tv.tv_sec - priv->init_time.tv_sec) * 1000 + - ((tv.tv_usec - priv->init_time.tv_usec) / 1000); +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; - avg_read = (_time_ms) ? (priv->read_value / _time_ms) : 0; /* KBps */ - avg_write = (_time_ms) ? (priv->write_value / _time_ms) : 0; /* KBps */ + if (!this) + return ret; - _time_ms = (tv.tv_sec - priv->prev_fetch_time.tv_sec) * 1000 + - ((tv.tv_usec - priv->prev_fetch_time.tv_usec) / 1000); + ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); - if (_time_ms && ((priv->interval_read / _time_ms) > priv->max_read)) { - priv->max_read = (priv->interval_read / _time_ms); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; } - if (_time_ms && - ((priv->interval_write / _time_ms) > priv->max_write)) { - priv->max_write = priv->interval_write / _time_ms; - } + return ret; +} - stats->read_usage = avg_read / priv->max_read; - stats->write_usage = avg_write / priv->max_write; +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; - op_ret = gettimeofday (&(priv->prev_fetch_time), NULL); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "gettimeofday failed: %s", - strerror (op_errno)); - goto out; - } + priv = this->private; - priv->interval_read = 0; - priv->interval_write = 0; + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "uid/gid for brick path %s, %s", + priv->base_path, strerror (errno)); - op_ret = 0; + return ret; +} - out: - SET_TO_OLD_FS_ID (); - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, stats); - return 0; +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; } -int32_t -posix_checksum (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flag) + +int +reconfigure (xlator_t *this, dict_t *options) { - char * real_path = NULL; - DIR * dir = NULL; - struct dirent * dirent = NULL; - uint8_t file_checksum[ZR_FILENAME_MAX] = {0,}; - uint8_t dir_checksum[ZR_FILENAME_MAX] = {0,}; - int32_t op_ret = -1; - int32_t op_errno = 0; - int i = 0; - int length = 0; + int ret = -1; + struct posix_private *priv = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str = NULL; - struct stat buf = {0,}; - char tmp_real_path[ZR_PATH_MAX] = {0,}; - int ret = -1; + priv = this->private; - MAKE_REAL_PATH (real_path, this, loc->path); + GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); + posix_set_owner (this, uid, gid); - dir = opendir (real_path); + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); - if (!dir){ - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "opendir() failed on `%s': %s", - real_path, strerror (op_errno)); - goto out; - } + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); - while ((dirent = readdir (dir))) { - errno = 0; - if (!dirent) { - if (errno != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "readdir() failed: %s", - strerror (errno)); - goto out; - } - break; - } + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } - length = strlen (dirent->d_name); + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); - strcpy (tmp_real_path, real_path); - strcat (tmp_real_path, "/"); - strcat (tmp_real_path, dirent->d_name); - ret = lstat (tmp_real_path, &buf); + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); - if (ret == -1) - continue; + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); - if (S_ISDIR (buf.st_mode)) { - for (i = 0; i < length; i++) - dir_checksum[i] ^= dirent->d_name[i]; - } else { - for (i = 0; i < length; i++) - file_checksum[i] ^= dirent->d_name[i]; - } + if (priv->node_uuid_pathinfo && + (uuid_is_null (priv->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); } - closedir (dir); - op_ret = 0; + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + posix_spawn_health_check_thread (this); - out: - frame->root->rsp_refs = NULL; - STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum); - - return 0; + ret = 0; +out: + return ret; } -/** - * notify - when parent sends PARENT_UP, send CHILD_UP event from here - */ -int32_t -notify (xlator_t *this, - int32_t event, - void *data, - ...) -{ - switch (event) - { - case GF_EVENT_PARENT_UP: - { - /* Tell the parent that posix xlator is up */ - default_notify (this, GF_EVENT_CHILD_UP, data); - } - break; - default: - /* */ - break; - } - return 0; -} /** * init - @@ -3825,158 +4646,291 @@ notify (xlator_t *this, int init (xlator_t *this) { - int ret = 0; - int op_ret = -1; - gf_boolean_t tmp_bool = 0; - struct stat buf = {0,}; - struct posix_private * _private = NULL; - data_t * dir_data = NULL; - data_t * tmp_data = NULL; + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t *tmp_data = NULL; + struct stat buf = {0,}; + gf_boolean_t tmp_bool = 0; + int dict_ret = 0; + int ret = 0; + int op_ret = -1; + ssize_t size = -1; + int32_t janitor_sleep = 0; + uuid_t old_uuid = {0,}; + uuid_t dict_uuid = {0,}; + uuid_t gfid = {0,}; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + char *guuid = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); if (this->children) { - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_CRITICAL, "FATAL: storage/posix cannot have subvolumes"); ret = -1; goto out; } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Volume is dangling. Please check the volume file."); + } if (!dir_data) { - gf_log (this->name, GF_LOG_ERROR, - "export directory not specified in volfile"); + gf_log (this->name, GF_LOG_CRITICAL, + "Export directory not specified in volume file."); ret = -1; goto out; } umask (000); // umask `masking' is done at the client side - /* Check whether the specified directory exists, if not create it. */ - op_ret = lstat (dir_data->data, &buf); - if ((ret != 0) || !S_ISDIR (buf.st_mode)) { + /* Check whether the specified directory exists, if not log it. */ + op_ret = stat (dir_data->data, &buf); + if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { gf_log (this->name, GF_LOG_ERROR, - "directory '%s' doesn't exists, Exiting", - dir_data->data); + "Directory '%s' doesn't exist, exiting.", + dir_data->data); ret = -1; goto out; } - /* Check for Extended attribute support, if not present, log it */ op_ret = sys_lsetxattr (dir_data->data, - "trusted.glusterfs.test", "working", 8, 0); - if (op_ret < 0) { - tmp_data = dict_get (this->options, - "mandate-attribute"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &tmp_bool) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for key " - "\"mandate-xattr\""); - ret = -1; - goto out; - } - if (!tmp_bool) { - gf_log (this->name, GF_LOG_WARNING, - "Extended attribute not supported, " - "starting as per option"); - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Extended attribute not supported, " - "exiting"); - ret = -1; - goto out; - } - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Extended attribute not supported, exiting"); - ret = -1; - goto out; - } + "trusted.glusterfs.test", "working", 8, 0); + if (op_ret == 0) { + sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); + } else { + tmp_data = dict_get (this->options, + "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &tmp_bool) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "wrong option provided for key " + "\"mandate-attribute\""); + ret = -1; + goto out; + } + if (!tmp_bool) { + gf_log (this->name, GF_LOG_WARNING, + "Extended attribute not supported, " + "starting as per option"); + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "Extended attribute not supported, " + "exiting."); + ret = -1; + goto out; + } + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "Extended attribute not supported, exiting."); + ret = -1; + goto out; + } } - _private = CALLOC (1, sizeof (*_private)); + tmp_data = dict_get (this->options, "volume-id"); + if (tmp_data) { + op_ret = uuid_parse (tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "wrong volume-id (%s) set in volume file", + tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { + if (uuid_compare (old_uuid, dict_uuid)) { + gf_log (this->name, GF_LOG_ERROR, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa (old_uuid)); + ret = -1; + goto out; + } + } else if ((size == -1) && (errno == ENODATA)) { + + gf_log (this->name, GF_LOG_ERROR, + "Extended attribute trusted.glusterfs." + "volume-id is absent"); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fetch volume-id (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } else { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid (gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa (gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: wrong value set as gfid", + dir_data->data); + ret = -1; + goto out; + } else if ((size == -1) && (errno != ENODATA)) { + /* Wrong 'gfid' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fetch gfid (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + 16, XATTR_CREATE); + if (size) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set gfid (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } + } + + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) + gf_log (this->name, GF_LOG_WARNING, + "Posix access control list is not supported."); + + ret = 0; + _private = GF_CALLOC (1, sizeof (*_private), + gf_posix_mt_posix_private); if (!_private) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); ret = -1; goto out; } - _private->base_path = strdup (dir_data->data); + _private->base_path = gf_strdup (dir_data->data); _private->base_path_length = strlen (_private->base_path); - { - /* Stats related variables */ - gettimeofday (&_private->init_time, NULL); - gettimeofday (&_private->prev_fetch_time, NULL); - _private->max_read = 1; - _private->max_write = 1; + LOCK_INIT (&_private->lock); + + ret = dict_get_str (this->options, "hostname", &_private->hostname); + if (ret) { + _private->hostname = GF_CALLOC (256, sizeof (char), + gf_common_mt_char); + if (!_private->hostname) { + goto out; + } + ret = gethostname (_private->hostname, 256); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not find hostname (%s)", strerror (errno)); + } } _private->export_statfs = 1; tmp_data = dict_get (this->options, "export-statfs-size"); if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->export_statfs) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "'export-statfs-size' takes only boolean " - "options"); - goto out; - } + if (gf_string2boolean (tmp_data->data, + &_private->export_statfs) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "'export-statfs-size' takes only boolean " + "options"); + goto out; + } if (!_private->export_statfs) gf_log (this->name, GF_LOG_DEBUG, - "'statfs()' returns dummy size"); + "'statfs()' returns dummy size"); } - tmp_data = dict_get (this->options, "o-direct"); + _private->background_unlink = 0; + tmp_data = dict_get (this->options, "background-unlink"); if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->o_direct) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for 'o-direct'"); - goto out; - } - if (_private->o_direct) + if (gf_string2boolean (tmp_data->data, + &_private->background_unlink) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "'background-unlink' takes only boolean " + "options"); + goto out; + } + + if (_private->background_unlink) gf_log (this->name, GF_LOG_DEBUG, - "o-direct mode is enabled (O_DIRECT " - "for every open)"); + "unlinks will be performed in background"); } - _private->num_devices_to_span = 1; - - tmp_data = dict_get (this->options, "span-devices"); + tmp_data = dict_get (this->options, "o-direct"); if (tmp_data) { - if (gf_string2int32 (tmp_data->data, - &_private->num_devices_to_span) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for 'span-devices'"); - goto out; - } - if (_private->num_devices_to_span > 1) { - gf_log (this->name, GF_LOG_INFO, - "spaning enabled accross %d mounts", - _private->num_devices_to_span); - _private->span_devices = 1; + if (gf_string2boolean (tmp_data->data, + &_private->o_direct) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "wrong option provided for 'o-direct'"); + goto out; } - if (_private->num_devices_to_span < 1) - _private->num_devices_to_span = 1; + if (_private->o_direct) + gf_log (this->name, GF_LOG_DEBUG, + "o-direct mode is enabled (O_DIRECT " + "for every open)"); + } + + ret = dict_get_str (this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (uuid_parse (guuid, _private->glusterd_uuid)) + gf_log (this->name, GF_LOG_WARNING, "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_log (this->name, GF_LOG_DEBUG, "No glusterd (node) UUID " + "passed - node-uuid xattr request will return " + "\"No such attribute\""); } - _private->st_device = CALLOC (1, (sizeof (dev_t) * - _private->num_devices_to_span)); - - /* Start with the base */ - _private->st_device[0] = buf.st_dev; + ret = 0; + + _private->janitor_sleep_duration = 600; + + dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", + &janitor_sleep); + if (dict_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Setting janitor sleep duration to %d.", + janitor_sleep); + _private->janitor_sleep_duration = janitor_sleep; + } + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = opendir (dir_data->data); + if (!_private->mount_lock) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Could not lock brick directory"); + goto out; + } #ifndef GF_DARWIN_HOST_OS { struct rlimit lim; @@ -3985,27 +4939,106 @@ init (xlator_t *this) if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { gf_log (this->name, GF_LOG_WARNING, - "WARNING: Failed to set 'ulimit -n " - " 1048576': %s", strerror(errno)); + "Failed to set 'ulimit -n " + " 1048576': %s", strerror(errno)); lim.rlim_cur = 65536; lim.rlim_max = 65536; if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set max open fd to " - "64k: %s", strerror(errno)); + gf_log (this->name, GF_LOG_WARNING, + "Failed to set maximum allowed open " + "file descriptors to 64k: %s", + strerror(errno)); } else { - gf_log (this->name, GF_LOG_ERROR, - "max open fd set to 64k"); + gf_log (this->name, GF_LOG_INFO, + "Maximum allowed open file descriptors " + "set to 65536"); } } } #endif - this->private = (void *)_private; - out: + op_ret = posix_handle_init (this); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, uint32, out); + GF_OPTION_INIT ("brick-gid", gid, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (uuid_is_null (_private->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + + pthread_mutex_init (&_private->janitor_lock, NULL); + pthread_cond_init (&_private->janitor_cond, NULL); + INIT_LIST_HEAD (&_private->janitor_fds); + + posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); +out: return ret; } @@ -4013,13 +5046,19 @@ void fini (xlator_t *this) { struct posix_private *priv = this->private; - sys_lremovexattr (priv->base_path, "trusted.glusterfs.test"); - FREE (priv); + if (!priv) + return; + this->private = NULL; + /*unlock brick dir*/ + if (priv->mount_lock) + closedir (priv->mount_lock); + GF_FREE (priv); return; } -struct xlator_mops mops = { - .stats = posix_stats, +struct xlator_dumpops dumpops = { + .priv = posix_priv, + .inode = posix_inode, }; struct xlator_fops fops = { @@ -4027,6 +5066,7 @@ struct xlator_fops fops = { .stat = posix_stat, .opendir = posix_opendir, .readdir = posix_readdir, + .readdirp = posix_readdirp, .readlink = posix_readlink, .mknod = posix_mknod, .mkdir = posix_mkdir, @@ -4035,10 +5075,7 @@ struct xlator_fops fops = { .symlink = posix_symlink, .rename = posix_rename, .link = posix_link, - .chmod = posix_chmod, - .chown = posix_chown, .truncate = posix_truncate, - .utimens = posix_utimens, .create = posix_create, .open = posix_open, .readv = posix_readv, @@ -4051,40 +5088,105 @@ struct xlator_fops fops = { .getxattr = posix_getxattr, .fgetxattr = posix_fgetxattr, .removexattr = posix_removexattr, + .fremovexattr = posix_fremovexattr, .fsyncdir = posix_fsyncdir, .access = posix_access, .ftruncate = posix_ftruncate, .fstat = posix_fstat, .lk = posix_lk, - .inodelk = posix_inodelk, - .finodelk = posix_finodelk, - .entrylk = posix_entrylk, - .fentrylk = posix_fentrylk, - .fchown = posix_fchown, - .fchmod = posix_fchmod, - .setdents = posix_setdents, - .getdents = posix_getdents, - .checksum = posix_checksum, - .xattrop = posix_xattrop, - .fxattrop = posix_fxattrop, + .inodelk = posix_inodelk, + .finodelk = posix_finodelk, + .entrylk = posix_entrylk, + .fentrylk = posix_fentrylk, + .rchecksum = posix_rchecksum, + .xattrop = posix_xattrop, + .fxattrop = posix_fxattrop, + .setattr = posix_setattr, + .fsetattr = posix_fsetattr, + .fallocate = _posix_fallocate, + .discard = posix_discard, + .zerofill = posix_zerofill, }; struct xlator_cbks cbks = { - .release = posix_release, - .releasedir = posix_releasedir, - .forget = posix_forget + .release = posix_release, + .releasedir = posix_releasedir, + .forget = posix_forget }; struct volume_options options[] = { - { .key = {"o-direct"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"directory"}, - .type = GF_OPTION_TYPE_PATH }, - { .key = {"export-statfs-size"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"mandate-attribute"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"span-devices"}, - .type = GF_OPTION_TYPE_INT }, - { .key = {NULL} } + { .key = {"o-direct"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"directory"}, + .type = GF_OPTION_TYPE_PATH }, + { .key = {"hostname"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"mandate-attribute"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"background-unlink"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"janitor-sleep-duration"}, + .type = GF_OPTION_TYPE_INT }, + { .key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"glusterd-uuid"}, + .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO" + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting uid of brick's owner" + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting gid of brick's owner" + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname" + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable" + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, + { .key = {NULL} } }; |
