diff options
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 5475 |
1 files changed, 2734 insertions, 2741 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index f1541a2da..fb45c7a67 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -24,6 +14,7 @@ #define __XOPEN_SOURCE 500 +#include <openssl/md5.h> #include <stdint.h> #include <sys/time.h> #include <sys/resource.h> @@ -31,13 +22,19 @@ #include <libgen.h> #include <pthread.h> #include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> #endif /* GF_BSD_HOST_OS */ +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + #include "glusterfs.h" -#include "md5.h" #include "checksum.h" #include "dict.h" #include "logging.h" @@ -52,18 +49,25 @@ #include "statedump.h" #include "locking.h" #include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 #undef HAVE_SET_FSID #ifdef HAVE_SET_FSID #define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; -#define SET_FS_ID(uid, gid) do { \ +#define SET_FS_ID(uid, gid) do { \ old_fsuid = setfsuid (uid); \ old_fsgid = setfsgid (gid); \ } while (0) -#define SET_TO_OLD_FS_ID() do { \ +#define SET_TO_OLD_FS_ID() do { \ setfsuid (old_fsuid); \ setfsgid (old_fsgid); \ } while (0) @@ -76,451 +80,106 @@ #endif -typedef struct { - xlator_t *this; - const char *real_path; - dict_t *xattr; - struct stat *stbuf; - loc_t *loc; -} posix_xattr_filler_t; - int posix_forget (xlator_t *this, inode_t *inode) { - uint64_t tmp_cache = 0; - if (!inode_ctx_del (inode, this, &tmp_cache)) - dict_destroy ((dict_t *)(long)tmp_cache); - - return 0; -} - -static void -_posix_xattr_get_set (dict_t *xattr_req, - char *key, - data_t *data, - void *xattrargs) -{ - posix_xattr_filler_t *filler = xattrargs; - char *value = NULL; - ssize_t xattr_size = -1; - int ret = -1; - char *databuf = NULL; - int _fd = -1; - loc_t *loc = NULL; - ssize_t req_size = 0; - - - /* should size be put into the data_t ? */ - if (!strcmp (key, "glusterfs.content") - && S_ISREG (filler->stbuf->st_mode)) { - - /* file content request */ - req_size = data_to_uint64 (data); - if (req_size >= filler->stbuf->st_size) { - _fd = open (filler->real_path, O_RDONLY); - - if (_fd == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Opening file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - databuf = calloc (1, filler->stbuf->st_size); - - if (!databuf) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Out of memory."); - goto err; - } - - ret = read (_fd, databuf, filler->stbuf->st_size); - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Read on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = close (_fd); - _fd = -1; - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Close on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = dict_set_bin (filler->xattr, key, - databuf, filler->stbuf->st_size); - if (ret < 0) { - goto err; - } - - /* To avoid double free in cleanup below */ - databuf = NULL; - err: - if (_fd != -1) - close (_fd); - if (databuf) - FREE (databuf); - } - } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { - loc = filler->loc; - if (!list_empty (&loc->inode->fd_list)) { - ret = dict_set_uint32 (filler->xattr, key, 1); - } else { - ret = dict_set_uint32 (filler->xattr, key, 0); - } - } else { - xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); - - if (xattr_size > 0) { - value = calloc (1, xattr_size + 1); - - sys_lgetxattr (filler->real_path, key, value, - xattr_size); - - value[xattr_size] = '\0'; - ret = dict_set_bin (filler->xattr, key, - value, xattr_size); - if (ret < 0) - gf_log (filler->this->name, GF_LOG_DEBUG, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - } - } -} - - -static int -posix_scale_st_ino (struct posix_private *priv, struct stat *buf) -{ - int i = 0; - int ret = -1; - ino_t temp_ino = 0; - int r; - struct stat export_buf; - - for (i = 0; i < priv->num_devices_to_span; i++) { - if (buf->st_dev == priv->st_device[i]) { - break; - } - if (priv->st_device[i] == 0) { - priv->st_device[i] = buf->st_dev; - break; - } - } - - if (i == priv->num_devices_to_span) { - r = lstat (priv->base_path, &export_buf); - if ((r != 0) || (buf->st_dev != export_buf.st_dev)) { - goto out; - } - - gf_log (THIS->name, GF_LOG_WARNING, - "device number for exported volume %s has changed " - "since init --- assuming done by automount", - priv->base_path); - - priv->st_device[0] = export_buf.st_dev; - } - - temp_ino = (buf->st_ino * priv->num_devices_to_span) + i; - - buf->st_ino = temp_ino; - - ret = 0; -out: - return ret; -} - - -int -posix_lstat_with_gen (xlator_t *this, const char *path, struct stat *stbuf_p) -{ - struct posix_private *priv = NULL; - int ret = 0; - char gen_key[1024] = {0, }; - uint64_t gen_val_be = 0; - uint64_t gen_val = 0; - struct stat stbuf = {0, }; - - priv = this->private; - - ret = lstat (path, &stbuf); - if (ret == -1) - return -1; - - ret = posix_scale_st_ino (priv, &stbuf); - if ((ret == -1) && !strcmp (path, "..")) { - /* stat on ../ might land us outside the export directory, - so don't panic */ - - gf_log (this->name, GF_LOG_WARNING, - "Access to %s (on dev %lld) is crossing device (%lld)", - path, (unsigned long long) stbuf.st_dev, - (unsigned long long) priv->st_device[0]); - errno = EXDEV; - return -1; - } - -#ifndef GF_LINUX_HOST_OS - if (!S_ISDIR (stbuf.st_mode) && !S_ISREG (stbuf.st_mode)) { - stbuf.st_dev = (typeof(stbuf.st_dev))stbuf.st_mtime; - if (stbuf_p) - *stbuf_p = stbuf; - return 0; - } -#endif /* !GF_LINUX_HOST_OS */ - - ret = snprintf (gen_key, 1024, "trusted.%s.gen", this->name); - - if (ret == 1024) - return -1; - - ret = sys_lgetxattr (path, gen_key, (void *) &gen_val_be, - sizeof (gen_val_be)); - if (ret == -1) { - LOCK (&priv->gen_lock); - { - gen_val = ++priv->gen_seq; - } - UNLOCK (&priv->gen_lock); - - gen_val_be = hton64 (gen_val); - - ret = sys_lsetxattr (path, gen_key, &gen_val_be, - sizeof (gen_val_be), 0); - } else { - gen_val = ntoh64 (gen_val_be); - } - - if (ret >= 0) { - ret = 0; - stbuf.st_dev = (typeof(stbuf.st_dev))gen_val; - if (stbuf_p) - *stbuf_p = stbuf; - } - - return ret; -} - - -int -posix_fstat_with_gen (xlator_t *this, int fd, struct stat *stbuf_p) -{ - struct posix_private *priv = NULL; - int ret = 0; - char gen_key[1024] = {0, }; - uint64_t gen_val_be = 0; - uint64_t gen_val = 0; - struct stat stbuf = {0, }; - - priv = this->private; - - ret = fstat (fd, &stbuf); - if (ret == -1) - return -1; - - ret = posix_scale_st_ino (priv, &stbuf); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "Access to fd %d (on dev %lld) is crossing device (%lld)", - fd, (unsigned long long) stbuf.st_dev, - (unsigned long long) priv->st_device[0]); - errno = EXDEV; - return -1; - } - -#ifndef GF_LINUX_HOST_OS - if (!S_ISDIR (stbuf.st_mode) && !S_ISREG (stbuf.st_mode)) { - stbuf.st_dev = (typeof(stbuf.st_dev))stbuf.st_mtime; - return 0; - } -#endif /* !GF_LINUX_HOST_OS */ - - ret = snprintf (gen_key, 1024, "trusted.%s.gen", this->name); - - if (ret == 1024) - return -1; - - ret = sys_fgetxattr (fd, gen_key, (void *) &gen_val_be, - sizeof (gen_val_be)); - if (ret == -1) { - LOCK (&priv->gen_lock); - { - gen_val = ++priv->gen_seq; - } - UNLOCK (&priv->gen_lock); - - gen_val_be = hton64 (gen_val); - - ret = sys_fsetxattr (fd, gen_key, &gen_val_be, - sizeof (gen_val_be), 0); - } else { - gen_val = ntoh64 (gen_val_be); - } - - if (ret >= 0) { - ret = 0; - stbuf.st_dev = (typeof(stbuf.st_dev))gen_val; - if (stbuf_p) - *stbuf_p = stbuf; - } - - return ret; -} - - -dict_t * -posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc, - dict_t *xattr_req, struct stat *buf) -{ - dict_t *xattr = NULL; - posix_xattr_filler_t filler = {0, }; - - xattr = get_new_dict(); - if (!xattr) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - - filler.this = this; - filler.real_path = real_path; - filler.xattr = xattr; - filler.stbuf = buf; - filler.loc = loc; - - dict_foreach (xattr_req, _posix_xattr_get_set, &filler); -out: - return xattr; -} - - -/* - * If the parent directory of {real_path} has the setgid bit set, - * then set {gid} to the gid of the parent. Otherwise, - * leave {gid} unchanged. - */ - -int -setgid_override (xlator_t *this, char *real_path, gid_t *gid) -{ - char * tmp_path = NULL; - char * parent_path = NULL; - struct stat parent_stbuf; + uint64_t tmp_cache = 0; + if (!inode_ctx_del (inode, this, &tmp_cache)) + dict_destroy ((dict_t *)(long)tmp_cache); - int op_ret = 0; - - tmp_path = strdup (real_path); - if (!tmp_path) { - op_ret = -ENOMEM; - gf_log ("[storage/posix]", GF_LOG_ERROR, - "Out of memory"); - goto out; - } - - parent_path = dirname (tmp_path); - - op_ret = posix_lstat_with_gen (this, parent_path, &parent_stbuf); - - if (op_ret == -1) { - op_ret = -errno; - gf_log ("[storage/posix]", GF_LOG_ERROR, - "lstat on parent directory (%s) failed: %s", - parent_path, strerror (errno)); - goto out; - } - - if (parent_stbuf.st_mode & S_ISGID) { - /* - Entries created inside a setgid directory - should inherit the gid from the parent - */ - - *gid = parent_stbuf.st_gid; - } -out: - - if (tmp_path) - FREE (tmp_path); - - return op_ret; + return 0; } +/* Regular fops */ int32_t posix_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) + loc_t *loc, dict_t *xdata) { - struct stat buf = {0, }; - char * real_path = NULL; + struct iatt buf = {0, }; int32_t op_ret = -1; int32_t entry_ret = 0; int32_t op_errno = 0; dict_t * xattr = NULL; - char * pathdup = NULL; - char * parentpath = NULL; - struct stat postparent = {0,}; - struct posix_private *priv = NULL; + char * real_path = NULL; + char * par_path = NULL; + struct iatt postparent = {0,}; + int32_t gfidless = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); - MAKE_REAL_PATH (real_path, this, loc->path); + /* The Hidden directory should be for housekeeping purpose and it + should not get any gfid on it */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "Lookup issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless); + op_ret = -1; + if (uuid_is_null (loc->pargfid)) { + /* nameless lookup */ + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + } else { + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); - priv = this->private; + if (uuid_is_null (loc->inode->gfid)) { + posix_gfid_heal (this, real_path, loc, xdata); + MAKE_ENTRY_HANDLE (real_path, par_path, this, + loc, &buf); + } + } - op_ret = posix_lstat_with_gen (this, real_path, &buf); op_errno = errno; if (op_ret == -1) { - if (op_errno != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", - loc->path, strerror (op_errno)); - } + if (op_errno != ENOENT) { + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", + real_path, strerror (op_errno)); + } entry_ret = -1; goto parent; } - if (xattr_req && (op_ret == 0)) { - xattr = posix_lookup_xattr_fill (this, real_path, loc, - xattr_req, &buf); + if (xdata && (op_ret == 0)) { + xattr = posix_lookup_xattr_fill (this, real_path, loc, + xdata, &buf); } parent: - if (loc->parent) { - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - - parentpath = dirname (pathdup); - - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + if (par_path) { + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } } op_ret = entry_ret; out: - if (pathdup) - FREE (pathdup); - if (xattr) dict_ref (xattr); + if (!op_ret && !gfidless && uuid_is_null (buf.ia_gfid)) { + gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for " + "%s", (real_path) ? real_path: ""); + op_ret = -1; + op_errno = ENODATA; + } STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, - loc->inode, &buf, xattr, &postparent); + (loc)?loc->inode:NULL, &buf, xattr, &postparent); if (xattr) dict_unref (xattr); @@ -530,15 +189,13 @@ out: int32_t -posix_stat (call_frame_t *frame, - xlator_t *this, - loc_t *loc) +posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - struct stat buf = {0,}; - char * real_path = NULL; + struct iatt buf = {0,}; int32_t op_ret = -1; int32_t op_errno = 0; - struct posix_private *priv = NULL; + struct posix_private *priv = NULL; + char *real_path = NULL; DECLARE_OLD_FS_ID_VAR; @@ -550,45 +207,68 @@ posix_stat (call_frame_t *frame, VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = posix_lstat_with_gen (this, real_path, &buf); + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", loc->path, + gf_log (this->name, (op_errno == ENOENT)? + GF_LOG_DEBUG:GF_LOG_ERROR, + "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL); return 0; } static int -posix_do_chmod (xlator_t *this, - const char *path, - struct stat *stbuf) +posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf) { - int32_t ret = -1; + int32_t ret = -1; + mode_t mode = 0; + struct stat stat; + int is_symlink = 0; - ret = lchmod (path, stbuf->st_mode); - if ((ret == -1) && (errno == ENOSYS)) { - ret = chmod (path, stbuf->st_mode); + ret = sys_lstat (path, &stat); + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, + "lstat failed: %s (%s)", path, strerror (errno)); + goto out; } + if (S_ISLNK (stat.st_mode)) + is_symlink = 1; + + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + ret = lchmod (path, mode); + if ((ret == -1) && (errno == ENOSYS)) { + /* in Linux symlinks are always in mode 0777 and no + such call as lchmod exists. + */ + gf_log (this->name, GF_LOG_DEBUG, + "%s (%s)", path, strerror (errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = chmod (path, mode); + } +out: return ret; } static int posix_do_chown (xlator_t *this, const char *path, - struct stat *stbuf, + struct iatt *stbuf, int32_t valid) { int32_t ret = -1; @@ -596,10 +276,10 @@ posix_do_chown (xlator_t *this, gid_t gid = -1; if (valid & GF_SET_ATTR_UID) - uid = stbuf->st_uid; + uid = stbuf->ia_uid; if (valid & GF_SET_ATTR_GID) - gid = stbuf->st_gid; + gid = stbuf->ia_gid; ret = lchown (path, uid, gid); @@ -609,30 +289,53 @@ posix_do_chown (xlator_t *this, static int posix_do_utimes (xlator_t *this, const char *path, - struct stat *stbuf) + struct iatt *stbuf) { int32_t ret = -1; struct timeval tv[2] = {{0,},{0,}}; + struct stat stat; + int is_symlink = 0; - tv[0].tv_sec = stbuf->st_atime; - tv[0].tv_usec = ST_ATIM_NSEC (stbuf) / 1000; - tv[1].tv_sec = stbuf->st_mtime; - tv[1].tv_usec = ST_MTIM_NSEC (stbuf) / 1000; + ret = sys_lstat (path, &stat); + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, + "%s (%s)", path, strerror (errno)); + goto out; + } + + if (S_ISLNK (stat.st_mode)) + is_symlink = 1; - ret = utimes (path, tv); + tv[0].tv_sec = stbuf->ia_atime; + tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; + tv[1].tv_sec = stbuf->ia_mtime; + tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; + ret = lutimes (path, tv); + if ((ret == -1) && (errno == ENOSYS)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (%s)", path, strerror (errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = utimes (path, tv); + } + +out: return ret; } int posix_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct stat *stbuf, int32_t valid) + loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = 0; - struct stat statpre = {0,}; - struct stat statpost = {0,}; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -641,9 +344,8 @@ posix_setattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, &statpre); - op_ret = posix_lstat_with_gen (this, real_path, &statpre); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -697,7 +399,7 @@ posix_setattr (call_frame_t *frame, xlator_t *this, } } - op_ret = posix_lstat_with_gen (this, real_path, &statpost); + op_ret = posix_pstat (this, loc->gfid, real_path, &statpost); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -712,7 +414,7 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, - &statpre, &statpost); + &statpre, &statpost, NULL); return 0; } @@ -720,7 +422,7 @@ out: int32_t posix_do_fchown (xlator_t *this, int fd, - struct stat *stbuf, + struct iatt *stbuf, int32_t valid) { int ret = -1; @@ -728,10 +430,10 @@ posix_do_fchown (xlator_t *this, gid_t gid = -1; if (valid & GF_SET_ATTR_UID) - uid = stbuf->st_uid; + uid = stbuf->ia_uid; if (valid & GF_SET_ATTR_GID) - gid = stbuf->st_gid; + gid = stbuf->ia_gid; ret = fchown (fd, uid, gid); @@ -741,30 +443,34 @@ posix_do_fchown (xlator_t *this, int32_t posix_do_fchmod (xlator_t *this, - int fd, struct stat *stbuf) + int fd, struct iatt *stbuf) { - return fchmod (fd, stbuf->st_mode); + mode_t mode = 0; + + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + return fchmod (fd, mode); } static int posix_do_futimes (xlator_t *this, int fd, - struct stat *stbuf) + struct iatt *stbuf) { + gf_log (this->name, GF_LOG_WARNING, "function not implemented fd(%d)", fd); + errno = ENOSYS; return -1; } int posix_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct stat *stbuf, int32_t valid) + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - struct stat statpre = {0,}; - struct stat statpost = {0,}; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; struct posix_fd *pfd = NULL; - uint64_t tmp_pfd = 0; int32_t ret = -1; DECLARE_OLD_FS_ID_VAR; @@ -775,16 +481,15 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - op_ret = posix_fstat_with_gen (this, pfd->fd, &statpre); + op_ret = posix_fdstat (this, pfd->fd, &statpre); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -839,7 +544,7 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this, } } - op_ret = posix_fstat_with_gen (this, pfd->fd, &statpost); + op_ret = posix_fdstat (this, pfd->fd, &statpost); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -854,283 +559,375 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, - &statpre, &statpost); + &statpre, &statpost, NULL); return 0; } -int32_t -posix_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd) +static int32_t +posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) { - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - DIR * dir = NULL; - struct posix_fd * pfd = NULL; + struct posix_fd *pfd = NULL; + int32_t ret = -1; DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); VALIDATE_OR_GOTO (fd, out); - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - - dir = opendir (real_path); + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } - if (dir == NULL) { - op_errno = errno; + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; gf_log (this->name, GF_LOG_ERROR, - "opendir failed on %s: %s", - loc->path, strerror (op_errno)); + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); goto out; } - op_ret = dirfd (dir); - if (op_ret < 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "dirfd() failed on %s: %s", - loc->path, strerror (op_errno)); + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == -1) { + ret = -errno; goto out; } - pfd = CALLOC (1, sizeof (*fd)); - if (!pfd) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - - pfd->dir = dir; - pfd->fd = dirfd (dir); - pfd->path = strdup (real_path); - if (!pfd->path) { + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); goto out; } - fd_ctx_set (fd, this, (uint64_t)(long)pfd); +out: + SET_TO_OLD_FS_ID (); - op_ret = 0; + return ret; +} - out: - if (op_ret == -1) { - if (dir) { - closedir (dir); - dir = NULL; - } - if (pfd) { - if (pfd->path) - FREE (pfd->path); - FREE (pfd); - pfd = NULL; - } - } +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; - SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd); - return 0; + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; } +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ + size_t num_vect = 0; + int32_t num_loop = 1; + int32_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + size_t remain = 0; + size_t extra = 0; + struct iovec *vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size ; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC (num_vect, sizeof(struct iovec), + gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, + "memory alloc failed, vect_size %d: %s", + vect_size, strerror(errno)); + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC (vect_size, sizeof(char), + gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + lseek(fd, offset, SEEK_SET); + for (idx = 0; idx < num_loop; idx++) { + op_ret = writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + } + if (extra) { + op_ret = writev(fd, vector, extra); + if (op_ret < 0) + goto err; + } + if (remain) { + vector[0].iov_len = remain; + op_ret = writev(fd, vector , 1); + if (op_ret < 0) + goto err; + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} -int32_t -posix_getdents (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, int32_t flag) +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - dir_entry_t entries = {0, }; - dir_entry_t *tmp = NULL; - DIR *dir = NULL; - struct dirent *dirent = NULL; - int real_path_len = -1; - int entry_path_len = -1; - char *entry_path = NULL; - int count = 0; - struct posix_fd *pfd = NULL; - uint64_t tmp_pfd = 0; - struct stat buf = {0,}; - int ret = -1; - char tmp_real_path[ZR_PATH_MAX]; - char linkpath[ZR_PATH_MAX]; - struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + int32_t ret = -1; - DECLARE_OLD_FS_ID_VAR ; + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - op_errno = -ret; gf_log (this->name, GF_LOG_DEBUG, - "fd %p does not have context in %s", - fd, this->name); + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - if (!pfd->path) { - op_errno = EBADFD; - gf_log (this->name, GF_LOG_DEBUG, - "pfd does not have path set (possibly file " - "fd, fd=%p)", fd); + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation fstat failed on fd = %p: %s", fd, + strerror (errno)); goto out; } + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_log(this->name, GF_LOG_ERROR, + "zerofill failed on fd %d length %ld %s", + pfd->fd, len, strerror(errno)); + goto out; + } + if (pfd->flags & (O_SYNC|O_DSYNC)) { + ret = fsync (pfd->fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + pfd->fd, strerror (errno)); + ret = -errno; + goto out; + } + } - real_path = pfd->path; - real_path_len = strlen (real_path); - - entry_path_len = real_path_len + NAME_MAX; - entry_path = CALLOC (1, entry_path_len); - - if (!entry_path) { - op_errno = errno; + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); + "post operation fstat failed on fd=%p: %s", fd, + strerror (errno)); goto out; } - strncpy (entry_path, real_path, entry_path_len); - entry_path[real_path_len] = '/'; +out: + SET_TO_OLD_FS_ID (); - dir = pfd->dir; + return ret; +} - if (!dir) { - op_errno = EBADFD; - gf_log (this->name, GF_LOG_DEBUG, - "pfd does not have dir set (possibly file fd, " - "fd=%p, path=`%s'", - fd, real_path); - goto out; - } +static int32_t +_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; - /* TODO: check for all the type of flag, and behave appropriately */ + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; - while ((dirent = readdir (dir))) { - if (!dirent) - break; + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; - /* This helps in self-heal, when only directories - needs to be replicated */ + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + return 0; - /* This is to reduce the network traffic, in case only - directory is needed from posix */ +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} - strncpy (tmp_real_path, real_path, ZR_PATH_MAX); - strncat (tmp_real_path, "/", - ZR_PATH_MAX - strlen (tmp_real_path)); +static int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; - strncat (tmp_real_path, dirent->d_name, - ZR_PATH_MAX - (strlen (tmp_real_path) + 1)); + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; - ret = posix_lstat_with_gen (this, tmp_real_path, &buf); + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); + return 0; - if ((flag == GF_GET_DIR_ONLY) - && (ret != -1 && !S_ISDIR(buf.st_mode))) { - continue; - } +err: + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); + return 0; - tmp = CALLOC (1, sizeof (*tmp)); +} - if (!tmp) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; - tmp->name = strdup (dirent->d_name); - if (!tmp->name) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } + ret = posix_do_zerofill(frame, this, fd, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; - if (entry_path_len < - (real_path_len + 1 + strlen (tmp->name) + 1)) { - entry_path_len = (real_path_len + - strlen (tmp->name) + 1024); + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); + return 0; - entry_path = realloc (entry_path, entry_path_len); - } +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); + return 0; - strcpy (&entry_path[real_path_len+1], tmp->name); +} - tmp->buf = buf; +int32_t +posix_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd, dict_t *xdata) +{ + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + DIR * dir = NULL; + struct posix_fd * pfd = NULL; - if (S_ISLNK(tmp->buf.st_mode)) { + DECLARE_OLD_FS_ID_VAR; - ret = readlink (entry_path, linkpath, ZR_PATH_MAX); - if (ret != -1) { - linkpath[ret] = '\0'; - tmp->link = strdup (linkpath); - } - } else { - tmp->link = ""; - } + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (loc->path, out); + VALIDATE_OR_GOTO (fd, out); - count++; + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); - tmp->next = entries.next; - entries.next = tmp; + op_ret = -1; + dir = opendir (real_path); - /* if size is 0, count can never be = size, so entire - dir is read */ - if (count == size) - break; + if (dir == NULL) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "opendir failed on %s: %s", + real_path, strerror (op_errno)); + goto out; } - FREE (entry_path); + op_ret = dirfd (dir); + if (op_ret < 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "dirfd() failed on %s: %s", + real_path, strerror (op_errno)); + goto out; + } - op_ret = 0; + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } - out: - SET_TO_OLD_FS_ID (); + pfd->dir = dir; + pfd->fd = dirfd (dir); - if (op_ret == -1) { - if (entry_path) - FREE (entry_path); - } + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set the fd context path=%s fd=%p", + real_path, fd); - STACK_UNWIND_STRICT (getdents, frame, op_ret, op_errno, - &entries, count); + op_ret = 0; - if (op_ret == 0) { - while (entries.next) { - tmp = entries.next; - entries.next = entries.next->next; - FREE (tmp->name); - FREE (tmp); +out: + if (op_ret == -1) { + if (dir) { + closedir (dir); + dir = NULL; + } + if (pfd) { + GF_FREE (pfd); + pfd = NULL; } } + SET_TO_OLD_FS_ID (); + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); return 0; } - int32_t posix_releasedir (xlator_t *this, - fd_t *fd) + fd_t *fd) { - int32_t op_ret = -1; - int32_t op_errno = 0; struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; + uint64_t tmp_pfd = 0; int ret = 0; struct posix_private *priv = NULL; @@ -1140,30 +937,20 @@ posix_releasedir (xlator_t *this, ret = fd_ctx_del (fd, this, &tmp_pfd); if (ret < 0) { - op_errno = -ret; gf_log (this->name, GF_LOG_DEBUG, "pfd from fd=%p is NULL", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; + pfd = (struct posix_fd *)(long)tmp_pfd; if (!pfd->dir) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_DEBUG, - "pfd->dir is NULL for fd=%p path=%s", - fd, pfd->path ? pfd->path : "<NULL>"); + gf_log (this->name, GF_LOG_WARNING, + "pfd->dir is NULL for fd=%p", fd); goto out; } priv = this->private; - if (!pfd->path) { - op_errno = EBADFD; - gf_log (this->name, GF_LOG_DEBUG, - "pfd->path was NULL. fd=%p pfd=%p", - fd, pfd); - } - pthread_mutex_lock (&priv->janitor_lock); { INIT_LIST_HEAD (&pfd->list); @@ -1172,79 +959,73 @@ posix_releasedir (xlator_t *this, } pthread_mutex_unlock (&priv->janitor_lock); - op_ret = 0; - - out: +out: return 0; } int32_t posix_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) + loc_t *loc, size_t size, dict_t *xdata) { char * dest = NULL; int32_t op_ret = -1; - int32_t lstat_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - struct stat stbuf = {0,}; + struct iatt stbuf = {0,}; DECLARE_OLD_FS_ID_VAR; - VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame, out); SET_FS_ID (frame->root->uid, frame->root->gid); dest = alloca (size + 1); - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = readlink (real_path, dest, size); + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "readlink on %s failed: %s", loc->path, + "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; } - dest[op_ret] = 0; - - lstat_ret = posix_lstat_with_gen (this, real_path, &stbuf); - if (lstat_ret == -1) { - op_ret = -1; + op_ret = readlink (real_path, dest, size); + if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", loc->path, + "readlink on %s failed: %s", real_path, strerror (op_errno)); goto out; } - out: + dest[op_ret] = 0; +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf); + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); return 0; } -int32_t + +int posix_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev) + loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) { - int tmp_fd = 0; + int tmp_fd = 0; int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = 0; - struct stat stbuf = { 0, }; + char *par_path = 0; + struct iatt stbuf = { 0, }; char was_present = 1; struct posix_private *priv = NULL; gid_t gid = 0; - char *pathdup = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; - char *parentpath = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + void * uuid_req = NULL; DECLARE_OLD_FS_ID_VAR; @@ -1255,52 +1036,77 @@ posix_mknod (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); gid = frame->root->gid; - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)){ - was_present = 0; - } - - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) - goto out; - SET_FS_ID (frame->root->uid, gid); - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - - parentpath = dirname (pathdup); - op_ret = posix_lstat_with_gen (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } - + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + /* Check if the 'gfid' already exists, because this mknod may be an + internal call from distribute for creating 'linkfile', and that + linkfile may be for a hardlinked file */ + if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); + op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get the gfid from dict for %s", + loc->path); + goto real_op; + } + op_ret = posix_create_link_if_gfid_exists (this, uuid_req, + real_path); + if (!op_ret) + goto post_op; + } + +real_op: +#ifdef __NetBSD__ + if (S_ISFIFO(mode)) + op_ret = mkfifo (real_path, mode); + else +#endif /* __NetBSD__ */ op_ret = mknod (real_path, mode, dev); if (op_ret == -1) { op_errno = errno; - if ((op_errno == EINVAL) && S_ISREG (mode)) { - /* Over Darwin, mknod with (S_IFREG|mode) - doesn't work */ - tmp_fd = creat (real_path, mode); - if (tmp_fd == -1) - goto out; - close (tmp_fd); - } else { + if ((op_errno == EINVAL) && S_ISREG (mode)) { + /* Over Darwin, mknod with (S_IFREG|mode) + doesn't work */ + tmp_fd = creat (real_path, mode); + if (tmp_fd == -1) { + gf_log (this->name, GF_LOG_ERROR, + "create failed on %s: %s", + real_path, strerror (errno)); + goto out; + } + close (tmp_fd); + } else { - gf_log (this->name, GF_LOG_ERROR, - "mknod on %s failed: %s", loc->path, - strerror (op_errno)); - goto out; - } + gf_log (this->name, GF_LOG_ERROR, + "mknod on %s failed: %s", real_path, + strerror (op_errno)); + goto out; + } + } + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); } #ifndef HAVE_SET_FSID @@ -1308,40 +1114,53 @@ posix_mknod (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lchown on %s failed: %s", loc->path, + "lchown on %s failed: %s", real_path, strerror (op_errno)); goto out; } #endif - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); +post_op: + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "mknod on %s failed: %s", loc->path, + "mknod on %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; - out: - if (pathdup) - FREE (pathdup); - +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, - loc->inode, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1351,166 +1170,20 @@ posix_mknod (call_frame_t *frame, xlator_t *this, } -static int -janitor_walker (const char *fpath, const struct stat *sb, - int typeflag, struct FTW *ftwbuf) -{ - switch (sb->st_mode & S_IFMT) { - case S_IFREG: - case S_IFBLK: - case S_IFLNK: - case S_IFCHR: - case S_IFIFO: - case S_IFSOCK: - gf_log (THIS->name, GF_LOG_TRACE, - "unlinking %s", fpath); - unlink (fpath); - break; - - case S_IFDIR: - if (ftwbuf->level) { /* don't remove top level dir */ - gf_log (THIS->name, GF_LOG_TRACE, - "removing directory %s", fpath); - - rmdir (fpath); - } - break; - } - - return 0; /* 0 = FTW_CONTINUE */ -} - - -static struct posix_fd * -janitor_get_next_fd (xlator_t *this) -{ - struct posix_private *priv = NULL; - struct posix_fd *pfd = NULL; - - struct timespec timeout; - - priv = this->private; - - pthread_mutex_lock (&priv->janitor_lock); - { - if (list_empty (&priv->janitor_fds)) { - time (&timeout.tv_sec); - timeout.tv_sec += priv->janitor_sleep_duration; - timeout.tv_nsec = 0; - - pthread_cond_timedwait (&priv->janitor_cond, - &priv->janitor_lock, - &timeout); - goto unlock; - } - - pfd = list_entry (priv->janitor_fds.next, struct posix_fd, - list); - - list_del (priv->janitor_fds.next); - } -unlock: - pthread_mutex_unlock (&priv->janitor_lock); - - return pfd; -} - - -static void * -posix_janitor_thread_proc (void *data) -{ - xlator_t * this = NULL; - struct posix_private *priv = NULL; - struct posix_fd *pfd; - - time_t now; - - this = data; - priv = this->private; - - THIS = this; - - while (1) { - time (&now); - if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { - gf_log (this->name, GF_LOG_TRACE, - "janitor cleaning out /" GF_REPLICATE_TRASH_DIR); - - nftw (priv->trash_path, - janitor_walker, - 32, - FTW_DEPTH | FTW_PHYS); - - priv->last_landfill_check = now; - } - - pfd = janitor_get_next_fd (this); - if (pfd) { - if (pfd->dir == NULL) { - gf_log (this->name, GF_LOG_TRACE, - "janitor: closing file fd=%d", pfd->fd); - close (pfd->fd); - } else { - gf_log (this->name, GF_LOG_TRACE, - "janitor: closing dir fd=%p", pfd->dir); - closedir (pfd->dir); - } - - if (pfd->path) - FREE (pfd->path); - - FREE (pfd); - } - } - - return NULL; -} - - -static void -posix_spawn_janitor_thread (xlator_t *this) -{ - struct posix_private *priv = NULL; - int ret = 0; - - priv = this->private; - - LOCK (&priv->lock); - { - if (!priv->janitor_present) { - ret = pthread_create (&priv->janitor, NULL, - posix_janitor_thread_proc, this); - - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "spawning janitor thread failed: %s", - strerror (errno)); - goto unlock; - } - - priv->janitor_present = _gf_true; - } - } -unlock: - UNLOCK (&priv->lock); -} - - -int32_t +int posix_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode) + loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = NULL; - struct stat stbuf = {0, }; + char *par_path = NULL; + struct iatt stbuf = {0, }; char was_present = 1; struct posix_private *priv = NULL; gid_t gid = 0; - char *pathdup = NULL; - char *parentpath = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1518,85 +1191,112 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); + /* The Hidden directory should be for housekeeping purpose and it + should not get created from a user request */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "mkdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); gid = frame->root->gid; - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if ((op_ret == -1) && (errno == ENOENT)) { was_present = 0; } - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) - goto out; - SET_FS_ID (frame->root->uid, gid); - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - parentpath = dirname (pathdup); - - op_ret = posix_lstat_with_gen (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + mode |= S_ISGID; + } + op_ret = mkdir (real_path, mode); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "mkdir of %s failed: %s", loc->path, + "mkdir of %s failed: %s", real_path, strerror (op_errno)); goto out; } + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); + } + #ifndef HAVE_SET_FSID op_ret = chown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "chown on %s failed: %s", loc->path, + "chown on %s failed: %s", real_path, strerror (op_errno)); goto out; } #endif - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", loc->path, + "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } op_ret = 0; - out: - if (pathdup) - FREE (pathdup); - +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, - loc->inode, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1608,17 +1308,17 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, int32_t posix_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, int xflag, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - char *pathdup = NULL; - char *parentpath = NULL; - int32_t fd = -1; - struct posix_private *priv = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + struct iatt stbuf = {0,}; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1627,65 +1327,60 @@ posix_unlink (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - - parentpath = dirname (pathdup); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - op_ret = posix_lstat_with_gen (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (stbuf.ia_nlink == 1) + posix_handle_unset (this, stbuf.ia_gfid, NULL); + priv = this->private; if (priv->background_unlink) { - if (S_ISREG (loc->inode->st_mode)) { + if (IA_ISREG (loc->inode->ia_type)) { fd = open (real_path, O_RDONLY); if (fd == -1) { op_ret = -1; op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "open of %s failed: %s", loc->path, + "open of %s failed: %s", real_path, strerror (op_errno)); goto out; } } } - op_ret = unlink (real_path); + op_ret = sys_unlink (real_path); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "unlink of %s failed: %s", loc->path, + "unlink of %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; - out: - if (pathdup) - FREE (pathdup); - +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, - &preparent, &postparent); + &preparent, &postparent, NULL); if (fd != -1) { close (fd); @@ -1694,17 +1389,20 @@ posix_unlink (call_frame_t *frame, xlator_t *this, return 0; } -int32_t + +int posix_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - char * pathdup = NULL; - char * parentpath = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; + char * par_path = NULL; + char * gfid_str = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + struct iatt stbuf; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -1713,75 +1411,102 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); + /* The Hidden directory should be for housekeeping purpose and it + should not get deleted from inside process */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "rmdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + priv = this->private; - parentpath = dirname (pathdup); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - op_ret = posix_lstat_with_gen (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } - op_ret = rmdir (real_path); + if (flags) { + gfid_str = uuid_utoa (stbuf.ia_gfid); + char *tmp_path = alloca (strlen (priv->trash_path) + + strlen ("/") + + strlen (gfid_str) + 1); + + mkdir (priv->trash_path, 0755); + sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str); + op_ret = rename (real_path, tmp_path); + } else { + op_ret = rmdir (real_path); + } op_errno = errno; - if (op_errno == EEXIST) - /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ - op_errno = ENOTEMPTY; + if (op_ret == 0) { + posix_handle_unset (this, stbuf.ia_gfid, NULL); + } + + if (op_errno == EEXIST) + /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ + op_errno = ENOTEMPTY; /* No need to log a common error as ENOTEMPTY */ if (op_ret == -1 && op_errno != ENOTEMPTY) { gf_log (this->name, GF_LOG_ERROR, - "rmdir of %s failed: %s", loc->path, + "rmdir of %s failed: %s", real_path, strerror (op_errno)); } - if (op_ret == -1) + if (op_ret == -1) { + gf_log (this->name, + (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "%s on %s failed", (flags) ? "rename" : "rmdir", + real_path); goto out; + } - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + par_path, strerror (op_errno)); goto out; } - out: - if (pathdup) - FREE (pathdup); - +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - &preparent, &postparent); + &preparent, &postparent, NULL); return 0; } -int32_t + +int posix_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc) + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = 0; - struct stat stbuf = { 0, }; + char * par_path = 0; + struct iatt stbuf = { 0, }; struct posix_private *priv = NULL; gid_t gid = 0; - char was_present = 1; - char *pathdup = NULL; - char *parentpath = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; + char was_present = 1; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1793,82 +1518,96 @@ posix_symlink (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); if ((op_ret == -1) && (errno == ENOENT)){ was_present = 0; } - gid = frame->root->gid; - - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) - goto out; - SET_FS_ID (frame->root->uid, gid); - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - parentpath = dirname (pathdup); + gid = frame->root->gid; - op_ret = posix_lstat_with_gen (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + op_ret = symlink (linkname, real_path); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "symlink of %s --> %s failed: %s", - loc->path, linkname, strerror (op_errno)); + real_path, linkname, strerror (op_errno)); goto out; } + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); + } + #ifndef HAVE_SET_FSID op_ret = lchown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lchown failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } #endif - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); + + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } - - op_ret = 0; - out: - if (pathdup) - FREE (pathdup); + op_ret = 0; +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, - loc->inode, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1880,23 +1619,26 @@ posix_symlink (call_frame_t *frame, xlator_t *this, int posix_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_oldpath = NULL; char *real_newpath = NULL; - struct stat stbuf = {0, }; + char *par_oldpath = NULL; + char *par_newpath = NULL; + struct iatt stbuf = {0, }; struct posix_private *priv = NULL; - char was_present = 1; - char *oldpathdup = NULL; - char *oldparentpath = NULL; - char *newpathdup = NULL; - char *newparentpath = NULL; - struct stat preoldparent = {0, }; - struct stat postoldparent = {0, }; - struct stat prenewparent = {0, }; - struct stat postnewparent = {0, }; + char was_present = 1; + struct iatt preoldparent = {0, }; + struct iatt postoldparent = {0, }; + struct iatt prenewparent = {0, }; + struct iatt postnewparent = {0, }; + char olddirid[64]; + char newdirid[64]; + uuid_t victim = {0}; + int was_dir = 0; + int nlink = 0; DECLARE_OLD_FS_ID_VAR; @@ -1909,53 +1651,84 @@ posix_rename (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_oldpath, this, oldloc->path); - MAKE_REAL_PATH (real_newpath, this, newloc->path); - - oldpathdup = strdup (real_oldpath); - GF_VALIDATE_OR_GOTO (this->name, oldpathdup, out); + MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - oldparentpath = dirname (oldpathdup); - - op_ret = posix_lstat_with_gen (this, oldparentpath, &preoldparent); + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - oldloc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_oldpath, strerror (op_errno)); goto out; } - newpathdup = strdup (real_newpath); - GF_VALIDATE_OR_GOTO (this->name, newpathdup, out); - - newparentpath = dirname (newpathdup); - - op_ret = posix_lstat_with_gen (this, newparentpath, &prenewparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "pre-operation lstat on parent of %s failed: %s", - newloc->path, strerror (op_errno)); + par_newpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if ((op_ret == -1) && (errno == ENOENT)){ was_present = 0; + } else { + uuid_copy (victim, stbuf.ia_gfid); + if (IA_ISDIR (stbuf.ia_type)) + was_dir = 1; + nlink = stbuf.ia_nlink; } - op_ret = rename (real_oldpath, real_newpath); + if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { + gf_log (this->name, GF_LOG_WARNING, + "found directory at %s while expecting ENOENT", + real_newpath); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && + uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "found directory %s at %s while renaming %s", + uuid_utoa_r (newloc->inode->gfid, olddirid), + real_newpath, + uuid_utoa_r (stbuf.ia_gfid, newdirid)); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_unset (this, oldloc->inode->gfid, NULL); + } + + op_ret = sys_rename (real_oldpath, real_newpath); if (op_ret == -1) { op_errno = errno; gf_log (this->name, - (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR), + (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR), "rename of %s to %s failed: %s", - oldloc->path, newloc->path, strerror (op_errno)); + real_oldpath, real_newpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf); + if (was_dir) + posix_handle_unset (this, victim, NULL); + + if (was_present && !was_dir && nlink == 1) + posix_handle_unset (this, victim, NULL); + + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_soft (this, real_newpath, newloc, + oldloc->inode->gfid, NULL); + } + + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -1964,43 +1737,37 @@ posix_rename (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_lstat_with_gen (this, oldparentpath, &postoldparent); + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - oldloc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_oldpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, newparentpath, &postnewparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - newloc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_newpath, strerror (op_errno)); goto out; } op_ret = 0; - out: - if (oldpathdup) - FREE (oldpathdup); - - if (newpathdup) - FREE (newpathdup); - +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, &preoldparent, &postoldparent, - &prenewparent, &postnewparent); + &prenewparent, &postnewparent, NULL); if ((op_ret == -1) && !was_present) { unlink (real_newpath); - } + } return 0; } @@ -2008,19 +1775,18 @@ posix_rename (call_frame_t *frame, xlator_t *this, int posix_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_oldpath = 0; char *real_newpath = 0; - struct stat stbuf = {0, }; + char *par_newpath = 0; + struct iatt stbuf = {0, }; struct posix_private *priv = NULL; char was_present = 1; - char *newpathdup = NULL; - char *newparentpath = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -2033,40 +1799,42 @@ posix_link (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_oldpath, this, oldloc->path); - MAKE_REAL_PATH (real_newpath, this, newloc->path); + MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); - op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf); + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); if ((op_ret == -1) && (errno == ENOENT)) { was_present = 0; } - newpathdup = strdup (real_newpath); - if (!newpathdup) { - gf_log (this->name, GF_LOG_ERROR, "strdup failed"); - op_errno = ENOMEM; - goto out; - } - - newparentpath = dirname (newpathdup); - op_ret = posix_lstat_with_gen (this, newparentpath, &preparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", - newparentpath, strerror (op_errno)); + par_newpath, strerror (op_errno)); goto out; } +#ifdef HAVE_LINKAT + /* + * On most systems (Linux being the notable exception), link(2) + * first resolves symlinks. If the target is a directory or + * is nonexistent, it will fail. linkat(2) operates on the + * symlink instead of its target when the AT_SYMLINK_FOLLOW + * flag is not supplied. + */ + op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0); +#else op_ret = link (real_oldpath, real_newpath); +#endif if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "link %s to %s failed: %s", - oldloc->path, newloc->path, strerror (op_errno)); + real_oldpath, real_newpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, real_newpath, &stbuf); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2075,23 +1843,22 @@ posix_link (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_lstat_with_gen (this, newparentpath, &postparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", - newparentpath, strerror (op_errno)); + par_newpath, strerror (op_errno)); goto out; } op_ret = 0; - out: - if (newpathdup) - FREE (newpathdup); +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, - oldloc->inode, &stbuf, &preparent, &postparent); + (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_newpath); @@ -2102,17 +1869,15 @@ posix_link (call_frame_t *frame, xlator_t *this, int32_t -posix_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset) +posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = 0; struct posix_private *priv = NULL; - struct stat prebuf = {0,}; - struct stat postbuf = {0,}; + struct iatt prebuf = {0,}; + struct iatt postbuf = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -2124,14 +1889,13 @@ posix_truncate (call_frame_t *frame, VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = posix_lstat_with_gen (this, real_path, &prebuf); + MAKE_INODE_HANDLE (real_path, this, loc, &prebuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "pre-operation lstat on %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } @@ -2140,11 +1904,11 @@ posix_truncate (call_frame_t *frame, op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "truncate on %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gen (this, real_path, &postbuf); + op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", @@ -2154,36 +1918,35 @@ posix_truncate (call_frame_t *frame, op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - &prebuf, &postbuf); + &prebuf, &postbuf, NULL); return 0; } -int32_t +int posix_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, - fd_t *fd) + mode_t umask, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int32_t _fd = -1; int _flags = 0; char * real_path = NULL; - struct stat stbuf = {0, }; + char * par_path = NULL; + struct iatt stbuf = {0, }; struct posix_fd * pfd = NULL; struct posix_private * priv = NULL; - char was_present = 1; + char was_present = 1; gid_t gid = 0; - char *pathdup = NULL; - char *parentpath = NULL; - struct stat preparent = {0,}; - struct stat postparent = {0,}; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -2196,31 +1959,25 @@ posix_create (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); gid = frame->root->gid; - op_ret = setgid_override (this, real_path, &gid); - - if (op_ret < 0) { - goto out; - } - SET_FS_ID (frame->root->uid, gid); - pathdup = strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - parentpath = dirname (pathdup); - - op_ret = posix_lstat_with_gen (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + if (!flags) { _flags = O_CREAT | O_RDWR | O_EXCL; } @@ -2228,7 +1985,7 @@ posix_create (call_frame_t *frame, xlator_t *this, _flags = flags | O_CREAT; } - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if ((op_ret == -1) && (errno == ENOENT)) { was_present = 0; } @@ -2240,23 +1997,48 @@ posix_create (call_frame_t *frame, xlator_t *this, if (_fd == -1) { op_errno = errno; + op_ret = -1; gf_log (this->name, GF_LOG_ERROR, - "open on %s failed: %s", loc->path, + "open on %s failed: %s", real_path, strerror (op_errno)); goto out; } + if (was_present) + goto fill_stat; + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting gfid on %s failed", real_path); + } + #ifndef HAVE_SET_FSID op_ret = chown (real_path, frame->root->uid, gid); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "chown on %s failed: %s", - real_path, strerror (op_errno)); + real_path, strerror (op_errno)); } #endif - op_ret = posix_fstat_with_gen (this, _fd, &stbuf); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + +fill_stat: + op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2264,41 +2046,40 @@ posix_create (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_lstat_with_gen (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } - op_ret = -1; - pfd = CALLOC (1, sizeof (*pfd)); - + op_ret = -1; + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); if (!pfd) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); goto out; } pfd->flags = flags; pfd->fd = _fd; - fd_ctx_set (fd, this, (uint64_t)(long)pfd); + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set the fd context path=%s fd=%p", + real_path, fd); LOCK (&priv->lock); { - priv->stats.nr_files++; + priv->nr_files++; } UNLOCK (&priv->lock); op_ret = 0; - out: - if (pathdup) - FREE (pathdup); +out: SET_TO_OLD_FS_ID (); if ((-1 == op_ret) && (_fd != -1)) { @@ -2310,14 +2091,15 @@ posix_create (call_frame_t *frame, xlator_t *this, } STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, - fd, loc->inode, &stbuf, &preparent, &postparent); + fd, (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, xdata); return 0; } int32_t posix_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd, int wbflags) + loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2325,9 +2107,7 @@ posix_open (call_frame_t *frame, xlator_t *this, int32_t _fd = -1; struct posix_fd *pfd = NULL; struct posix_private *priv = NULL; - char was_present = 1; - gid_t gid = 0; - struct stat stbuf = {0, }; + struct iatt stbuf = {0, }; DECLARE_OLD_FS_ID_VAR; @@ -2340,22 +2120,14 @@ posix_open (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) - goto out; + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); - SET_FS_ID (frame->root->uid, gid); + op_ret = -1; + SET_FS_ID (frame->root->uid, frame->root->gid); if (priv->o_direct) flags |= O_DIRECT; - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)) { - was_present = 0; - } - _fd = open (real_path, flags, 0); if (_fd == -1) { op_ret = -1; @@ -2365,76 +2137,47 @@ posix_open (call_frame_t *frame, xlator_t *this, goto out; } - pfd = CALLOC (1, sizeof (*pfd)); - + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); if (!pfd) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); goto out; } pfd->flags = flags; pfd->fd = _fd; - if (wbflags == GF_OPEN_FSYNC) - pfd->flushwrites = 1; - fd_ctx_set (fd, this, (uint64_t)(long)pfd); - -#ifndef HAVE_SET_FSID - if (flags & O_CREAT) { - op_ret = chown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chown on %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - } -#endif - - if (flags & O_CREAT) { - op_ret = posix_lstat_with_gen (this, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "lstat on (%s) " - "failed: %s", real_path, strerror (op_errno)); - goto out; - } - } + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to set the fd context path=%s fd=%p", + real_path, fd); LOCK (&priv->lock); { - priv->stats.nr_files++; + priv->nr_files++; } UNLOCK (&priv->lock); op_ret = 0; - out: +out: if (op_ret == -1) { if (_fd != -1) { close (_fd); - _fd = -1; } } SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); return 0; } -#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \ - (unsigned long)(~(bound - 1)))) - int posix_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - uint64_t tmp_pfd = 0; int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; @@ -2443,8 +2186,7 @@ posix_readv (call_frame_t *frame, xlator_t *this, struct iobref * iobref = NULL; struct iovec vec = {0,}; struct posix_fd * pfd = NULL; - struct stat stbuf = {0,}; - int align = 1; + struct iatt stbuf = {0,}; int ret = -1; VALIDATE_OR_GOTO (frame, out); @@ -2455,44 +2197,28 @@ posix_readv (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; if (!size) { op_errno = EINVAL; - gf_log (this->name, GF_LOG_DEBUG, "size=%"GF_PRI_SIZET, size); + gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); goto out; } - if (pfd->flags & O_DIRECT) { - align = 4096; /* align to page boundary */ - } - - iobuf = iobuf_get (this->ctx->iobuf_pool); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); if (!iobuf) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); + op_errno = ENOMEM; goto out; } _fd = pfd->fd; - - op_ret = lseek (_fd, offset, SEEK_SET); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lseek(%"PRId64") failed: %s", - offset, strerror (op_errno)); - goto out; - } - - op_ret = read (_fd, iobuf->ptr, size); + op_ret = pread (_fd, iobuf->ptr, size, offset); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2504,14 +2230,12 @@ posix_readv (call_frame_t *frame, xlator_t *this, LOCK (&priv->lock); { priv->read_value += op_ret; - priv->interval_read += op_ret; } UNLOCK (&priv->lock); vec.iov_base = iobuf->ptr; vec.iov_len = op_ret; - op_ret = -1; iobref = iobref_new (); iobref_add (iobref, iobuf); @@ -2521,7 +2245,7 @@ posix_readv (call_frame_t *frame, xlator_t *this, * we read from */ - op_ret = posix_fstat_with_gen (this, _fd, &stbuf); + op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2531,16 +2255,14 @@ posix_readv (call_frame_t *frame, xlator_t *this, } /* Hack to notify higher layers of EOF. */ - if (stbuf.st_size == 0) - op_errno = ENOENT; - else if ((offset + vec.iov_len) == stbuf.st_size) + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) op_errno = ENOENT; op_ret = vec.iov_len; out: STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - &vec, 1, &stbuf, iobref); + &vec, 1, &stbuf, iobref, NULL); if (iobref) iobref_unref (iobref); @@ -2552,26 +2274,139 @@ out: int32_t -posix_writev (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) +__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset) +{ + int32_t op_ret = 0; + int idx = 0; + int retval = 0; + off_t internal_off = 0; + + if (!vector) + return -EFAULT; + + internal_off = offset; + for (idx = 0; idx < count; idx++) { + retval = pwrite (fd, vector[idx].iov_base, vector[idx].iov_len, + internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + op_ret += retval; + internal_off += retval; + } + +err: + return op_ret; +} + +int32_t +__posix_writev (int fd, struct iovec *vector, int count, off_t startoff, + int odirect) +{ + int32_t op_ret = 0; + int idx = 0; + int max_buf_size = 0; + int retval = 0; + char *buf = NULL; + char *alloc_buf = NULL; + off_t internal_off = 0; + + /* Check for the O_DIRECT flag during open() */ + if (!odirect) + return __posix_pwritev (fd, vector, count, startoff); + + for (idx = 0; idx < count; idx++) { + if (max_buf_size < vector[idx].iov_len) + max_buf_size = vector[idx].iov_len; + } + + alloc_buf = _page_aligned_alloc (max_buf_size, &buf); + if (!alloc_buf) { + op_ret = -errno; + goto err; + } + + internal_off = startoff; + for (idx = 0; idx < count; idx++) { + memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); + + /* not sure whether writev works on O_DIRECT'd fd */ + retval = pwrite (fd, buf, vector[idx].iov_len, internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + + op_ret += retval; + internal_off += retval; + } + +err: + GF_FREE (alloc_buf); + + return op_ret; +} + +dict_t* +_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: " + "fd: %p inode: %p gfid:%s", fd, inode?inode:0, + inode?uuid_utoa(inode->gfid):"N/A"); + goto out; + } + + if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_OPEN_FD_COUNT); + } + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, + is_append); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_WRITE_IS_APPEND); + } +out: + return rsp_xdata; +} + +int32_t +posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_private * priv = NULL; struct posix_fd * pfd = NULL; - struct stat preop = {0,}; - struct stat postop = {0,}; + struct iatt preop = {0,}; + struct iatt postop = {0,}; int ret = -1; - - int idx = 0; - int align = 4096; - int max_buf_size = 0; - int retval = 0; - char * buf = NULL; - char * alloc_buf = NULL; - uint64_t tmp_pfd = 0; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -2583,135 +2418,111 @@ posix_writev (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gen (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "pre-operation fstat failed on fd=%p: %s", fd, - strerror (op_errno)); - goto out; - } + if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { + /* The write_is_append check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. - op_ret = lseek (_fd, offset, SEEK_SET); + So lock before preop-stat and unlock after write. + */ + locked = _gf_true; + LOCK(&fd->inode->lock); + } + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lseek(%"PRId64") on fd=%p failed: %s", - offset, fd, strerror (op_errno)); + "pre-operation fstat failed on fd=%p: %s", fd, + strerror (op_errno)); goto out; } - /* Check for the O_DIRECT flag during open() */ - if (pfd->flags & O_DIRECT) { - /* This is O_DIRECT'd file */ - op_ret = -1; - for (idx = 0; idx < count; idx++) { - if (max_buf_size < vector[idx].iov_len) - max_buf_size = vector[idx].iov_len; - } - - alloc_buf = MALLOC (1 * (max_buf_size + align)); - if (!alloc_buf) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - - for (idx = 0; idx < count; idx++) { - /* page aligned buffer */ - buf = ALIGN_BUF (alloc_buf, align); - - memcpy (buf, vector[idx].iov_base, - vector[idx].iov_len); - - /* not sure whether writev works on O_DIRECT'd fd */ - retval = write (_fd, buf, vector[idx].iov_len); - - if (retval == -1) { - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "O_DIRECT enabled on fd=%p: %s", - fd, strerror (op_errno)); - goto out; - } + if (locked) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } - break; - } - if (op_ret == -1) - op_ret = 0; - op_ret += retval; - } + op_ret = __posix_writev (_fd, vector, count, offset, + (pfd->flags & O_DIRECT)); - } else /* if (O_DIRECT) */ { + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } - /* This is not O_DIRECT'd fd */ - op_ret = writev (_fd, vector, count); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "writev failed on fd=%p: %s", - fd, strerror (op_errno)); - goto out; - } + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64 + ", %s", offset, strerror (op_errno)); + goto out; } LOCK (&priv->lock); { priv->write_value += op_ret; - priv->interval_write += op_ret; } UNLOCK (&priv->lock); if (op_ret >= 0) { + rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); /* wiretv successful, we also need to get the stat of * the file we wrote to */ - if (pfd->flushwrites) { - /* NOTE: ignore the error, if one occurs at this - * point */ - fsync (_fd); + if (flags & (O_SYNC|O_DSYNC)) { + ret = fsync (_fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + _fd, strerror (errno)); + op_ret = -1; + op_errno = errno; + goto out; + } } - ret = posix_fstat_with_gen (this, _fd, &postop); + ret = posix_fdstat (this, _fd, &postop); if (ret == -1) { - op_ret = -1; + op_ret = -1; op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "post-operation fstat failed on fd=%p: %s", fd, strerror (op_errno)); goto out; } } - out: - if (alloc_buf) { - FREE (alloc_buf); - } +out: + + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + if (rsp_xdata) + dict_unref (rsp_xdata); return 0; } int32_t posix_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, dict_t *xdata) { char * real_path = NULL; int32_t op_ret = -1; @@ -2724,7 +2535,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (this->private, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); priv = this->private; @@ -2732,7 +2543,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "statvfs failed on %s: %s", real_path, strerror (op_errno)); goto out; @@ -2749,81 +2560,65 @@ posix_statfs (call_frame_t *frame, xlator_t *this, op_ret = 0; - out: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf); +out: + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); return 0; } int32_t posix_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - int _fd = -1; - struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL on fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - _fd = pfd->fd; - - /* do nothing */ op_ret = 0; - out: - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); +out: + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); return 0; } int32_t -posix_release (xlator_t *this, - fd_t *fd) +posix_release (xlator_t *this, fd_t *fd) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; struct posix_private * priv = NULL; struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + uint64_t tmp_pfd = 0; VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); priv = this->private; - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = fd_ctx_del (fd, this, &tmp_pfd); if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - _fd = pfd->fd; + pfd = (struct posix_fd *)(long)tmp_pfd; if (pfd->dir) { - op_ret = -1; - op_errno = EBADF; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } @@ -2838,29 +2633,54 @@ posix_release (xlator_t *this, LOCK (&priv->lock); { - priv->stats.nr_files--; + priv->nr_files--; } UNLOCK (&priv->lock); - op_ret = 0; - - out: +out: return 0; } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync) + fd_t *fd, int32_t datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; - struct stat preop = {0,}; - struct stat postop = {0,}; + struct iatt preop = {0,}; + struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2876,21 +2696,26 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif - ret = fd_ctx_get (fd, this, &tmp_pfd); + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd not found in fd's ctx"); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gen (this, _fd, &preop); + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pre-operation fstat failed on fd=%p: %s", fd, strerror (op_errno)); goto out; @@ -2900,22 +2725,27 @@ posix_fsync (call_frame_t *frame, xlator_t *this, ; #ifdef HAVE_FDATASYNC op_ret = fdatasync (_fd); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "fdatasync on fd=%p failed: %s", + fd, strerror (errno)); + } #endif } else { op_ret = fsync (_fd); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "fsync on fd=%p failed: %s", fd, strerror (op_errno)); goto out; } } - op_ret = posix_fstat_with_gen (this, _fd, &postop); + op_ret = posix_fdstat (this, _fd, &postop); if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "post-operation fstat failed on fd=%p: %s", fd, strerror (op_errno)); goto out; @@ -2923,155 +2753,37 @@ posix_fsync (call_frame_t *frame, xlator_t *this, op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, + NULL); return 0; } static int gf_posix_xattr_enotsup_log; - -int -set_file_contents (xlator_t *this, char *real_path, - data_pair_t *trav, int flags) -{ - char * key = NULL; - char real_filepath[ZR_PATH_MAX] = {0,}; - int32_t file_fd = -1; - int op_ret = 0; - int ret = -1; - - key = &(trav->key[15]); - sprintf (real_filepath, "%s/%s", real_path, key); - - if (flags & XATTR_REPLACE) { - /* if file exists, replace it - * else, error out */ - file_fd = open (real_filepath, O_TRUNC|O_WRONLY); - - if (file_fd == -1) { - goto create; - } - - if (trav->value->len) { - ret = write (file_fd, trav->value->data, - trav->value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed while doing setxattr " - "for key %s on path %s: %s", - key, real_filepath, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - } - - create: /* we know file doesn't exist, create it */ - - file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "failed to open file %s with O_CREAT: %s", - key, strerror (errno)); - goto out; - } - - ret = write (file_fd, trav->value->data, trav->value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed on %s while setxattr with " - "key %s: %s", - real_filepath, key, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s while setxattr with " - "key %s: %s", - real_filepath, key, strerror (errno)); - goto out; - } - } - - out: - return op_ret; -} - -int -handle_pair (xlator_t *this, char *real_path, - data_pair_t *trav, int flags) +static int +_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) { - int sys_ret = -1; - int ret = 0; - - if (ZR_FILE_CONTENT_REQUEST(trav->key)) { - ret = set_file_contents (this, real_path, trav, flags); - } else { - sys_ret = sys_lsetxattr (real_path, trav->key, - trav->value->data, - trav->value->len, flags); + posix_xattr_filler_t *filler = NULL; - if (sys_ret < 0) { - if (errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported"); - } else if (errno == ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "setxattr on %s failed: %s", real_path, - strerror (errno)); - } else { - -#ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "%s: key:%s error:%s", - real_path, trav->key, - strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_ERROR, - "%s: key:%s error:%s", - real_path, trav->key, - strerror (errno)); -#endif /* DARWIN */ - } + filler = tmp; - ret = -errno; - goto out; - } - } - out: - return ret; + return posix_handle_pair (filler->this, filler->real_path, k, v, + filler->flags); } int32_t posix_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags) + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - data_pair_t * trav = NULL; - int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -3081,96 +2793,72 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (dict, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); - trav = dict->members_list; + op_ret = -1; + dict_del (dict, GFID_XATTR_KEY); - while (trav) { - ret = handle_pair (this, real_path, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.real_path = real_path; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); return 0; } + int -get_file_contents (xlator_t *this, char *real_path, - const char *name, char **contents) +posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) { - char real_filepath[ZR_PATH_MAX] = {0,}; - char * key = NULL; - int32_t file_fd = -1; - struct stat stbuf = {0,}; - int op_ret = 0; - int ret = -1; - - key = (char *) &(name[15]); - sprintf (real_filepath, "%s/%s", real_path, key); - - op_ret = posix_lstat_with_gen (this, real_filepath, &stbuf); - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - - file_fd = open (real_filepath, O_RDONLY); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - - *contents = CALLOC (stbuf.st_size + 1, sizeof(char)); - - if (! *contents) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); - goto out; - } - - ret = read (file_fd, *contents, stbuf.st_size); - if (ret <= 0) { - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s", - real_filepath, strerror (errno)); - goto out; - } + char *real_path = NULL; + struct dirent *dirent = NULL; + DIR *fd = NULL; + const char *fname = NULL; + char *found = NULL; + int ret = -1; + int op_ret = -1; + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + fd = opendir (real_path); + if (!fd) + return -errno; + + fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); + + while ((dirent = readdir (fd))) { + if (strcasecmp (dirent->d_name, fname) == 0) { + found = gf_strdup (dirent->d_name); + if (!found) { + closedir (fd); + return -ENOMEM; + } + break; + } + } - *contents[stbuf.st_size] = '\0'; + closedir (fd); - op_ret = close (file_fd); - file_fd = -1; - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s", - real_filepath, strerror (errno)); - goto out; - } + if (!found) + return -ENOENT; - out: - if (op_ret < 0) { - if (*contents) - FREE (*contents); - if (file_fd != -1) - close (file_fd); - } + ret = dict_set_dynstr (dict, (char *)key, found); + if (ret) { + GF_FREE (found); + return -ENOMEM; + } + ret = strlen (found) + 1; - return op_ret; + return ret; } /** @@ -3180,22 +2868,25 @@ get_file_contents (xlator_t *this, char *real_path, */ int32_t posix_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { - struct posix_private *priv = NULL; - int32_t op_ret = -1; - int32_t op_errno = ENOENT; - int32_t list_offset = 0; - size_t size = 0; - size_t remaining_size = 0; - char key[1024] = {0,}; - char gen_key[1024] = {0,}; - char * value = NULL; - char * list = NULL; - char * real_path = NULL; - dict_t * dict = NULL; - char * file_contents = NULL; - int ret = -1; + struct posix_private *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t list_offset = 0; + ssize_t size = 0; + size_t remaining_size = 0; + char key[4096] = {0,}; + char host_buf[1024] = {0,}; + char *value = NULL; + char *list = NULL; + char *real_path = NULL; + dict_t *dict = NULL; + char *file_contents = NULL; + int ret = -1; + char *path = NULL; + char *rpath = NULL; + char *dyn_rpath = NULL; DECLARE_OLD_FS_ID_VAR; @@ -3204,43 +2895,193 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + op_ret = -1; priv = this->private; - if (loc->inode && S_ISDIR(loc->inode->st_mode) && name && - ZR_FILE_CONTENT_REQUEST(name)) { - ret = get_file_contents (this, real_path, name, - &file_contents); + if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && + ZR_FILE_CONTENT_REQUEST(name)) { + ret = posix_get_file_contents (this, loc->gfid, &name[15], + &file_contents); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_ERROR, - "getting file contents failed: %s", + "getting file contents failed: %s", strerror (op_errno)); goto out; } } - /* Get the total size */ - dict = get_new_dict (); + dict = dict_new (); if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); + op_errno = ENOMEM; goto out; } - if (loc->inode && S_ISREG (loc->inode->st_mode) && name && - (strcmp (name, "trusted.glusterfs.location") == 0)) { - ret = dict_set_static_ptr (dict, - "trusted.glusterfs.location", - priv->hostname); + if (loc->inode && name && + (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename (frame, this, loc, + name, dict, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + gf_log (this->name, (op_errno == ENOENT) ? + GF_LOG_DEBUG : GF_LOG_WARNING, + "Failed to get real filename (%s, %s): %s", + loc->path, name, strerror (op_errno)); + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { + if (!list_empty (&loc->inode->fd_list)) { + ret = dict_set_uint32 (dict, (char *)name, 1); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to set dictionary value for %s", + name); + } else { + ret = dict_set_uint32 (dict, (char *)name, 0); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to set dictionary value for %s", + name); + } + goto done; + } + if (loc->inode && name && + (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) { + if (LOC_HAS_ABSPATH (loc)) + MAKE_REAL_PATH (rpath, this, loc->path); + else + rpath = real_path; + + (void) snprintf (host_buf, 1024, + "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo + && !uuid_is_null(priv->glusterd_uuid)) + ? uuid_utoa (priv->glusterd_uuid) + : priv->hostname), + rpath); + + dyn_rpath = gf_strdup (host_buf); + if (!dyn_rpath) { + ret = -1; + goto done; + } + size = strlen (dyn_rpath) + 1; + ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY, + dyn_rpath); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, - "could not set hostname (%s) in dictionary", - priv->hostname); + "could not set value (%s) in dictionary", + dyn_rpath); + GF_FREE (dyn_rpath); } + goto done; - } - + } + + if (loc->inode && name && + (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0) + && !uuid_is_null (priv->glusterd_uuid)) { + (void) snprintf (host_buf, 1024, "%s", + uuid_utoa (priv->glusterd_uuid)); + + dyn_rpath = gf_strdup (host_buf); + if (!dyn_rpath) { + ret = -1; + goto done; + } + + size = strlen (dyn_rpath) + 1; + ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY, + dyn_rpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + dyn_rpath); + GF_FREE (dyn_rpath); + } + goto done; + } + + if (loc->inode && name && + (strcmp (name, GFID_TO_PATH_KEY) == 0)) { + ret = inode_path (loc->inode, NULL, &path); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: could not get " + "inode path", uuid_utoa (loc->inode->gfid)); + goto done; + } + + ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + host_buf); + GF_FREE (path); + } + goto done; + } + + if (name) { + strcpy (key, name); + + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else if (op_errno == ENOATTR || + op_errno == ENODATA) { + gf_log (this->name, GF_LOG_DEBUG, + "No such attribute:%s for file %s", + key, real_path); + } else { + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s: %s (%s)", + real_path, key, strerror (op_errno)); + } + + goto done; + } + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + goto out; + } + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); + goto out; + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } + + goto done; + } size = sys_llistxattr (real_path, NULL, 0); if (size == -1) { @@ -3249,11 +3090,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); } else { gf_log (this->name, GF_LOG_ERROR, - "listxattr failed on %s: %s", + "listxattr failed on %s: %s", real_path, strerror (op_errno)); } goto out; @@ -3265,58 +3108,71 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, list = alloca (size + 1); if (!list) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); goto out; } - ret = snprintf (gen_key, 1023, "trusted.%s.gen", this->name); - size = sys_llistxattr (real_path, list, size); remaining_size = size; list_offset = 0; while (remaining_size > 0) { - if(*(list + list_offset) == '\0') + if (*(list + list_offset) == '\0') break; strcpy (key, list + list_offset); - op_ret = sys_lgetxattr (real_path, key, NULL, 0); - if (op_ret == -1) + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); break; + } - value = CALLOC (op_ret + 1, sizeof(char)); + value = GF_CALLOC (size + 1, sizeof(char), + gf_posix_mt_char); if (!value) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); goto out; } - op_ret = sys_lgetxattr (real_path, key, value, op_ret); - if (op_ret == -1) + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); break; + } - value [op_ret] = '\0'; - if (strcmp (key, gen_key) != 0) - dict_set (dict, key, data_from_dynptr (value, op_ret)); - else - FREE (value); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } remaining_size -= strlen (key) + 1; list_offset += strlen (key) + 1; } /* while (remaining_size > 0) */ - done: +done: op_ret = size; if (dict) { - dict_ref (dict); + dict_del (dict, GFID_XATTR_KEY); } - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -3327,17 +3183,16 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, int32_t posix_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name) + fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = ENOENT; - uint64_t tmp_pfd = 0; struct posix_fd * pfd = NULL; int _fd = -1; int32_t list_offset = 0; - size_t size = 0; + ssize_t size = 0; size_t remaining_size = 0; - char key[1024] = {0,}; + char key[4096] = {0,}; char * value = NULL; char * list = NULL; dict_t * dict = NULL; @@ -3351,24 +3206,68 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, SET_FS_ID (frame->root->uid, frame->root->gid); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; /* Get the total size */ dict = get_new_dict (); if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); goto out; } + if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32 (dict, (char *)name, 1); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "Failed to set dictionary value for %s", + name); + goto done; + } + + if (name) { + strcpy (key, name); + + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "key %s (%s)", key, strerror (op_errno)); + goto done; + } + + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + goto out; + } + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); + goto out; + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on key %s failed", key); + GF_FREE (value); + goto out; + } + goto done; + } + size = sys_flistxattr (_fd, NULL, 0); if (size == -1) { op_errno = errno; @@ -3376,11 +3275,12 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting " + "brick with 'user_xattr' flag)"); } else { gf_log (this->name, GF_LOG_ERROR, - "listxattr failed on %p: %s", + "listxattr failed on %p: %s", fd, strerror (op_errno)); } goto out; @@ -3392,7 +3292,6 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, list = alloca (size + 1); if (!list) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); goto out; } @@ -3405,39 +3304,60 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, break; strcpy (key, list + list_offset); - op_ret = sys_fgetxattr (_fd, key, NULL, 0); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); break; + } - value = CALLOC (op_ret + 1, sizeof(char)); + value = GF_CALLOC (size + 1, sizeof(char), + gf_posix_mt_char); if (!value) { + op_ret = -1; op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); goto out; } - op_ret = sys_fgetxattr (_fd, key, value, op_ret); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "the fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); break; + } - value [op_ret] = '\0'; - dict_set (dict, key, data_from_dynptr (value, op_ret)); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "failed on key %s", key); + GF_FREE (value); + goto out; + } remaining_size -= strlen (key) + 1; list_offset += strlen (key) + 1; } /* while (remaining_size > 0) */ - done: +done: op_ret = size; if (dict) { + dict_del (dict, GFID_XATTR_KEY); dict_ref (dict); } - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -3445,64 +3365,29 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, return 0; } - -int -fhandle_pair (xlator_t *this, int fd, - data_pair_t *trav, int flags) +static int +_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) { - int sys_ret = -1; - int ret = 0; - - sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data, - trav->value->len, flags); - - if (sys_ret < 0) { - if (errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported"); - } else if (errno == ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr on fd=%d failed: %s", fd, - strerror (errno)); - } else { - -#ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "fd=%d: key:%s error:%s", - fd, trav->key, - strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_ERROR, - "fd=%d: key:%s error:%s", - fd, trav->key, - strerror (errno)); -#endif /* DARWIN */ - } - - ret = -errno; - goto out; - } + posix_xattr_filler_t *filler = NULL; -out: - return ret; -} + filler = tmp; + return posix_fhandle_pair (filler->this, filler->fd, k, v, + filler->flags); +} int32_t posix_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags) + fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; int _fd = -1; - data_pair_t * trav = NULL; - int ret = -1; + int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -3512,103 +3397,190 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); VALIDATE_OR_GOTO (dict, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL from fd=%p", fd); + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - trav = dict->members_list; + dict_del (dict, GFID_XATTR_KEY); - while (trav) { - ret = fhandle_pair (this, _fd, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.fd = _fd; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); return 0; } +int +_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *) data; + this = filler->this; + + op_ret = sys_lremovexattr (filler->real_path, key); + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr failed on %s (for %s): %s", + filler->real_path, key, strerror (errno)); + } + + return op_ret; +} + int32_t posix_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + if (!strcmp (GFID_XATTR_KEY, name)) { + gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" + " on gfid for file %s", real_path); + op_ret = -1; + goto out; + } + SET_FS_ID (frame->root->uid, frame->root->gid); + /** + * sending an empty key name with xdata containing the + * list of key(s) to be removed implies "bulk remove request" + * for removexattr. + */ + if (name && (strcmp (name, "") == 0) && xdata) { + filler.real_path = real_path; + filler.this = this; + op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); + if (op_ret) { + op_errno = filler.op_errno; + } + + goto out; + } + op_ret = sys_lremovexattr (real_path, name); + if (op_ret == -1) { + op_errno = errno; + if (op_errno != ENOATTR && op_errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr on %s (for %s): %s", real_path, + name, strerror (op_errno)); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +posix_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd * pfd = NULL; + int _fd = -1; + int ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + if (!strcmp (GFID_XATTR_KEY, name)) { + gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" + " on gfid for file"); + goto out; + } + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + + + + SET_FS_ID (frame->root->uid, frame->root->gid); + + op_ret = sys_fremovexattr (_fd, name); if (op_ret == -1) { op_errno = errno; - if (op_errno != ENOATTR && op_errno != EPERM) - gf_log (this->name, GF_LOG_ERROR, - "removexattr on %s: %s", loc->path, - strerror (op_errno)); + if (op_errno != ENOATTR && op_errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "fremovexattr (for %s): %s", + name, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_fsyncdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync) + fd_t *fd, int datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - struct posix_fd * pfd = NULL; - int _fd = -1; int ret = -1; - uint64_t tmp_pfd = 0; + struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - - _fd = pfd->fd; op_ret = 0; - out: - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno); +out: + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); return 0; } @@ -3616,12 +3588,12 @@ posix_fsyncdir (call_frame_t *frame, xlator_t *this, void posix_print_xattr (dict_t *this, - char *key, - data_t *value, - void *data) + char *key, + data_t *value, + void *data) { - gf_log ("posix", GF_LOG_DEBUG, - "(key/val) = (%s/%d)", key, data_to_int32 (value)); + gf_log ("posix", GF_LOG_DEBUG, + "(key/val) = (%s/%d)", key, data_to_int32 (value)); } @@ -3635,199 +3607,259 @@ posix_print_xattr (dict_t *this, static void __add_array (int32_t *dest, int32_t *src, int count) { - int i = 0; - for (i = 0; i < count; i++) { - dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i])); - } + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32 (dest[i]); + if (destval == 0xffffffff) + continue; + dest[i] = hton32 (destval + ntoh32 (src[i])); + } } +static void +__or_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i])); + } +} -/** - * xattrop - xattr operations - for internal use by GlusterFS - * @optype: ADD_ARRAY: - * dict should contain: - * "key" ==> array of 32-bit numbers - */ +static void +__and_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i])); + } +} -int -do_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +static void +__add_long_array (int64_t *dest, int64_t *src, int count) { - char *real_path = NULL; - int32_t *array = NULL; - int size = 0; - int count = 0; + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i])); + } +} - int op_ret = 0; - int op_errno = 0; +static int +_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + + count = v->len; + array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); + + LOCK (&inode->lock); + { + if (filler->real_path) { + size = sys_lgetxattr (filler->real_path, k, + (char *)array, v->len); + } else { + size = sys_fgetxattr (filler->fd, k, (char *)array, + v->len); + } - int ret = 0; - int _fd = -1; - uint64_t tmp_pfd = 0; - struct posix_fd *pfd = NULL; + op_errno = errno; + if ((size == -1) && (op_errno != ENODATA) && + (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr (marker_xattrs, + k)) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s while doing " + "xattrop: Key:%s (%s)", + filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fgetxattr failed on fd=%d while doing " + "xattrop: Key:%s (%s)", + filler->fd, + k, strerror (op_errno)); + } - data_pair_t *trav = NULL; + op_ret = -1; + goto unlock; + } - char * path = NULL; - inode_t * inode = NULL; + switch (optype) { - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (xattr, out); - VALIDATE_OR_GOTO (this, out); + case GF_XATTROP_ADD_ARRAY: + __add_array ((int32_t *) array, (int32_t *) v->data, + v->len / 4); + break; - trav = xattr->members_list; + case GF_XATTROP_ADD_ARRAY64: + __add_long_array ((int64_t *) array, (int64_t *) v->data, + v->len / 8); + break; - if (fd) { - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "failed to get pfd from fd=%p", - fd); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; - _fd = pfd->fd; - } + case GF_XATTROP_OR_ARRAY: + __or_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; - if (loc && loc->path) - MAKE_REAL_PATH (real_path, this, loc->path); + case GF_XATTROP_AND_ARRAY: + __and_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; - if (loc) { - path = strdup (loc->path); - inode = loc->inode; - } else { - inode = fd->inode; + default: + gf_log (this->name, GF_LOG_ERROR, + "Unknown xattrop type (%d) on %s. Please send " + "a bug report to gluster-devel@nongnu.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr (filler->real_path, k, array, + v->len, 0); + } else { + size = sys_fsetxattr (filler->fd, k, (char *)array, + v->len, 0); + } } +unlock: + UNLOCK (&inode->lock); - while (trav) { - count = trav->value->len / sizeof (int32_t); - array = CALLOC (count, sizeof (int32_t)); + if (op_ret == -1) + goto out; - LOCK (&inode->lock); - { - if (loc) { - size = sys_lgetxattr (real_path, trav->key, (char *)array, - trav->value->len); - } else { - size = sys_fgetxattr (_fd, trav->key, (char *)array, - trav->value->len); - } + op_errno = errno; + if (size == -1) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "setxattr failed on %s while doing xattrop: " + "key=%s (%s)", filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fsetxattr failed on fd=%d while doing xattrop: " + "key=%s (%s)", filler->fd, + k, strerror (op_errno)); - op_errno = errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported by filesystem"); - } else { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on %s while doing " - "xattrop: %s", path, - strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fgetxattr failed on fd=%d while doing " - "xattrop: %s", _fd, - strerror (op_errno)); - } + op_ret = -1; + goto out; + } else { + size = dict_set_bin (d, k, array, v->len); - op_ret = -1; - goto unlock; - } + if (size != 0) { + if (filler->real_path) + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", filler->real_path, + k, strerror (-size)); + else + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (fd=%d): " + "key=%s (%s)", filler->fd, + k, strerror (-size)); - switch (optype) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + array = NULL; + } - case GF_XATTROP_ADD_ARRAY: - __add_array (array, (int32_t *) trav->value->data, - trav->value->len / 4); - break; + array = NULL; - default: - gf_log (this->name, GF_LOG_ERROR, - "Unknown xattrop type (%d) on %s. Please send " - "a bug report to gluster-devel@nongnu.org", - optype, path); - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } +out: + return op_ret; +} - if (loc) { - size = sys_lsetxattr (real_path, trav->key, array, - trav->value->len, 0); - } else { - size = sys_fsetxattr (_fd, trav->key, (char *)array, - trav->value->len, 0); - } - } - unlock: - UNLOCK (&inode->lock); +/** + * xattrop - xattr operations - for internal use by GlusterFS + * @optype: ADD_ARRAY: + * dict should contain: + * "key" ==> array of 32-bit numbers + */ - if (op_ret == -1) +int +do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr) +{ + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0,}; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (xattr, out); + VALIDATE_OR_GOTO (this, out); + + if (fd) { + op_ret = posix_fd_ctx_get (fd, this, &pfd); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get pfd from fd=%p", + fd); + op_errno = EBADFD; goto out; + } + _fd = pfd->fd; + } - op_errno = errno; - if (size == -1) { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "setxattr failed on %s while doing xattrop: " - "key=%s (%s)", path, - trav->key, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr failed on fd=%d while doing xattrop: " - "key=%s (%s)", _fd, - trav->key, strerror (op_errno)); + if (loc && !uuid_is_null (loc->gfid)) + MAKE_INODE_HANDLE (real_path, this, loc, NULL); - op_ret = -1; - goto out; - } else { - size = dict_set_bin (xattr, trav->key, array, - trav->value->len); - - if (size != 0) { - if (loc) - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (path=%s): " - "key=%s (%s)", path, - trav->key, strerror (-size)); - else - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (fd=%d): " - "key=%s (%s)", _fd, - trav->key, strerror (-size)); + if (real_path) { + inode = loc->inode; + } else if (fd) { + inode = fd->inode; + } - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } + filler.this = this; + filler.fd = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; - array = NULL; - trav = trav->next; - } + op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, + &filler); out: - if (array) - FREE (array); - - if (path) - FREE (path); - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr); - return 0; + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL); + return 0; } int posix_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop (frame, this, loc, NULL, optype, xattr); return 0; @@ -3836,7 +3868,7 @@ posix_xattrop (call_frame_t *frame, xlator_t *this, int posix_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop (frame, this, NULL, fd, optype, xattr); return 0; @@ -3845,7 +3877,7 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this, int posix_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) + loc_t *loc, int32_t mask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -3858,37 +3890,36 @@ posix_access (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); op_ret = access (real_path, mask & 07); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, off_t offset, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; - struct stat preop = {0,}; - struct stat postop = {0,}; + struct iatt preop = {0,}; + struct iatt postop = {0,}; struct posix_fd *pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -3901,18 +3932,17 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gen (this, _fd, &preop); + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -3925,13 +3955,13 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "ftruncate failed on fd=%p: %s", - fd, strerror (errno)); + gf_log (this->name, GF_LOG_ERROR, + "ftruncate failed on fd=%p (%"PRId64": %s", + fd, offset, strerror (errno)); goto out; } - op_ret = posix_fstat_with_gen (this, _fd, &postop); + op_ret = posix_fdstat (this, _fd, &postop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -3942,321 +3972,27 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, op_ret = 0; - out: +out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, + &postop, NULL); return 0; } -static int -same_file_type (mode_t m1, mode_t m2) -{ - return ((S_IFMT & (m1 ^ m2)) == 0); -} - - -static int -ensure_file_type (xlator_t *this, char *pathname, mode_t mode) -{ - struct stat stbuf = {0,}; - int op_ret = 0; - int ret = -1; - - ret = posix_lstat_with_gen (this, pathname, &stbuf); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "stat failed while trying to make sure entry %s " - "is a directory: %s", pathname, strerror (errno)); - goto out; - } - - if (!same_file_type (mode, stbuf.st_mode)) { - op_ret = -EEXIST; - gf_log (this->name, GF_LOG_ERROR, - "entry %s is a different type of file " - "than expected", pathname); - goto out; - } - out: - return op_ret; -} - -static int -create_entry (xlator_t *this, int32_t flags, - dir_entry_t *entry, char *pathname) -{ - int op_ret = 0; - int ret = -1; - struct timeval tv[2] = {{0,0},{0,0}}; - - if (S_ISDIR (entry->buf.st_mode)) { - /* - * If the entry is directory, create it by - * calling 'mkdir'. If the entry is already - * present, check if it is a directory, - * and issue a warning if otherwise. - */ - - ret = mkdir (pathname, entry->buf.st_mode); - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, pathname, - entry->buf.st_mode); - } - else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "mkdir %s with mode (0%o) failed: %s", - pathname, entry->buf.st_mode, - strerror (errno)); - goto out; - } - } - - } else if ((flags & GF_SET_IF_NOT_PRESENT) - || !(flags & GF_SET_DIR_ONLY)) { - - /* create a 0-byte file here */ - - if (S_ISREG (entry->buf.st_mode)) { - ret = open (pathname, O_CREAT|O_EXCL, - entry->buf.st_mode); - - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, - pathname, - entry->buf.st_mode); - } - else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "Error creating file %s with " - "mode (0%o): %s", - pathname, entry->buf.st_mode, - strerror (errno)); - goto out; - } - } - - close (ret); - - } else if (S_ISLNK (entry->buf.st_mode)) { - ret = symlink (entry->link, pathname); - - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, - pathname, - entry->buf.st_mode); - } - else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "error creating symlink %s: %s" - , pathname, strerror (errno)); - goto out; - } - } - - } else if (S_ISBLK (entry->buf.st_mode) || - S_ISCHR (entry->buf.st_mode) || - S_ISFIFO (entry->buf.st_mode) || - S_ISSOCK (entry->buf.st_mode)) { - - ret = mknod (pathname, entry->buf.st_mode, - entry->buf.st_dev); - - if (ret == -1) { - if (errno == EEXIST) { - op_ret = ensure_file_type (this, - pathname, - entry->buf.st_mode); - } else { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "error creating device file " - "%s: %s", - pathname, strerror (errno)); - goto out; - } - } - } else { - gf_log (this->name, GF_LOG_ERROR, - "invalid mode 0%o for %s", entry->buf.st_mode, - pathname); - op_ret = -EINVAL; - goto out; - } - } - - /* - * Preserve atime and mtime - */ - - if (!S_ISLNK (entry->buf.st_mode)) { - tv[0].tv_sec = entry->buf.st_atime; - tv[1].tv_sec = entry->buf.st_mtime; - ret = utimes (pathname, tv); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "utimes %s failed: %s", - pathname, strerror (errno)); - goto out; - } - } - -out: - return op_ret; - -} - - -int -posix_setdents (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t flags, dir_entry_t *entries, - int32_t count) -{ - char * real_path = NULL; - char * entry_path = NULL; - int32_t real_path_len = -1; - int32_t entry_path_len = -1; - int32_t ret = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_fd * pfd = {0, }; - struct timeval tv[2] = {{0, }, {0, }}; - uint64_t tmp_pfd = 0; - char pathname[ZR_PATH_MAX] = {0,}; - dir_entry_t * trav = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (entries, out); - - tv[0].tv_sec = tv[0].tv_usec = 0; - tv[1].tv_sec = tv[1].tv_usec = 0; - - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_DEBUG, - "fd's ctx not found on fd=%p for %s", - fd, this->name); - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; - - real_path = pfd->path; - - if (!real_path) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_DEBUG, - "path is NULL on pfd=%p fd=%p", pfd, fd); - goto out; - } - - real_path_len = strlen (real_path); - entry_path_len = real_path_len + 256; - entry_path = CALLOC (1, entry_path_len); - - if (!entry_path) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "Out of memory."); - goto out; - } - - strcpy (entry_path, real_path); - entry_path[real_path_len] = '/'; - - /* fd exists, and everything looks fine */ - /** - * create an entry for each one present in '@entries' - * - if flag is set (ie, if its namespace), create both directories - * and files - * - if not set, create only directories. - * - * after the entry is created, change the mode and ownership of the - * entry according to the stat present in entries->buf. - */ - - trav = entries->next; - while (trav) { - strcpy (pathname, entry_path); - strcat (pathname, trav->name); - - ret = create_entry (this, flags, trav, pathname); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - /* TODO: handle another flag, GF_SET_OVERWRITE */ - - /* Change the mode */ - if (!S_ISLNK (trav->buf.st_mode)) { - ret = chmod (pathname, trav->buf.st_mode); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chmod on %s failed: %s", pathname, - strerror (op_errno)); - goto out; - } - } - - /* change the ownership */ - ret = lchown (pathname, trav->buf.st_uid, trav->buf.st_gid); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chmod on %s failed: %s", pathname, - strerror (op_errno)); - goto out; - } - - if (flags & GF_SET_EPOCH_TIME) { - ret = utimes (pathname, tv); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "utimes on %s failed: %s", pathname, - strerror (op_errno)); - goto out; - } - } - - /* consider the next entry */ - trav = trav->next; - } - - op_ret = 0; - out: - STACK_UNWIND_STRICT (setdents, frame, op_ret, op_errno); - if (entry_path) - FREE (entry_path); - - return 0; -} - int32_t posix_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int _fd = -1; int32_t op_ret = -1; int32_t op_errno = 0; - struct stat buf = {0,}; + struct iatt buf = {0,}; struct posix_fd *pfd = NULL; - uint64_t tmp_pfd = 0; int ret = -1; - struct posix_private *priv = NULL; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -4268,18 +4004,17 @@ posix_fstat (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gen (this, _fd, &buf); + op_ret = posix_fdstat (this, _fd, &buf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s", @@ -4292,7 +4027,7 @@ posix_fstat (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, NULL); return 0; } @@ -4300,147 +4035,102 @@ static int gf_posix_lk_log; int32_t posix_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct flock *lock) + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - struct flock nullock = {0, }; + struct gf_flock nullock = {0, }; - gf_posix_lk_log++; - - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock); + STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL); return 0; } int32_t posix_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *lock) + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is not loaded. " - "You need to use it for proper functioning of GlusterFS"); + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *lock) + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is not loaded. " - "You need to use it for proper functioning of GlusterFS"); + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is not loaded. " - "You need to use it for proper functioning of GlusterFS"); + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - gf_log (this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is not loaded. " - " You need to use it for proper functioning of GlusterFS"); + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); - STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); return 0; } -int32_t -posix_do_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, int whichop) +int +posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { - uint64_t tmp_pfd = 0; - struct posix_fd *pfd = NULL; - DIR *dir = NULL; - int ret = -1; - size_t filled = 0; - int count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - gf_dirent_t *this_entry = NULL; - gf_dirent_t entries; - struct dirent *entry = NULL; - off_t in_case = -1; + off_t in_case = -1; + size_t filled = 0; + int count = 0; + char entrybuf[sizeof(struct dirent) + 256 + 8]; + struct dirent *entry = NULL; int32_t this_size = -1; - char *real_path = NULL; - int real_path_len = -1; - char *entry_path = NULL; - int entry_path_len = -1; - struct posix_private *priv = NULL; - struct stat stbuf = {0, }; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - INIT_LIST_HEAD (&entries.list); - - priv = this->private; - - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; - if (!pfd->path) { - op_errno = EBADFD; - gf_log (this->name, GF_LOG_DEBUG, - "pfd does not have path set (possibly file " - "fd, fd=%p)", fd); - goto out; - } - - real_path = pfd->path; - real_path_len = strlen (real_path); - - entry_path_len = real_path_len + NAME_MAX; - entry_path = alloca (entry_path_len); - - if (!entry_path) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - - strncpy (entry_path, real_path, entry_path_len); - entry_path[real_path_len] = '/'; - - dir = pfd->dir; + gf_dirent_t *this_entry = NULL; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; - if (!dir) { - gf_log (this->name, GF_LOG_DEBUG, - "dir is NULL for fd=%p", fd); - op_errno = EINVAL; - goto out; + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; } - if (!off) { rewinddir (dir); } else { @@ -4451,275 +4141,260 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, in_case = telldir (dir); if (in_case == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "telldir failed on dir=%p: %s", + gf_log (THIS->name, GF_LOG_ERROR, + "telldir failed on dir=%p: %s", dir, strerror (errno)); goto out; } errno = 0; - entry = readdir (dir); + entry = NULL; + readdir_r (dir, (struct dirent *)entrybuf, &entry); if (!entry) { if (errno == EBADF) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "readdir failed on dir=%p: %s", - dir, strerror (op_errno)); + gf_log (THIS->name, GF_LOG_WARNING, + "readdir failed on dir=%p: %s", + dir, strerror (errno)); goto out; } break; } - this_size = dirent_size (entry); +#ifdef __NetBSD__ + /* + * NetBSD with UFS1 backend uses backing files for + * extended attributes. They can be found in a + * .attribute file located at the root of the filesystem + * We hide it to glusterfs clients, since chaos will occur + * when the cluster/dht xlator decides to distribute + * exended attribute backing file accross storage servers. + */ + if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) + && (!strcmp(entry->d_name, ".attribute"))) + continue; +#endif /* __NetBSD__ */ + + if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) + && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) { + continue; + } + + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } + } + + this_size = max (sizeof (gf_dirent_t), + sizeof (gfs3_dirplist)) + + strlen (entry->d_name) + 1; if (this_size + filled > size) { seekdir (dir, in_case); break; } - /* Device spanning requires that we have a stat buf for the - * file so we need to perform a stat on the two conditions - * below. - */ - if ((whichop == GF_FOP_READDIRP) || (priv->span_devices)) { - strcpy (entry_path + real_path_len + 1, entry->d_name); - op_ret = posix_lstat_with_gen (this, entry_path, &stbuf); - if (-1 == op_ret) - continue; - } else - stbuf.st_ino = entry->d_ino; - - /* So at this point stbuf ino is either: - * a. the original inode number got from entry, in case this - * was a readdir fop or if device spanning was disabled. - * - * b. the scaled inode number, if device spanning was enabled - * or this was a readdirp fop. - */ - entry->d_ino = stbuf.st_ino; - this_entry = gf_dirent_for_name (entry->d_name); if (!this_entry) { - gf_log (this->name, GF_LOG_ERROR, + gf_log (THIS->name, GF_LOG_ERROR, "could not create gf_dirent for entry %s: (%s)", entry->d_name, strerror (errno)); goto out; } this_entry->d_off = telldir (dir); this_entry->d_ino = entry->d_ino; - this_entry->d_stat = stbuf; + this_entry->d_type = entry->d_type; - list_add_tail (&this_entry->list, &entries.list); + list_add_tail (&this_entry->list, &entries->list); filled += this_size; count ++; } - op_ret = count; - errno = 0; if ((!readdir (dir) && (errno == 0))) - op_errno = ENOENT; + /* Indicate EOF */ + errno = ENOENT; +out: + return count; +} - out: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries); +dict_t * +posix_entry_xattr_fill (xlator_t *this, inode_t *inode, + fd_t *fd, char *name, dict_t *dict, + struct iatt *stbuf) +{ + loc_t tmp_loc = {0,}; + char *entry_path = NULL; - gf_dirent_free (&entries); + /* if we don't send the 'loc', open-fd-count be a problem. */ + tmp_loc.inode = inode; - return 0; -} + MAKE_HANDLE_PATH (entry_path, this, fd->inode->gfid, name); + return posix_lookup_xattr_fill (this, entry_path, + &tmp_loc, dict, stbuf); -int32_t -posix_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) -{ - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR); - return 0; } -int32_t -posix_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) +int +posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) { - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP); - return 0; + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = {0, }; + uuid_t gfid; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + + list_for_each_entry (entry, &entries->list, list) { + memset (gfid, 0, 16); + inode = inode_grep (fd->inode->table, fd->inode, + entry->d_name); + if (inode) + uuid_copy (gfid, inode->gfid); + + strcpy (&hpath[len+1], entry->d_name); + + posix_pstat (this, gfid, hpath, &stbuf); + + if (!inode) + inode = inode_find (itable, stbuf.ia_gfid); + + if (!inode) + inode = inode_new (itable); + + entry->inode = inode; + + if (dict) { + entry->dict = + posix_entry_xattr_fill (this, entry->inode, + fd, entry->d_name, + dict, &stbuf); + dict_ref (entry->dict); + } + + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + inode = NULL; + } + + return 0; } int32_t -posix_stats (call_frame_t *frame, xlator_t *this, - int32_t flags) - +posix_do_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) { - int32_t op_ret = -1; - int32_t op_errno = 0; - - struct xlator_stats xlstats = {0, }; - struct xlator_stats * stats = NULL; - struct statvfs buf = {0,}; - struct timeval tv = {0,}; - struct posix_private * priv = (struct posix_private *)this->private; - - int64_t avg_read = 0; - int64_t avg_write = 0; - int64_t _time_ms = 0; - - DECLARE_OLD_FS_ID_VAR; + struct posix_fd *pfd = NULL; + DIR *dir = NULL; + int ret = -1; + int count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + gf_dirent_t entries; + int32_t skip_dirs = 0; - SET_FS_ID (frame->root->uid, frame->root->gid); VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); - stats = &xlstats; - - op_ret = statvfs (priv->base_path, &buf); + INIT_LIST_HEAD (&entries.list); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s", - strerror (op_errno)); + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; goto out; } - /* client info is maintained at FSd */ - stats->nr_clients = priv->stats.nr_clients; - stats->nr_files = priv->stats.nr_files; - - /* number of free block in the filesystem. */ - stats->free_disk = buf.f_bfree * buf.f_bsize; - - stats->total_disk_size = buf.f_blocks * buf.f_bsize; - stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize; + dir = pfd->dir; - /* Calculate read and write usage */ - op_ret = gettimeofday (&tv, NULL); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "gettimeofday failed: %s", strerror (errno)); + if (!dir) { + gf_log (this->name, GF_LOG_WARNING, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; goto out; - } - - LOCK (&priv->lock); - { - /* Read */ - _time_ms = (tv.tv_sec - priv->init_time.tv_sec) * 1000 + - ((tv.tv_usec - priv->init_time.tv_usec) / 1000); - - avg_read = (_time_ms) ? (priv->read_value / _time_ms) : 0; /* KBps */ - avg_write = (_time_ms) ? (priv->write_value / _time_ms) : 0; /* KBps */ - - _time_ms = (tv.tv_sec - priv->prev_fetch_time.tv_sec) * 1000 + - ((tv.tv_usec - priv->prev_fetch_time.tv_usec) / 1000); - - if (_time_ms && ((priv->interval_read / _time_ms) > priv->max_read)) { - priv->max_read = (priv->interval_read / _time_ms); - } + } - if (_time_ms && - ((priv->interval_write / _time_ms) > priv->max_write)) { - priv->max_write = priv->interval_write / _time_ms; - } + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK (&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. + */ + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK (&fd->lock); - stats->read_usage = avg_read / priv->max_read; - stats->write_usage = avg_write / priv->max_write; - } - UNLOCK (&priv->lock); + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; - op_ret = gettimeofday (&(priv->prev_fetch_time), NULL); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "gettimeofday failed: %s", - strerror (op_errno)); + if (whichop != GF_FOP_READDIRP) goto out; - } - priv->interval_read = 0; - priv->interval_write = 0; + posix_readdirp_fill (this, fd, &entries, dict); - op_ret = 0; +out: + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); - out: - SET_TO_OLD_FS_ID (); + gf_dirent_free (&entries); - STACK_UNWIND (frame, op_ret, op_errno, stats); return 0; } + int32_t -posix_checksum (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flag) +posix_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) { - char * real_path = NULL; - DIR * dir = NULL; - struct dirent * dirent = NULL; - uint8_t file_checksum[NAME_MAX] = {0,}; - uint8_t dir_checksum[NAME_MAX] = {0,}; - int32_t op_ret = -1; - int32_t op_errno = 0; - int i = 0; - int length = 0; - - struct stat buf = {0,}; - char tmp_real_path[ZR_PATH_MAX] = {0,}; - int ret = -1; - - MAKE_REAL_PATH (real_path, this, loc->path); - - dir = opendir (real_path); - - if (!dir){ - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "opendir() failed on `%s': %s", - real_path, strerror (op_errno)); - goto out; - } - - while ((dirent = readdir (dir))) { - errno = 0; - if (!dirent) { - if (errno != 0) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "readdir() failed on dir=%p: %s", - dir, strerror (errno)); - goto out; - } - break; - } - - length = strlen (dirent->d_name); - - strcpy (tmp_real_path, real_path); - strcat (tmp_real_path, "/"); - strcat (tmp_real_path, dirent->d_name); - ret = posix_lstat_with_gen (this, tmp_real_path, &buf); - - if (ret == -1) - continue; - - if (S_ISDIR (buf.st_mode)) { - for (i = 0; i < length; i++) - dir_checksum[i] ^= dirent->d_name[i]; - } else { - for (i = 0; i < length; i++) - file_checksum[i] ^= dirent->d_name[i]; - } - } - closedir (dir); - - op_ret = 0; + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); + return 0; +} - out: - STACK_UNWIND_STRICT (checksum, frame, op_ret, op_errno, - file_checksum, dir_checksum); +int32_t +posix_readdirp (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *dict) +{ + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict); return 0; } @@ -4728,30 +4403,24 @@ posix_priv (xlator_t *this) { struct posix_private *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, - this->name); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); gf_proc_dump_add_section(key_prefix); - if (!this) + if (!this) return 0; priv = this->private; - if (!priv) + if (!priv) return 0; - gf_proc_dump_build_key(key, key_prefix, "base_path"); - gf_proc_dump_write(key,"%s", priv->base_path); - gf_proc_dump_build_key(key, key_prefix, "base_path_length"); - gf_proc_dump_write(key,"%d", priv->base_path_length); - gf_proc_dump_build_key(key, key_prefix, "max_read"); - gf_proc_dump_write(key,"%d", priv->max_read); - gf_proc_dump_build_key(key, key_prefix, "max_write"); - gf_proc_dump_write(key,"%d", priv->max_write); - gf_proc_dump_build_key(key, key_prefix, "stats.nr_files"); - gf_proc_dump_write(key,"%ld", priv->stats.nr_files); + gf_proc_dump_write("base_path","%s", priv->base_path); + gf_proc_dump_write("base_path_length","%d", priv->base_path_length); + gf_proc_dump_write("max_read","%d", priv->read_value); + gf_proc_dump_write("max_write","%d", priv->write_value); + gf_proc_dump_write("nr_files","%ld", priv->nr_files); return 0; } @@ -4765,67 +4434,72 @@ posix_inode (xlator_t *this) int32_t posix_rchecksum (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, int32_t len) + fd_t *fd, off_t offset, int32_t len, dict_t *xdata) { - char *buf = NULL; - - int _fd = -1; - uint64_t tmp_pfd = 0; - - struct posix_fd *pfd = NULL; - - int op_ret = -1; - int op_errno = 0; - - int ret = 0; - - int32_t weak_checksum = 0; - uint8_t strong_checksum[MD5_DIGEST_LEN]; + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + int32_t weak_checksum = 0; + unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - memset (strong_checksum, 0, MD5_DIGEST_LEN); + priv = this->private; + memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - buf = CALLOC (1, len); - if (!buf) { + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); goto out; } - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long) tmp_pfd; _fd = pfd->fd; - ret = pread (_fd, buf, len, offset); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pread of %d bytes returned %d (%s)", - len, ret, strerror (errno)); + LOCK (&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect (fd, pfd, 0, offset, len); + + ret = pread (_fd, buf, len, offset); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pread of %d bytes returned %d (%s)", + len, ret, strerror (errno)); + + op_errno = errno; + } - op_errno = errno; - goto out; } + UNLOCK (&fd->lock); - weak_checksum = gf_rsync_weak_checksum (buf, len); - gf_rsync_strong_checksum (buf, len, strong_checksum); + if (ret < 0) + goto out; - FREE (buf); + weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len); + gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum); op_ret = 0; out: STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, - weak_checksum, strong_checksum); + weak_checksum, strong_checksum, NULL); + + GF_FREE (alloc_buf); + return 0; } @@ -4840,37 +4514,156 @@ notify (xlator_t *this, ...) { switch (event) - { - case GF_EVENT_PARENT_UP: - { - /* Tell the parent that posix xlator is up */ - default_notify (this, GF_EVENT_CHILD_UP, data); - } - break; - default: - /* */ - break; - } + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that posix xlator is up */ + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; + default: + /* */ + break; + } return 0; } +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + + priv = this->private; + + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "uid/gid for brick path %s, %s", + priv->base_path, strerror (errno)); + + return ret; +} + + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str = NULL; + + priv = this->private; + + GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); + + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); + + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (uuid_is_null (priv->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + posix_spawn_health_check_thread (this); + + ret = 0; +out: + return ret; +} + + /** * init - */ int init (xlator_t *this) { - int ret = 0; - int op_ret = -1; - gf_boolean_t tmp_bool = 0; - struct stat buf = {0,}; - struct posix_private * _private = NULL; - data_t * dir_data = NULL; - data_t * tmp_data = NULL; - uint64_t time64 = 0; - - int dict_ret = 0; - int32_t janitor_sleep; + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t *tmp_data = NULL; + struct stat buf = {0,}; + gf_boolean_t tmp_bool = 0; + int dict_ret = 0; + int ret = 0; + int op_ret = -1; + ssize_t size = -1; + int32_t janitor_sleep = 0; + uuid_t old_uuid = {0,}; + uuid_t dict_uuid = {0,}; + uuid_t gfid = {0,}; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + char *guuid = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -4881,10 +4674,10 @@ init (xlator_t *this) goto out; } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling. Please check the volume file."); - } + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Volume is dangling. Please check the volume file."); + } if (!dir_data) { gf_log (this->name, GF_LOG_CRITICAL, @@ -4900,178 +4693,244 @@ init (xlator_t *this) if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { gf_log (this->name, GF_LOG_ERROR, "Directory '%s' doesn't exist, exiting.", - dir_data->data); + dir_data->data); ret = -1; goto out; } - /* Check for Extended attribute support, if not present, log it */ op_ret = sys_lsetxattr (dir_data->data, - "trusted.glusterfs.test", "working", 8, 0); - if (op_ret < 0) { - tmp_data = dict_get (this->options, - "mandate-attribute"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &tmp_bool) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for key " - "\"mandate-attribute\""); - ret = -1; - goto out; - } - if (!tmp_bool) { - gf_log (this->name, GF_LOG_WARNING, - "Extended attribute not supported, " - "starting as per option"); - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Extended attribute not supported, " - "exiting."); - ret = -1; - goto out; - } - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Extended attribute not supported, exiting."); - ret = -1; - goto out; - } + "trusted.glusterfs.test", "working", 8, 0); + if (op_ret == 0) { + sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); + } else { + tmp_data = dict_get (this->options, + "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &tmp_bool) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "wrong option provided for key " + "\"mandate-attribute\""); + ret = -1; + goto out; + } + if (!tmp_bool) { + gf_log (this->name, GF_LOG_WARNING, + "Extended attribute not supported, " + "starting as per option"); + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "Extended attribute not supported, " + "exiting."); + ret = -1; + goto out; + } + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "Extended attribute not supported, exiting."); + ret = -1; + goto out; + } } - _private = CALLOC (1, sizeof (*_private)); - if (!_private) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); + tmp_data = dict_get (this->options, "volume-id"); + if (tmp_data) { + op_ret = uuid_parse (tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "wrong volume-id (%s) set in volume file", + tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { + if (uuid_compare (old_uuid, dict_uuid)) { + gf_log (this->name, GF_LOG_ERROR, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa (old_uuid)); + ret = -1; + goto out; + } + } else if ((size == -1) && (errno == ENODATA)) { + + gf_log (this->name, GF_LOG_ERROR, + "Extended attribute trusted.glusterfs." + "volume-id is absent"); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fetch volume-id (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } else { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid (gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa (gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: wrong value set as gfid", + dir_data->data); ret = -1; goto out; + } else if ((size == -1) && (errno != ENODATA)) { + /* Wrong 'gfid' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fetch gfid (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + 16, XATTR_CREATE); + if (size) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set gfid (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } } - _private->base_path = strdup (dir_data->data); - _private->base_path_length = strlen (_private->base_path); - - _private->trash_path = CALLOC (1, _private->base_path_length - + strlen ("/") - + strlen (GF_REPLICATE_TRASH_DIR) - + 1); + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) + gf_log (this->name, GF_LOG_WARNING, + "Posix access control list is not supported."); - if (!_private->trash_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); + ret = 0; + _private = GF_CALLOC (1, sizeof (*_private), + gf_posix_mt_posix_private); + if (!_private) { ret = -1; goto out; } - strncpy (_private->trash_path, _private->base_path, _private->base_path_length); - strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR); + _private->base_path = gf_strdup (dir_data->data); + _private->base_path_length = strlen (_private->base_path); LOCK_INIT (&_private->lock); - ret = gethostname (_private->hostname, 256); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "could not find hostname (%s)", strerror (errno)); - } - - { - /* Stats related variables */ - gettimeofday (&_private->init_time, NULL); - gettimeofday (&_private->prev_fetch_time, NULL); - _private->max_read = 1; - _private->max_write = 1; + ret = dict_get_str (this->options, "hostname", &_private->hostname); + if (ret) { + _private->hostname = GF_CALLOC (256, sizeof (char), + gf_common_mt_char); + if (!_private->hostname) { + goto out; + } + ret = gethostname (_private->hostname, 256); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not find hostname (%s)", strerror (errno)); + } } _private->export_statfs = 1; tmp_data = dict_get (this->options, "export-statfs-size"); if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->export_statfs) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "'export-statfs-size' takes only boolean " - "options"); - goto out; - } + if (gf_string2boolean (tmp_data->data, + &_private->export_statfs) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "'export-statfs-size' takes only boolean " + "options"); + goto out; + } if (!_private->export_statfs) gf_log (this->name, GF_LOG_DEBUG, - "'statfs()' returns dummy size"); + "'statfs()' returns dummy size"); } _private->background_unlink = 0; tmp_data = dict_get (this->options, "background-unlink"); if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->background_unlink) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "'background-unlink' takes only boolean " - "options"); - goto out; - } + if (gf_string2boolean (tmp_data->data, + &_private->background_unlink) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "'background-unlink' takes only boolean " + "options"); + goto out; + } if (_private->background_unlink) gf_log (this->name, GF_LOG_DEBUG, - "unlinks will be performed in background"); + "unlinks will be performed in background"); } tmp_data = dict_get (this->options, "o-direct"); if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->o_direct) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for 'o-direct'"); - goto out; - } - if (_private->o_direct) + if (gf_string2boolean (tmp_data->data, + &_private->o_direct) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "wrong option provided for 'o-direct'"); + goto out; + } + if (_private->o_direct) gf_log (this->name, GF_LOG_DEBUG, "o-direct mode is enabled (O_DIRECT " - "for every open)"); + "for every open)"); } - _private->num_devices_to_span = 1; - - tmp_data = dict_get (this->options, "span-devices"); - if (tmp_data) { - if (gf_string2int32 (tmp_data->data, - &_private->num_devices_to_span) == -1) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "wrong option provided for 'span-devices'"); - goto out; - } - if (_private->num_devices_to_span > 1) { - gf_log (this->name, GF_LOG_NORMAL, - "spanning enabled accross %d mounts", - _private->num_devices_to_span); - _private->span_devices = 1; - } - if (_private->num_devices_to_span < 1) - _private->num_devices_to_span = 1; + ret = dict_get_str (this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (uuid_parse (guuid, _private->glusterd_uuid)) + gf_log (this->name, GF_LOG_WARNING, "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_log (this->name, GF_LOG_DEBUG, "No glusterd (node) UUID " + "passed - node-uuid xattr request will return " + "\"No such attribute\""); } - _private->st_device = CALLOC (1, (sizeof (dev_t) * - _private->num_devices_to_span)); - - /* Start with the base */ - _private->st_device[0] = buf.st_dev; + ret = 0; _private->janitor_sleep_duration = 600; - dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", + dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", &janitor_sleep); - if (dict_ret == 0) { - gf_log (this->name, GF_LOG_DEBUG, - "Setting janitor sleep duration to %d.", - janitor_sleep); - - _private->janitor_sleep_duration = janitor_sleep; - } - - LOCK_INIT (&_private->gen_lock); - time64 = time (NULL); - _private->gen_seq = (time64 << 32); + if (dict_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Setting janitor sleep duration to %d.", + janitor_sleep); + _private->janitor_sleep_duration = janitor_sleep; + } + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = opendir (dir_data->data); + if (!_private->mount_lock) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Could not lock brick directory"); + goto out; + } #ifndef GF_DARWIN_HOST_OS { struct rlimit lim; @@ -5080,20 +4939,20 @@ init (xlator_t *this) if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { gf_log (this->name, GF_LOG_WARNING, - "Failed to set 'ulimit -n " - " 1048576': %s", strerror(errno)); + "Failed to set 'ulimit -n " + " 1048576': %s", strerror(errno)); lim.rlim_cur = 65536; lim.rlim_max = 65536; if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { gf_log (this->name, GF_LOG_WARNING, - "Failed to set maximum allowed open " - "file descriptors to 64k: %s", + "Failed to set maximum allowed open " + "file descriptors to 64k: %s", strerror(errno)); } else { - gf_log (this->name, GF_LOG_NORMAL, - "Maximum allowed open file descriptors " + gf_log (this->name, GF_LOG_INFO, + "Maximum allowed open file descriptors " "set to 65536"); } } @@ -5101,12 +4960,85 @@ init (xlator_t *this) #endif this->private = (void *)_private; + op_ret = posix_handle_init (this); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, uint32, out); + GF_OPTION_INIT ("brick-gid", gid, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (uuid_is_null (_private->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + pthread_mutex_init (&_private->janitor_lock, NULL); pthread_cond_init (&_private->janitor_cond, NULL); INIT_LIST_HEAD (&_private->janitor_fds); posix_spawn_janitor_thread (this); - out: + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); +out: return ret; } @@ -5114,8 +5046,13 @@ void fini (xlator_t *this) { struct posix_private *priv = this->private; - sys_lremovexattr (priv->base_path, "trusted.glusterfs.test"); - FREE (priv); + if (!priv) + return; + this->private = NULL; + /*unlock brick dir*/ + if (priv->mount_lock) + closedir (priv->mount_lock); + GF_FREE (priv); return; } @@ -5124,10 +5061,6 @@ struct xlator_dumpops dumpops = { .inode = posix_inode, }; -struct xlator_mops mops = { - .stats = posix_stats, -}; - struct xlator_fops fops = { .lookup = posix_lookup, .stat = posix_stat, @@ -5155,45 +5088,105 @@ struct xlator_fops fops = { .getxattr = posix_getxattr, .fgetxattr = posix_fgetxattr, .removexattr = posix_removexattr, + .fremovexattr = posix_fremovexattr, .fsyncdir = posix_fsyncdir, .access = posix_access, .ftruncate = posix_ftruncate, .fstat = posix_fstat, .lk = posix_lk, - .inodelk = posix_inodelk, - .finodelk = posix_finodelk, - .entrylk = posix_entrylk, - .fentrylk = posix_fentrylk, - .setdents = posix_setdents, - .getdents = posix_getdents, - .checksum = posix_checksum, + .inodelk = posix_inodelk, + .finodelk = posix_finodelk, + .entrylk = posix_entrylk, + .fentrylk = posix_fentrylk, .rchecksum = posix_rchecksum, - .xattrop = posix_xattrop, - .fxattrop = posix_fxattrop, + .xattrop = posix_xattrop, + .fxattrop = posix_fxattrop, .setattr = posix_setattr, .fsetattr = posix_fsetattr, + .fallocate = _posix_fallocate, + .discard = posix_discard, + .zerofill = posix_zerofill, }; struct xlator_cbks cbks = { - .release = posix_release, - .releasedir = posix_releasedir, - .forget = posix_forget + .release = posix_release, + .releasedir = posix_releasedir, + .forget = posix_forget }; struct volume_options options[] = { - { .key = {"o-direct"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"directory"}, - .type = GF_OPTION_TYPE_PATH }, - { .key = {"export-statfs-size"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"mandate-attribute"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"span-devices"}, - .type = GF_OPTION_TYPE_INT }, + { .key = {"o-direct"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"directory"}, + .type = GF_OPTION_TYPE_PATH }, + { .key = {"hostname"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"mandate-attribute"}, + .type = GF_OPTION_TYPE_BOOL }, { .key = {"background-unlink"}, .type = GF_OPTION_TYPE_BOOL }, { .key = {"janitor-sleep-duration"}, .type = GF_OPTION_TYPE_INT }, - { .key = {NULL} } + { .key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"glusterd-uuid"}, + .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO" + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting uid of brick's owner" + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting gid of brick's owner" + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname" + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable" + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, + { .key = {NULL} } }; |
