From 8a0b115b20cfa2dd3c5a9e22a8244c9c2f03e17b Mon Sep 17 00:00:00 2001 From: ShyamsundarR Date: Tue, 28 Nov 2017 18:51:00 -0500 Subject: posix: Reorganize posix xlator to prepare for reuse with rio 1. Split out entry and inode/fd based FOPs into separate files from posix.c 2. Split out common routines (init, fini, reconf, and such) into its own file, from posix.c 3. Retain just the method assignments in posix.c (such that posix2 for RIO can assign its own methods in the future for entry operations and such) 4. Based on the split in (1) and (2) split out posix-handle.h into 2 files, such that macros that are needed for inode ops are in one and rest are in the other If the split is done as above, posix2 can compile with its own entry ops, and hence not compile, the entry ops as split in (1) above. The split described in (4) can again help posix2 to define its own macros to make entry and inode handles, thus not impact existing POSIX xlator code. Noted problems - There are path references in certain cases where quota is used (in the xattr FOPs), and thus will fail on reuse in posix2, this needs to be handled when we get there. - posix_init does set root GFID on the brick root, and this is incorrect for posix2, again will need handling later when posix2 evolves based on this code (other init checks seem fine on current inspection) Merge of experimental branch patches with the following gerrit change-IDs > Change-Id: I965ce6dffe70a62c697f790f3438559520e0af20 > Change-Id: I089a4d9cf470c2f9c121611e8ef18dea92b2be70 > Change-Id: I2cec103f6ba8f3084443f3066bcc70b2f5ecb49a Fixes gluster/glusterfs#327 Change-Id: I0ccfa78559a7c5a68f5e861e144cf856f5c9e19c Signed-off-by: ShyamsundarR --- xlators/storage/posix/src/posix-common.c | 1326 ++++++++++++++++++++++++++++++ 1 file changed, 1326 insertions(+) create mode 100644 xlators/storage/posix/src/posix-common.c (limited to 'xlators/storage/posix/src/posix-common.c') diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c new file mode 100644 index 00000000000..011bef8a720 --- /dev/null +++ b/xlators/storage/posix/src/posix-common.c @@ -0,0 +1,1326 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef GF_BSD_HOST_OS +#include +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include +#endif /* HAVE_LINKAT */ + +#include "glusterfs.h" +#include "checksum.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "posix-inode-handle.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +#include "posix-messages.h" +#include "events.h" +#include "posix-gfid-path.h" +#include "compat-uuid.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do { \ + old_fsuid = setfsuid (uid); \ + old_fsgid = setfsgid (gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() do { \ + setfsuid (old_fsuid); \ + setfsgid (old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. */ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_nsec = nanosecs +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (lutimes (path, tv)) +#endif + +int32_t +posix_priv (xlator_t *this) +{ + struct posix_private *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", + this->type, this->name); + gf_proc_dump_add_section(key_prefix); + + if (!this) + return 0; + + priv = this->private; + + if (!priv) + return 0; + + gf_proc_dump_write("base_path", "%s", priv->base_path); + gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); + gf_proc_dump_write("max_read", "%d", priv->read_value); + gf_proc_dump_write("max_write", "%d", priv->write_value); + gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + + return 0; +} + +int32_t +posix_inode (xlator_t *this) +{ + return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +posix_notify (xlator_t *this, + int32_t event, + void *data, + ...) +{ + struct posix_private *priv = NULL; + + priv = this->private; + switch (event) + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that posix xlator is up */ + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; + case GF_EVENT_CLEANUP: + if (priv->health_check) { + priv->health_check_active = _gf_false; + pthread_cancel (priv->health_check); + priv->health_check = 0; + } + if (priv->disk_space_check) { + priv->disk_space_check_active = _gf_false; + pthread_cancel (priv->disk_space_check); + priv->disk_space_check = 0; + } + if (priv->janitor) { + (void) gf_thread_cleanup_xint (priv->janitor); + priv->janitor = 0; + } + if (priv->fsyncer) { + (void) gf_thread_cleanup_xint (priv->fsyncer); + priv->fsyncer = 0; + } + if (priv->mount_lock) { + (void) sys_closedir (priv->mount_lock); + priv->mount_lock = NULL; + } + + break; + default: + /* */ + break; + } + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); + + if (ret != 0) { + return ret; + } + + return ret; +} + +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + struct stat st = {0,}; + + priv = this->private; + + ret = sys_lstat (priv->base_path, &st); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, "Failed to stat " + "brick path %s", + priv->base_path); + return ret; + } + + if ((uid == -1 || st.st_uid == uid) && + (gid == -1 || st.st_gid == gid)) + return 0; + + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, "Failed to set uid/gid for" + " brick path %s", priv->base_path); + + return ret; +} +static int +set_gfid2path_separator (struct posix_private *priv, const char *str) +{ + int str_len = 0; + + str_len = strlen(str); + if (str_len > 0 && str_len < 8) { + strcpy (priv->gfid2path_sep, str); + return 0; + } + + return -1; +} + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + +#ifdef GF_DARWIN_HOST_OS +static int +set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->xattr_user_namespace = XATTR_NONE; + else if (strcmp (str, "strip") == 0) + priv->xattr_user_namespace = XATTR_STRIP; + else if (strcmp (str, "append") == 0) + priv->xattr_user_namespace = XATTR_APPEND; + else if (strcmp (str, "both") == 0) + priv->xattr_user_namespace = XATTR_BOTH; + else + return -1; + return 0; +} +#endif + +int +posix_reconfigure (xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str = NULL; + char *gfid2path_sep = NULL; + int32_t force_create_mode = -1; + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = -1; + + priv = this->private; + + GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); + + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF ("gfid2path-separator", gfid2path_sep, options, + str, out); + if (set_gfid2path_separator (priv, gfid2path_sep) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, + options, str, out); + + if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } + +#endif + + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); + + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); + + GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks, + options, bool, out); + + GF_OPTION_RECONF ("gfid2path", priv->gfid2path, + options, bool, out); + + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (gf_uuid_is_null (priv->glusterd_uuid))) { + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to :"); + } + + GF_OPTION_RECONF ("reserve", priv->disk_reserve, + options, uint32, out); + if (priv->disk_reserve) + posix_spawn_disk_space_check_thread (this); + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + GF_OPTION_RECONF ("health-check-timeout", priv->health_check_timeout, + options, uint32, out); + posix_spawn_health_check_thread (this); + + GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, + options, int32, out); + GF_OPTION_RECONF ("force-create-mode", force_create_mode, + options, int32, out); + priv->force_create_mode = force_create_mode; + + GF_OPTION_RECONF ("force-directory-mode", force_directory_mode, + options, int32, out); + priv->force_directory_mode = force_directory_mode; + + GF_OPTION_RECONF ("create-mask", create_mask, + options, int32, out); + priv->create_mask = create_mask; + + GF_OPTION_RECONF ("create-directory-mask", create_directory_mask, + options, int32, out); + priv->create_directory_mask = create_directory_mask; + + ret = 0; +out: + return ret; +} + +int32_t +posix_delete_unlink_entry (const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) { + + int ret = 0; + + if (!fpath) + goto out; + + switch (typeflag) { + case FTW_SL: + case FTW_NS: + case FTW_F: + case FTW_SLN: + ret = sys_unlink(fpath); + break; + case FTW_D: + case FTW_DP: + case FTW_DNR: + if (ftwbuf->level != 0) { + ret = sys_rmdir(fpath); + } + break; + default: + break; + } + if (ret) { + gf_msg ("posix_delete_unlink_entry", GF_LOG_WARNING, errno, + P_MSG_HANDLE_CREATE, + "Deletion of entries %s failed" + "Please delete it manually", + fpath); + } +out: + return 0; +} + +int32_t +posix_delete_unlink (const char *unlink_path) { + + int ret = -1; + int flags = 0; + + flags |= (FTW_DEPTH | FTW_PHYS); + + ret = nftw(unlink_path, posix_delete_unlink_entry, 2, flags); + if (ret) { + gf_msg ("posix_delete_unlink", GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Deleting files from %s failed", + unlink_path); + } + return ret; +} + +int32_t +posix_create_unlink_dir (xlator_t *this) { + + struct posix_private *priv = NULL; + struct stat stbuf; + int ret = -1; + uuid_t gfid = {0}; + char gfid_str[64] = {0}; + char unlink_path[PATH_MAX] = {0,}; + char landfill_path[PATH_MAX] = {0,}; + + priv = this->private; + + (void) snprintf (unlink_path, sizeof(unlink_path), "%s/%s", + priv->base_path, GF_UNLINK_PATH); + + gf_uuid_generate (gfid); + uuid_utoa_r (gfid, gfid_str); + + (void) snprintf (landfill_path, sizeof(landfill_path), "%s/%s/%s", + priv->base_path, GF_LANDFILL_PATH, gfid_str); + + ret = sys_stat (unlink_path, &stbuf); + switch (ret) { + case -1: + if (errno != ENOENT) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_CREATE, + "Checking for %s failed", + unlink_path); + return -1; + } + break; + case 0: + if (!S_ISDIR (stbuf.st_mode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Not a directory: %s", + unlink_path); + return -1; + } + ret = posix_delete_unlink (unlink_path); + return 0; + default: + break; + } + ret = sys_mkdir (unlink_path, 0600); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_CREATE, + "Creating directory %s failed", + unlink_path); + return -1; + } + + return 0; +} + +/** + * init - + */ +int +posix_init (xlator_t *this) +{ + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t *tmp_data = NULL; + struct stat buf = {0,}; + gf_boolean_t tmp_bool = 0; + int ret = 0; + int op_ret = -1; + int op_errno = 0; + ssize_t size = -1; + uuid_t old_uuid = {0,}; + uuid_t dict_uuid = {0,}; + uuid_t gfid = {0,}; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + char *guuid = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str; + char *gfid2path_sep = NULL; + int force_create = -1; + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; + + dir_data = dict_get (this->options, "directory"); + + if (this->children) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR, + "FATAL: storage/posix cannot have subvolumes"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING, + "Volume is dangling. Please check the volume file."); + } + + if (!dir_data) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_EXPORT_DIR_MISSING, + "Export directory not specified in volume file."); + ret = -1; + goto out; + } + + umask (000); // umask `masking' is done at the client side + + /* Check whether the specified directory exists, if not log it. */ + op_ret = sys_stat (dir_data->data, &buf); + if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, + "Directory '%s' doesn't exist, exiting.", + dir_data->data); + ret = -1; + goto out; + } + + _private = GF_CALLOC (1, sizeof (*_private), + gf_posix_mt_posix_private); + if (!_private) { + ret = -1; + goto out; + } + + _private->base_path = gf_strdup (dir_data->data); + _private->base_path_length = strlen (_private->base_path); + + ret = dict_get_str (this->options, "hostname", &_private->hostname); + if (ret) { + _private->hostname = GF_CALLOC (256, sizeof (char), + gf_common_mt_char); + if (!_private->hostname) { + goto out; + } + ret = gethostname (_private->hostname, 256); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_HOSTNAME_MISSING, + "could not find hostname "); + } + } + + /* Check for Extended attribute support, if not present, log it */ + op_ret = sys_lsetxattr (dir_data->data, + "trusted.glusterfs.test", "working", 8, 0); + if (op_ret != -1) { + sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); + } else { + tmp_data = dict_get (this->options, + "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &tmp_bool) == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION, + "wrong option provided for key " + "\"mandate-attribute\""); + ret = -1; + goto out; + } + if (!tmp_bool) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "starting as per option"); + } else { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "exiting."); + ret = -1; + goto out; + } + } else { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, exiting."); + ret = -1; + goto out; + } + } + + tmp_data = dict_get (this->options, "volume-id"); + if (tmp_data) { + op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_VOLUME_ID, + "wrong volume-id (%s) set" + " in volume file", tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { + if (gf_uuid_compare (old_uuid, dict_uuid)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_VOLUME_ID, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa (old_uuid)); + gf_event (EVENT_POSIX_ALREADY_PART_OF_VOLUME, + "volume-id=%s;brick=%s:%s", + uuid_utoa (old_uuid), + _private->hostname, _private->base_path); + ret = -1; + goto out; + } + } else if ((size == -1) && + (errno == ENODATA || errno == ENOATTR)) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_VOLUME_ID_ABSENT, + "Extended attribute trusted.glusterfs." + "volume-id is absent"); + gf_event (EVENT_POSIX_BRICK_NOT_IN_VOLUME, + "brick=%s:%s", + _private->hostname, _private->base_path); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, + "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_VOLUME_ID_FETCH_FAILED, + "%s: failed to fetch volume-id", + dir_data->data); + ret = -1; + goto out; + } else { + ret = -1; + gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, + "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_VOLUME_ID_FETCH_FAILED, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid (gfid)) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa (gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: wrong value set as gfid", + dir_data->data); + ret = -1; + goto out; + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: failed to fetch gfid", + dir_data->data); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + 16, XATTR_CREATE); + if (size == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_GFID_SET_FAILED, + "%s: failed to set gfid", + dir_data->data); + ret = -1; + goto out; + } + } + + ret = 0; + + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_ACL_NOTSUP, + "Posix access control list is not supported."); + gf_event (EVENT_POSIX_ACL_NOT_SUPPORTED, + "brick=%s:%s", _private->hostname, _private->base_path); + } + + /* + * _XOPEN_PATH_MAX is the longest file path len we MUST + * support according to POSIX standard. When prepended + * by the brick base path it may exceed backed filesystem + * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If + * this is the case, chdir() to the brick base path and + * use relative paths when they are too long. See also + * MAKE_REAL_PATH in posix-handle.h + */ + _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); + if (_private->path_max != -1 && + _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { + ret = chdir(_private->base_path); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_BASEPATH_CHDIR_FAILED, + "chdir() to \"%s\" failed", + _private->base_path); + goto out; + } +#ifdef __NetBSD__ + /* + * At least on NetBSD, the chdir() above uncovers a + * race condition which cause file lookup to fail + * with ENODATA for a few seconds. The volume quickly + * reaches a sane state, but regression tests are fast + * enough to choke on it. The reason is obscure (as + * often with race conditions), but sleeping here for + * a second seems to workaround the problem. + */ + sleep(1); +#endif + } + + + LOCK_INIT (&_private->lock); + + _private->export_statfs = 1; + tmp_data = dict_get (this->options, "export-statfs-size"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->export_statfs) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "'export-statfs-size' takes only boolean " + "options"); + goto out; + } + if (!_private->export_statfs) + gf_msg_debug (this->name, 0, + "'statfs()' returns dummy size"); + } + + _private->background_unlink = 0; + tmp_data = dict_get (this->options, "background-unlink"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->background_unlink) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, "'background-unlink'" + " takes only boolean options"); + goto out; + } + + if (_private->background_unlink) + gf_msg_debug (this->name, 0, + "unlinks will be performed in background"); + } + + tmp_data = dict_get (this->options, "o-direct"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->o_direct) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "wrong option provided for 'o-direct'"); + goto out; + } + if (_private->o_direct) + gf_msg_debug (this->name, 0, "o-direct mode is enabled" + " (O_DIRECT for every open)"); + } + + tmp_data = dict_get (this->options, "update-link-count-parent"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->update_pgfid_nlinks) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION, "wrong value provided " + "for 'update-link-count-parent'"); + goto out; + } + if (_private->update_pgfid_nlinks) + gf_msg_debug (this->name, 0, "update-link-count-parent" + " is enabled. Thus for each file an " + "extended attribute representing the " + "number of hardlinks for that file " + "within the same parent directory is" + " set."); + } + + ret = dict_get_str (this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (gf_uuid_parse (guuid, _private->glusterd_uuid)) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_INVALID_NODE_UUID, "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_msg_debug (this->name, 0, "No glusterd (node) UUID passed -" + " node-uuid xattr request will return \"No such" + " attribute\""); + } + ret = 0; + + GF_OPTION_INIT ("janitor-sleep-duration", + _private->janitor_sleep_duration, int32, out); + + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = sys_opendir (dir_data->data); + if (!_private->mount_lock) { + ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_DIR_OPERATION_FAILED, + "Could not lock brick directory (%s)", + strerror (op_errno)); + goto out; + } +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_SET_ULIMIT_FAILED, + "Failed to set 'ulimit -n " + " 1048576'"); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_SET_FILE_MAX_FAILED, + "Failed to set maximum allowed open " + "file descriptors to 64k"); + } + else { + gf_msg (this->name, GF_LOG_INFO, 0, + P_MSG_MAX_FILE_OPEN, "Maximum allowed " + "open file descriptors set to 65536"); + } + } + } +#endif + _private->shared_brick_count = 1; + ret = dict_get_int32 (this->options, "shared-brick-count", + &_private->shared_brick_count); + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "'shared-brick-count' takes only integer " + "values"); + goto out; + } + + this->private = (void *)_private; + + op_ret = posix_handle_init (this); + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_create_unlink_dir (this); + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Creation of unlink directory failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, int32, out); + GF_OPTION_INIT ("brick-gid", gid, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (gf_uuid_is_null (_private->glusterd_uuid))) { + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to :"); + } + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + GF_OPTION_INIT ("reserve", + _private->disk_reserve, uint32, out); + if (_private->disk_reserve) + posix_spawn_disk_space_check_thread (this); + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + GF_OPTION_INIT ("health-check-timeout", + _private->health_check_timeout, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + + pthread_mutex_init (&_private->janitor_lock, NULL); + pthread_cond_init (&_private->janitor_cond, NULL); + INIT_LIST_HEAD (&_private->janitor_fds); + + posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSYNCER_THREAD_CREATE_FAILED, + "fsyncer thread creation failed"); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("gfid2path", _private->gfid2path, bool, out); + + GF_OPTION_INIT ("gfid2path-separator", gfid2path_sep, str, out); + if (set_gfid2path_separator (_private, gfid2path_sep) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_INIT ("xattr-user-namespace-mode", + xattr_user_namespace_mode_str, str, out); + + if (set_xattr_user_namespace_mode (_private, + xattr_user_namespace_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } +#endif + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); + GF_OPTION_INIT ("force-create-mode", force_create, int32, out); + _private->force_create_mode = force_create; + + GF_OPTION_INIT ("force-directory-mode", force_directory, int32, out); + _private->force_directory_mode = force_directory; + + GF_OPTION_INIT ("create-mask", + create_mask, int32, out); + _private->create_mask = create_mask; + + GF_OPTION_INIT ("create-directory-mask", + create_directory_mask, int32, out); + _private->create_directory_mask = create_directory_mask; +out: + if (ret) { + if (_private) { + GF_FREE (_private->base_path); + + GF_FREE (_private->hostname); + + GF_FREE (_private->trash_path); + + GF_FREE (_private); + } + + this->private = NULL; + } + return ret; +} + +void +posix_fini (xlator_t *this) +{ + struct posix_private *priv = this->private; + if (!priv) + return; + this->private = NULL; + /*unlock brick dir*/ + if (priv->mount_lock) + (void) sys_closedir (priv->mount_lock); + + GF_FREE (priv->base_path); + GF_FREE (priv->hostname); + GF_FREE (priv->trash_path); + GF_FREE (priv); + + return; +} + +struct volume_options options[] = { + { .key = {"o-direct"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"directory"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = "{{brick.path}}" + }, + { .key = {"hostname"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"mandate-attribute"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"background-unlink"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"janitor-sleep-duration"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "10", + .description = "Interval (in seconds) between times the internal " + "'landfill' directory is emptied." + }, + { .key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY, + .default_value = "{{brick.volumeid}}" + }, + { .key = {"glusterd-uuid"}, + .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting uid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting gid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"health-check-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "10", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds to wait aio_write finish for health check, " + "set to 0 to disable", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"reserve"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "1", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Percentage of disk space to be reserved." + " Set to 0 to disable", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order.", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"update-link-count-parent"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable placeholders for gfid to path conversion", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"gfid2path"}, + .type = GF_OPTION_TYPE_BOOL, +#ifdef __NetBSD__ + /* + * NetBSD storage of extended attributes for UFS1 badly + * scales when the list of extended attributes names rises. + * This option can add as many extended attributes names + * as we have files, hence we keep it disabled for performance + * sake. + */ + .default_value = "off", +#else + .default_value = "on", +#endif + .description = "Enable logging metadata for gfid to path conversion", + .op_version = {GD_OP_VERSION_3_12_0}, + .flags = OPT_FLAG_SETTABLE + }, + { .key = {"gfid2path-separator"}, + .type = GF_OPTION_TYPE_STR, + .default_value = ":", + .description = "Path separator for glusterfs.gfidtopath virt xattr", + .op_version = {GD_OP_VERSION_3_12_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, +#if GF_DARWIN_HOST_OS + { .key = {"xattr-user-namespace-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "none", + .description = "Option to control XATTR user namespace on the raw filesystem: " + "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n" + " The raw filesystem will not be compatible with OS X Finder.\n" + "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, +#endif + { .key = {"shared-brick-count"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .description = "Number of bricks sharing the same backend export." + " Useful for displaying the proper usable size through statvfs() " + "call (df command)", + }, + { .key = {"force-create-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0000", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Mode bit permission that will always be set on a file." + }, + { .key = {"force-directory-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0000", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Mode bit permission that will be always set on directory" + }, + { .key = {"create-mask"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0777", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Any bit not set here will be removed from the" + "modes set on a file when it is created" + }, + { .key = {"create-directory-mask"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0777", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Any bit not set here will be removed from the" + "modes set on a directory when it is created" + }, + { .key = {NULL} } +}; -- cgit