diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-common.c')
-rw-r--r-- | xlators/storage/posix/src/posix-common.c | 1326 |
1 files changed, 1326 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c new file mode 100644 index 00000000000..011bef8a720 --- /dev/null +++ b/xlators/storage/posix/src/posix-common.c @@ -0,0 +1,1326 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <ftw.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include "glusterfs.h" +#include "checksum.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "posix-inode-handle.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +#include "posix-messages.h" +#include "events.h" +#include "posix-gfid-path.h" +#include "compat-uuid.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do { \ + old_fsuid = setfsuid (uid); \ + old_fsgid = setfsgid (gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() do { \ + setfsuid (old_fsuid); \ + setfsgid (old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. */ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_nsec = nanosecs +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (lutimes (path, tv)) +#endif + +int32_t +posix_priv (xlator_t *this) +{ + struct posix_private *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", + this->type, this->name); + gf_proc_dump_add_section(key_prefix); + + if (!this) + return 0; + + priv = this->private; + + if (!priv) + return 0; + + gf_proc_dump_write("base_path", "%s", priv->base_path); + gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); + gf_proc_dump_write("max_read", "%d", priv->read_value); + gf_proc_dump_write("max_write", "%d", priv->write_value); + gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + + return 0; +} + +int32_t +posix_inode (xlator_t *this) +{ + return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +posix_notify (xlator_t *this, + int32_t event, + void *data, + ...) +{ + struct posix_private *priv = NULL; + + priv = this->private; + switch (event) + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that posix xlator is up */ + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; + case GF_EVENT_CLEANUP: + if (priv->health_check) { + priv->health_check_active = _gf_false; + pthread_cancel (priv->health_check); + priv->health_check = 0; + } + if (priv->disk_space_check) { + priv->disk_space_check_active = _gf_false; + pthread_cancel (priv->disk_space_check); + priv->disk_space_check = 0; + } + if (priv->janitor) { + (void) gf_thread_cleanup_xint (priv->janitor); + priv->janitor = 0; + } + if (priv->fsyncer) { + (void) gf_thread_cleanup_xint (priv->fsyncer); + priv->fsyncer = 0; + } + if (priv->mount_lock) { + (void) sys_closedir (priv->mount_lock); + priv->mount_lock = NULL; + } + + break; + default: + /* */ + break; + } + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); + + if (ret != 0) { + return ret; + } + + return ret; +} + +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + struct stat st = {0,}; + + priv = this->private; + + ret = sys_lstat (priv->base_path, &st); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, "Failed to stat " + "brick path %s", + priv->base_path); + return ret; + } + + if ((uid == -1 || st.st_uid == uid) && + (gid == -1 || st.st_gid == gid)) + return 0; + + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, "Failed to set uid/gid for" + " brick path %s", priv->base_path); + + return ret; +} +static int +set_gfid2path_separator (struct posix_private *priv, const char *str) +{ + int str_len = 0; + + str_len = strlen(str); + if (str_len > 0 && str_len < 8) { + strcpy (priv->gfid2path_sep, str); + return 0; + } + + return -1; +} + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + +#ifdef GF_DARWIN_HOST_OS +static int +set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->xattr_user_namespace = XATTR_NONE; + else if (strcmp (str, "strip") == 0) + priv->xattr_user_namespace = XATTR_STRIP; + else if (strcmp (str, "append") == 0) + priv->xattr_user_namespace = XATTR_APPEND; + else if (strcmp (str, "both") == 0) + priv->xattr_user_namespace = XATTR_BOTH; + else + return -1; + return 0; +} +#endif + +int +posix_reconfigure (xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str = NULL; + char *gfid2path_sep = NULL; + int32_t force_create_mode = -1; + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = -1; + + priv = this->private; + + GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); + + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF ("gfid2path-separator", gfid2path_sep, options, + str, out); + if (set_gfid2path_separator (priv, gfid2path_sep) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, + options, str, out); + + if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } + +#endif + + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); + + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); + + GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks, + options, bool, out); + + GF_OPTION_RECONF ("gfid2path", priv->gfid2path, + options, bool, out); + + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (gf_uuid_is_null (priv->glusterd_uuid))) { + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF ("reserve", priv->disk_reserve, + options, uint32, out); + if (priv->disk_reserve) + posix_spawn_disk_space_check_thread (this); + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + GF_OPTION_RECONF ("health-check-timeout", priv->health_check_timeout, + options, uint32, out); + posix_spawn_health_check_thread (this); + + GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, + options, int32, out); + GF_OPTION_RECONF ("force-create-mode", force_create_mode, + options, int32, out); + priv->force_create_mode = force_create_mode; + + GF_OPTION_RECONF ("force-directory-mode", force_directory_mode, + options, int32, out); + priv->force_directory_mode = force_directory_mode; + + GF_OPTION_RECONF ("create-mask", create_mask, + options, int32, out); + priv->create_mask = create_mask; + + GF_OPTION_RECONF ("create-directory-mask", create_directory_mask, + options, int32, out); + priv->create_directory_mask = create_directory_mask; + + ret = 0; +out: + return ret; +} + +int32_t +posix_delete_unlink_entry (const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) { + + int ret = 0; + + if (!fpath) + goto out; + + switch (typeflag) { + case FTW_SL: + case FTW_NS: + case FTW_F: + case FTW_SLN: + ret = sys_unlink(fpath); + break; + case FTW_D: + case FTW_DP: + case FTW_DNR: + if (ftwbuf->level != 0) { + ret = sys_rmdir(fpath); + } + break; + default: + break; + } + if (ret) { + gf_msg ("posix_delete_unlink_entry", GF_LOG_WARNING, errno, + P_MSG_HANDLE_CREATE, + "Deletion of entries %s failed" + "Please delete it manually", + fpath); + } +out: + return 0; +} + +int32_t +posix_delete_unlink (const char *unlink_path) { + + int ret = -1; + int flags = 0; + + flags |= (FTW_DEPTH | FTW_PHYS); + + ret = nftw(unlink_path, posix_delete_unlink_entry, 2, flags); + if (ret) { + gf_msg ("posix_delete_unlink", GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Deleting files from %s failed", + unlink_path); + } + return ret; +} + +int32_t +posix_create_unlink_dir (xlator_t *this) { + + struct posix_private *priv = NULL; + struct stat stbuf; + int ret = -1; + uuid_t gfid = {0}; + char gfid_str[64] = {0}; + char unlink_path[PATH_MAX] = {0,}; + char landfill_path[PATH_MAX] = {0,}; + + priv = this->private; + + (void) snprintf (unlink_path, sizeof(unlink_path), "%s/%s", + priv->base_path, GF_UNLINK_PATH); + + gf_uuid_generate (gfid); + uuid_utoa_r (gfid, gfid_str); + + (void) snprintf (landfill_path, sizeof(landfill_path), "%s/%s/%s", + priv->base_path, GF_LANDFILL_PATH, gfid_str); + + ret = sys_stat (unlink_path, &stbuf); + switch (ret) { + case -1: + if (errno != ENOENT) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_CREATE, + "Checking for %s failed", + unlink_path); + return -1; + } + break; + case 0: + if (!S_ISDIR (stbuf.st_mode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Not a directory: %s", + unlink_path); + return -1; + } + ret = posix_delete_unlink (unlink_path); + return 0; + default: + break; + } + ret = sys_mkdir (unlink_path, 0600); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_CREATE, + "Creating directory %s failed", + unlink_path); + return -1; + } + + return 0; +} + +/** + * init - + */ +int +posix_init (xlator_t *this) +{ + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t *tmp_data = NULL; + struct stat buf = {0,}; + gf_boolean_t tmp_bool = 0; + int ret = 0; + int op_ret = -1; + int op_errno = 0; + ssize_t size = -1; + uuid_t old_uuid = {0,}; + uuid_t dict_uuid = {0,}; + uuid_t gfid = {0,}; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + char *guuid = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str; + char *gfid2path_sep = NULL; + int force_create = -1; + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; + + dir_data = dict_get (this->options, "directory"); + + if (this->children) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR, + "FATAL: storage/posix cannot have subvolumes"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING, + "Volume is dangling. Please check the volume file."); + } + + if (!dir_data) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_EXPORT_DIR_MISSING, + "Export directory not specified in volume file."); + ret = -1; + goto out; + } + + umask (000); // umask `masking' is done at the client side + + /* Check whether the specified directory exists, if not log it. */ + op_ret = sys_stat (dir_data->data, &buf); + if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, + "Directory '%s' doesn't exist, exiting.", + dir_data->data); + ret = -1; + goto out; + } + + _private = GF_CALLOC (1, sizeof (*_private), + gf_posix_mt_posix_private); + if (!_private) { + ret = -1; + goto out; + } + + _private->base_path = gf_strdup (dir_data->data); + _private->base_path_length = strlen (_private->base_path); + + ret = dict_get_str (this->options, "hostname", &_private->hostname); + if (ret) { + _private->hostname = GF_CALLOC (256, sizeof (char), + gf_common_mt_char); + if (!_private->hostname) { + goto out; + } + ret = gethostname (_private->hostname, 256); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_HOSTNAME_MISSING, + "could not find hostname "); + } + } + + /* Check for Extended attribute support, if not present, log it */ + op_ret = sys_lsetxattr (dir_data->data, + "trusted.glusterfs.test", "working", 8, 0); + if (op_ret != -1) { + sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); + } else { + tmp_data = dict_get (this->options, + "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &tmp_bool) == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION, + "wrong option provided for key " + "\"mandate-attribute\""); + ret = -1; + goto out; + } + if (!tmp_bool) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "starting as per option"); + } else { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "exiting."); + ret = -1; + goto out; + } + } else { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, exiting."); + ret = -1; + goto out; + } + } + + tmp_data = dict_get (this->options, "volume-id"); + if (tmp_data) { + op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_VOLUME_ID, + "wrong volume-id (%s) set" + " in volume file", tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { + if (gf_uuid_compare (old_uuid, dict_uuid)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_VOLUME_ID, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa (old_uuid)); + gf_event (EVENT_POSIX_ALREADY_PART_OF_VOLUME, + "volume-id=%s;brick=%s:%s", + uuid_utoa (old_uuid), + _private->hostname, _private->base_path); + ret = -1; + goto out; + } + } else if ((size == -1) && + (errno == ENODATA || errno == ENOATTR)) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_VOLUME_ID_ABSENT, + "Extended attribute trusted.glusterfs." + "volume-id is absent"); + gf_event (EVENT_POSIX_BRICK_NOT_IN_VOLUME, + "brick=%s:%s", + _private->hostname, _private->base_path); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, + "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_VOLUME_ID_FETCH_FAILED, + "%s: failed to fetch volume-id", + dir_data->data); + ret = -1; + goto out; + } else { + ret = -1; + gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, + "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_VOLUME_ID_FETCH_FAILED, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid (gfid)) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa (gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: wrong value set as gfid", + dir_data->data); + ret = -1; + goto out; + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: failed to fetch gfid", + dir_data->data); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + 16, XATTR_CREATE); + if (size == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_GFID_SET_FAILED, + "%s: failed to set gfid", + dir_data->data); + ret = -1; + goto out; + } + } + + ret = 0; + + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_ACL_NOTSUP, + "Posix access control list is not supported."); + gf_event (EVENT_POSIX_ACL_NOT_SUPPORTED, + "brick=%s:%s", _private->hostname, _private->base_path); + } + + /* + * _XOPEN_PATH_MAX is the longest file path len we MUST + * support according to POSIX standard. When prepended + * by the brick base path it may exceed backed filesystem + * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If + * this is the case, chdir() to the brick base path and + * use relative paths when they are too long. See also + * MAKE_REAL_PATH in posix-handle.h + */ + _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); + if (_private->path_max != -1 && + _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { + ret = chdir(_private->base_path); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_BASEPATH_CHDIR_FAILED, + "chdir() to \"%s\" failed", + _private->base_path); + goto out; + } +#ifdef __NetBSD__ + /* + * At least on NetBSD, the chdir() above uncovers a + * race condition which cause file lookup to fail + * with ENODATA for a few seconds. The volume quickly + * reaches a sane state, but regression tests are fast + * enough to choke on it. The reason is obscure (as + * often with race conditions), but sleeping here for + * a second seems to workaround the problem. + */ + sleep(1); +#endif + } + + + LOCK_INIT (&_private->lock); + + _private->export_statfs = 1; + tmp_data = dict_get (this->options, "export-statfs-size"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->export_statfs) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "'export-statfs-size' takes only boolean " + "options"); + goto out; + } + if (!_private->export_statfs) + gf_msg_debug (this->name, 0, + "'statfs()' returns dummy size"); + } + + _private->background_unlink = 0; + tmp_data = dict_get (this->options, "background-unlink"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->background_unlink) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, "'background-unlink'" + " takes only boolean options"); + goto out; + } + + if (_private->background_unlink) + gf_msg_debug (this->name, 0, + "unlinks will be performed in background"); + } + + tmp_data = dict_get (this->options, "o-direct"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->o_direct) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "wrong option provided for 'o-direct'"); + goto out; + } + if (_private->o_direct) + gf_msg_debug (this->name, 0, "o-direct mode is enabled" + " (O_DIRECT for every open)"); + } + + tmp_data = dict_get (this->options, "update-link-count-parent"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->update_pgfid_nlinks) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION, "wrong value provided " + "for 'update-link-count-parent'"); + goto out; + } + if (_private->update_pgfid_nlinks) + gf_msg_debug (this->name, 0, "update-link-count-parent" + " is enabled. Thus for each file an " + "extended attribute representing the " + "number of hardlinks for that file " + "within the same parent directory is" + " set."); + } + + ret = dict_get_str (this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (gf_uuid_parse (guuid, _private->glusterd_uuid)) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_INVALID_NODE_UUID, "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_msg_debug (this->name, 0, "No glusterd (node) UUID passed -" + " node-uuid xattr request will return \"No such" + " attribute\""); + } + ret = 0; + + GF_OPTION_INIT ("janitor-sleep-duration", + _private->janitor_sleep_duration, int32, out); + + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = sys_opendir (dir_data->data); + if (!_private->mount_lock) { + ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_DIR_OPERATION_FAILED, + "Could not lock brick directory (%s)", + strerror (op_errno)); + goto out; + } +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_SET_ULIMIT_FAILED, + "Failed to set 'ulimit -n " + " 1048576'"); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_SET_FILE_MAX_FAILED, + "Failed to set maximum allowed open " + "file descriptors to 64k"); + } + else { + gf_msg (this->name, GF_LOG_INFO, 0, + P_MSG_MAX_FILE_OPEN, "Maximum allowed " + "open file descriptors set to 65536"); + } + } + } +#endif + _private->shared_brick_count = 1; + ret = dict_get_int32 (this->options, "shared-brick-count", + &_private->shared_brick_count); + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "'shared-brick-count' takes only integer " + "values"); + goto out; + } + + this->private = (void *)_private; + + op_ret = posix_handle_init (this); + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_create_unlink_dir (this); + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Creation of unlink directory failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, int32, out); + GF_OPTION_INIT ("brick-gid", gid, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (gf_uuid_is_null (_private->glusterd_uuid))) { + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + GF_OPTION_INIT ("reserve", + _private->disk_reserve, uint32, out); + if (_private->disk_reserve) + posix_spawn_disk_space_check_thread (this); + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + GF_OPTION_INIT ("health-check-timeout", + _private->health_check_timeout, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + + pthread_mutex_init (&_private->janitor_lock, NULL); + pthread_cond_init (&_private->janitor_cond, NULL); + INIT_LIST_HEAD (&_private->janitor_fds); + + posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSYNCER_THREAD_CREATE_FAILED, + "fsyncer thread creation failed"); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("gfid2path", _private->gfid2path, bool, out); + + GF_OPTION_INIT ("gfid2path-separator", gfid2path_sep, str, out); + if (set_gfid2path_separator (_private, gfid2path_sep) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_INIT ("xattr-user-namespace-mode", + xattr_user_namespace_mode_str, str, out); + + if (set_xattr_user_namespace_mode (_private, + xattr_user_namespace_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } +#endif + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); + GF_OPTION_INIT ("force-create-mode", force_create, int32, out); + _private->force_create_mode = force_create; + + GF_OPTION_INIT ("force-directory-mode", force_directory, int32, out); + _private->force_directory_mode = force_directory; + + GF_OPTION_INIT ("create-mask", + create_mask, int32, out); + _private->create_mask = create_mask; + + GF_OPTION_INIT ("create-directory-mask", + create_directory_mask, int32, out); + _private->create_directory_mask = create_directory_mask; +out: + if (ret) { + if (_private) { + GF_FREE (_private->base_path); + + GF_FREE (_private->hostname); + + GF_FREE (_private->trash_path); + + GF_FREE (_private); + } + + this->private = NULL; + } + return ret; +} + +void +posix_fini (xlator_t *this) +{ + struct posix_private *priv = this->private; + if (!priv) + return; + this->private = NULL; + /*unlock brick dir*/ + if (priv->mount_lock) + (void) sys_closedir (priv->mount_lock); + + GF_FREE (priv->base_path); + GF_FREE (priv->hostname); + GF_FREE (priv->trash_path); + GF_FREE (priv); + + return; +} + +struct volume_options options[] = { + { .key = {"o-direct"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"directory"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = "{{brick.path}}" + }, + { .key = {"hostname"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"mandate-attribute"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"background-unlink"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"janitor-sleep-duration"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "10", + .description = "Interval (in seconds) between times the internal " + "'landfill' directory is emptied." + }, + { .key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY, + .default_value = "{{brick.volumeid}}" + }, + { .key = {"glusterd-uuid"}, + .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting uid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting gid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"health-check-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "10", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds to wait aio_write finish for health check, " + "set to 0 to disable", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"reserve"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "1", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Percentage of disk space to be reserved." + " Set to 0 to disable", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order.", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"update-link-count-parent"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable placeholders for gfid to path conversion", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"gfid2path"}, + .type = GF_OPTION_TYPE_BOOL, +#ifdef __NetBSD__ + /* + * NetBSD storage of extended attributes for UFS1 badly + * scales when the list of extended attributes names rises. + * This option can add as many extended attributes names + * as we have files, hence we keep it disabled for performance + * sake. + */ + .default_value = "off", +#else + .default_value = "on", +#endif + .description = "Enable logging metadata for gfid to path conversion", + .op_version = {GD_OP_VERSION_3_12_0}, + .flags = OPT_FLAG_SETTABLE + }, + { .key = {"gfid2path-separator"}, + .type = GF_OPTION_TYPE_STR, + .default_value = ":", + .description = "Path separator for glusterfs.gfidtopath virt xattr", + .op_version = {GD_OP_VERSION_3_12_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, +#if GF_DARWIN_HOST_OS + { .key = {"xattr-user-namespace-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "none", + .description = "Option to control XATTR user namespace on the raw filesystem: " + "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n" + " The raw filesystem will not be compatible with OS X Finder.\n" + "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, +#endif + { .key = {"shared-brick-count"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .description = "Number of bricks sharing the same backend export." + " Useful for displaying the proper usable size through statvfs() " + "call (df command)", + }, + { .key = {"force-create-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0000", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Mode bit permission that will always be set on a file." + }, + { .key = {"force-directory-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0000", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Mode bit permission that will be always set on directory" + }, + { .key = {"create-mask"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0777", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Any bit not set here will be removed from the" + "modes set on a file when it is created" + }, + { .key = {"create-directory-mask"}, + .type = GF_OPTION_TYPE_INT, + .min = 0000, + .max = 0777, + .default_value = "0777", + .validate = GF_OPT_VALIDATE_MIN, + .validate = GF_OPT_VALIDATE_MAX, + .description = "Any bit not set here will be removed from the" + "modes set on a directory when it is created" + }, + { .key = {NULL} } +}; |