diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-common.c')
| -rw-r--r-- | xlators/storage/posix/src/posix-common.c | 1326 | 
1 files changed, 1326 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c new file mode 100644 index 00000000000..011bef8a720 --- /dev/null +++ b/xlators/storage/posix/src/posix-common.c @@ -0,0 +1,1326 @@ +/* +   Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> +   This file is part of GlusterFS. + +   This file is licensed to you under your choice of the GNU Lesser +   General Public License, version 3 or any later version (LGPLv3 or +   later), or the GNU General Public License, version 2 (GPLv2), in all +   cases as published by the Free Software Foundation. +*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <ftw.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include "glusterfs.h" +#include "checksum.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "posix-inode-handle.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +#include "posix-messages.h" +#include "events.h" +#include "posix-gfid-path.h" +#include "compat-uuid.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do {                \ +                old_fsuid = setfsuid (uid);     \ +                old_fsgid = setfsgid (gid);     \ +        } while (0) + +#define SET_TO_OLD_FS_ID() do {                 \ +                setfsuid (old_fsuid);           \ +                setfsgid (old_fsgid);           \ +        } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: +   The passed in `tv` can be +       struct timespec +   if supported (better, because it supports nanosecond resolution) or +       struct timeval +   otherwise. */ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ +        tv.tv_nsec = nanosecs +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ +        (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ +        tv.tv_usec = nanosecs / 1000 +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ +        (lutimes (path, tv)) +#endif + +int32_t +posix_priv (xlator_t *this) +{ +        struct posix_private *priv = NULL; +        char  key_prefix[GF_DUMP_MAX_BUF_LEN]; + +        (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", +                        this->type, this->name); +        gf_proc_dump_add_section(key_prefix); + +        if (!this) +                return 0; + +        priv = this->private; + +        if (!priv) +                return 0; + +        gf_proc_dump_write("base_path", "%s", priv->base_path); +        gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); +        gf_proc_dump_write("max_read", "%d", priv->read_value); +        gf_proc_dump_write("max_write", "%d", priv->write_value); +        gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + +        return 0; +} + +int32_t +posix_inode (xlator_t *this) +{ +        return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +posix_notify (xlator_t *this, +        int32_t event, +        void *data, +        ...) +{ +        struct posix_private *priv = NULL; + +        priv = this->private; +        switch (event) +        { +        case GF_EVENT_PARENT_UP: +        { +                /* Tell the parent that posix xlator is up */ +                default_notify (this, GF_EVENT_CHILD_UP, data); +        } +        break; +        case GF_EVENT_CLEANUP: +                if (priv->health_check) { +                        priv->health_check_active = _gf_false; +                        pthread_cancel (priv->health_check); +                        priv->health_check = 0; +                } +                if (priv->disk_space_check) { +                        priv->disk_space_check_active = _gf_false; +                        pthread_cancel (priv->disk_space_check); +                        priv->disk_space_check = 0; +                } +                if (priv->janitor) { +                        (void) gf_thread_cleanup_xint (priv->janitor); +                        priv->janitor = 0; +                } +                if (priv->fsyncer) { +                        (void) gf_thread_cleanup_xint (priv->fsyncer); +                        priv->fsyncer = 0; +                } +                if (priv->mount_lock) { +                        (void) sys_closedir (priv->mount_lock); +                        priv->mount_lock = NULL; +                } + +        break; +        default: +                /* */ +                break; +        } +        return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ +        int     ret = -1; + +        if (!this) +                return ret; + +        ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); + +        if (ret != 0) { +                return ret; +        } + +        return ret; +} + +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ +        struct posix_private *priv = NULL; +        int                   ret  = -1; +        struct stat st = {0,}; + +        priv = this->private; + +        ret = sys_lstat (priv->base_path, &st); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, errno, +                        P_MSG_DIR_OPERATION_FAILED, "Failed to stat " +                        "brick path %s", +                        priv->base_path); +                return ret; +        } + +        if ((uid == -1 || st.st_uid == uid) && +            (gid == -1 || st.st_gid == gid)) +                return 0; + +        ret = sys_chown (priv->base_path, uid, gid); +        if (ret) +                gf_msg (this->name, GF_LOG_ERROR, errno, +                        P_MSG_DIR_OPERATION_FAILED, "Failed to set uid/gid for" +                        " brick path %s", priv->base_path); + +        return ret; +} +static int +set_gfid2path_separator (struct posix_private *priv, const char *str) +{ +        int str_len = 0; + +        str_len = strlen(str); +        if (str_len > 0 && str_len < 8) { +                strcpy (priv->gfid2path_sep, str); +                return 0; +        } + +        return -1; +} + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ +        if (strcmp (str, "none") == 0) +                priv->batch_fsync_mode = BATCH_NONE; +        else if (strcmp (str, "syncfs") == 0) +                priv->batch_fsync_mode = BATCH_SYNCFS; +        else if (strcmp (str, "syncfs-single-fsync") == 0) +                priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; +        else if (strcmp (str, "syncfs-reverse-fsync") == 0) +                priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; +        else if (strcmp (str, "reverse-fsync") == 0) +                priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; +        else +                return -1; + +        return 0; +} + +#ifdef GF_DARWIN_HOST_OS +static int +set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) +{ +        if (strcmp (str, "none") == 0) +                priv->xattr_user_namespace = XATTR_NONE; +        else if (strcmp (str, "strip") == 0) +                priv->xattr_user_namespace = XATTR_STRIP; +        else if (strcmp (str, "append") == 0) +                priv->xattr_user_namespace = XATTR_APPEND; +        else if (strcmp (str, "both") == 0) +                priv->xattr_user_namespace = XATTR_BOTH; +        else +                return -1; +        return 0; +} +#endif + +int +posix_reconfigure (xlator_t *this, dict_t *options) +{ +        int                   ret = -1; +        struct posix_private *priv = NULL; +        int32_t               uid = -1; +        int32_t               gid = -1; +        char                 *batch_fsync_mode_str = NULL; +        char                 *gfid2path_sep = NULL; +        int32_t              force_create_mode = -1; +        int32_t              force_directory_mode = -1; +        int32_t              create_mask = -1; +        int32_t              create_directory_mask = -1; + +        priv = this->private; + +        GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); +        GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); +        if (uid != -1 || gid != -1) +                posix_set_owner (this, uid, gid); + +        GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, +                          options, uint32, out); + +        GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, +                          options, str, out); + +        if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, +                        "Unknown mode string: %s", batch_fsync_mode_str); +                goto out; +        } + +        GF_OPTION_RECONF ("gfid2path-separator", gfid2path_sep, options, +                          str, out); +        if (set_gfid2path_separator (priv, gfid2path_sep) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, +                        "Length of separator exceeds 7: %s", gfid2path_sep); +                goto out; +        } + +#ifdef GF_DARWIN_HOST_OS + +        char   *xattr_user_namespace_mode_str = NULL; + +        GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, +                          options, str, out); + +        if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT, +                        "Unknown xattr user namespace mode string: %s", +                        xattr_user_namespace_mode_str); +                goto out; +        } + +#endif + +        GF_OPTION_RECONF ("linux-aio", priv->aio_configured, +                          options, bool, out); + +        if (priv->aio_configured) +                posix_aio_on (this); +        else +                posix_aio_off (this); + +        GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks, +                          options, bool, out); + +        GF_OPTION_RECONF ("gfid2path", priv->gfid2path, +                          options, bool, out); + +        GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, +                          options, bool, out); + +        if (priv->node_uuid_pathinfo && +                        (gf_uuid_is_null (priv->glusterd_uuid))) { +                gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, +                        "glusterd uuid is NULL, pathinfo xattr would" +                        " fallback to <hostname>:<export>"); +        } + +        GF_OPTION_RECONF ("reserve", priv->disk_reserve, +                          options, uint32, out); +        if (priv->disk_reserve) +                posix_spawn_disk_space_check_thread (this); + +        GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, +                          options, uint32, out); +        GF_OPTION_RECONF ("health-check-timeout", priv->health_check_timeout, +                          options, uint32, out); +        posix_spawn_health_check_thread (this); + +        GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, +                          options, int32, out); +        GF_OPTION_RECONF ("force-create-mode", force_create_mode, +                          options, int32, out); +        priv->force_create_mode = force_create_mode; + +        GF_OPTION_RECONF ("force-directory-mode", force_directory_mode, +                          options, int32, out); +        priv->force_directory_mode = force_directory_mode; + +        GF_OPTION_RECONF ("create-mask", create_mask, +                          options, int32, out); +        priv->create_mask = create_mask; + +        GF_OPTION_RECONF ("create-directory-mask", create_directory_mask, +                          options, int32, out); +        priv->create_directory_mask = create_directory_mask; + +        ret = 0; +out: +        return ret; +} + +int32_t +posix_delete_unlink_entry (const char *fpath, const struct stat *sb, +                   int typeflag, struct FTW *ftwbuf) { + +        int    ret = 0; + +        if (!fpath) +                goto out; + +        switch (typeflag) { +        case FTW_SL: +        case FTW_NS: +        case FTW_F: +        case FTW_SLN: +                ret = sys_unlink(fpath); +                break; +        case FTW_D: +        case FTW_DP: +        case FTW_DNR: +                if (ftwbuf->level != 0) { +                        ret = sys_rmdir(fpath); +                } +                break; +        default: +                break; +        } +        if (ret) { +                gf_msg ("posix_delete_unlink_entry", GF_LOG_WARNING, errno, +                        P_MSG_HANDLE_CREATE, +                        "Deletion of entries %s failed" +                        "Please delete it manually", +                        fpath); +        } +out: +        return 0; +} + +int32_t +posix_delete_unlink (const char *unlink_path) { + +        int    ret = -1; +        int    flags = 0; + +        flags |= (FTW_DEPTH | FTW_PHYS); + +        ret = nftw(unlink_path, posix_delete_unlink_entry, 2, flags); +        if (ret) { +                gf_msg ("posix_delete_unlink", GF_LOG_ERROR, 0, +                        P_MSG_HANDLE_CREATE, +                        "Deleting files from  %s failed", +                        unlink_path); +        } +        return ret; +} + +int32_t +posix_create_unlink_dir (xlator_t *this) { + +        struct posix_private *priv = NULL; +        struct stat           stbuf; +        int                   ret = -1; +        uuid_t                gfid = {0}; +        char                  gfid_str[64] = {0}; +        char                  unlink_path[PATH_MAX] = {0,}; +        char                  landfill_path[PATH_MAX] = {0,}; + +        priv = this->private; + +        (void) snprintf (unlink_path, sizeof(unlink_path), "%s/%s", +                         priv->base_path, GF_UNLINK_PATH); + +        gf_uuid_generate (gfid); +        uuid_utoa_r (gfid, gfid_str); + +        (void) snprintf (landfill_path, sizeof(landfill_path), "%s/%s/%s", +                         priv->base_path, GF_LANDFILL_PATH, gfid_str); + +        ret = sys_stat (unlink_path, &stbuf); +        switch (ret) { +        case -1: +                if (errno != ENOENT) { +                        gf_msg (this->name, GF_LOG_ERROR, errno, +                                P_MSG_HANDLE_CREATE, +                                "Checking for %s failed", +                                unlink_path); +                        return -1; +                } +                break; +        case 0: +                if (!S_ISDIR (stbuf.st_mode)) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_HANDLE_CREATE, +                                "Not a directory: %s", +                                unlink_path); +                        return -1; +                } +                ret = posix_delete_unlink (unlink_path); +                return 0; +        default: +                break; +        } +        ret = sys_mkdir (unlink_path, 0600); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, errno, +                        P_MSG_HANDLE_CREATE, +                        "Creating directory %s failed", +                        unlink_path); +                return -1; +        } + +        return 0; +} + +/** + * init - + */ +int +posix_init (xlator_t *this) +{ +        struct posix_private *_private      = NULL; +        data_t               *dir_data      = NULL; +        data_t               *tmp_data      = NULL; +        struct stat           buf           = {0,}; +        gf_boolean_t          tmp_bool      = 0; +        int                   ret           = 0; +        int                   op_ret        = -1; +        int                   op_errno      = 0; +        ssize_t               size          = -1; +        uuid_t                old_uuid      = {0,}; +        uuid_t                dict_uuid     = {0,}; +        uuid_t                gfid          = {0,}; +        uuid_t                rootgfid      = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; +        char                 *guuid         = NULL; +        int32_t               uid           = -1; +        int32_t               gid           = -1; +        char                 *batch_fsync_mode_str; +        char                 *gfid2path_sep = NULL; +        int                  force_create  = -1; +        int                  force_directory = -1; +        int                  create_mask  = -1; +        int                  create_directory_mask = -1; + +        dir_data = dict_get (this->options, "directory"); + +        if (this->children) { +                gf_msg (this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR, +                        "FATAL: storage/posix cannot have subvolumes"); +                ret = -1; +                goto out; +        } + +        if (!this->parents) { +                gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING, +                        "Volume is dangling. Please check the volume file."); +        } + +        if (!dir_data) { +                gf_msg (this->name, GF_LOG_CRITICAL, 0, +                        P_MSG_EXPORT_DIR_MISSING, +                        "Export directory not specified in volume file."); +                ret = -1; +                goto out; +        } + +        umask (000); // umask `masking' is done at the client side + +        /* Check whether the specified directory exists, if not log it. */ +        op_ret = sys_stat (dir_data->data, &buf); +        if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, +                        "Directory '%s' doesn't exist, exiting.", +                        dir_data->data); +                ret = -1; +                goto out; +        } + +        _private = GF_CALLOC (1, sizeof (*_private), +                              gf_posix_mt_posix_private); +        if (!_private) { +                ret = -1; +                goto out; +        } + +        _private->base_path = gf_strdup (dir_data->data); +        _private->base_path_length = strlen (_private->base_path); + +        ret = dict_get_str (this->options, "hostname", &_private->hostname); +        if (ret) { +                _private->hostname = GF_CALLOC (256, sizeof (char), +                                                gf_common_mt_char); +                if (!_private->hostname) { +                        goto out; +                } +                ret = gethostname (_private->hostname, 256); +                if (ret < 0) { +                        gf_msg (this->name, GF_LOG_WARNING, errno, +                                P_MSG_HOSTNAME_MISSING, +                                "could not find hostname "); +                } +        } + +        /* Check for Extended attribute support, if not present, log it */ +        op_ret = sys_lsetxattr (dir_data->data, +                                "trusted.glusterfs.test", "working", 8, 0); +        if (op_ret != -1) { +                sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); +        } else { +                tmp_data = dict_get (this->options, +                                     "mandate-attribute"); +                if (tmp_data) { +                        if (gf_string2boolean (tmp_data->data, +                                               &tmp_bool) == -1) { +                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                        P_MSG_INVALID_OPTION, +                                        "wrong option provided for key " +                                        "\"mandate-attribute\""); +                                ret = -1; +                                goto out; +                        } +                        if (!tmp_bool) { +                                gf_msg (this->name, GF_LOG_WARNING, 0, +                                        P_MSG_XATTR_NOTSUP, +                                        "Extended attribute not supported, " +                                        "starting as per option"); +                        } else { +                                gf_msg (this->name, GF_LOG_CRITICAL, 0, +                                        P_MSG_XATTR_NOTSUP, +                                        "Extended attribute not supported, " +                                        "exiting."); +                                ret = -1; +                                goto out; +                        } +                } else { +                        gf_msg (this->name, GF_LOG_CRITICAL, 0, +                                P_MSG_XATTR_NOTSUP, +                                "Extended attribute not supported, exiting."); +                        ret = -1; +                        goto out; +                } +        } + +        tmp_data = dict_get (this->options, "volume-id"); +        if (tmp_data) { +                op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); +                if (op_ret < 0) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_INVALID_VOLUME_ID, +                                "wrong volume-id (%s) set" +                                " in volume file", tmp_data->data); +                        ret = -1; +                        goto out; +                } +                size = sys_lgetxattr (dir_data->data, +                                      "trusted.glusterfs.volume-id", old_uuid, 16); +                if (size == 16) { +                        if (gf_uuid_compare (old_uuid, dict_uuid)) { +                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                        P_MSG_INVALID_VOLUME_ID, +                                        "mismatching volume-id (%s) received. " +                                        "already is a part of volume %s ", +                                        tmp_data->data, uuid_utoa (old_uuid)); +                                gf_event (EVENT_POSIX_ALREADY_PART_OF_VOLUME, +                                        "volume-id=%s;brick=%s:%s", +                                        uuid_utoa (old_uuid), +                                       _private->hostname, _private->base_path); +                                ret = -1; +                                goto out; +                        } +                } else if ((size == -1) && +                           (errno == ENODATA || errno == ENOATTR)) { +                                gf_msg (this->name, GF_LOG_ERROR, errno, +                                        P_MSG_VOLUME_ID_ABSENT, +                                        "Extended attribute trusted.glusterfs." +                                        "volume-id is absent"); +                                gf_event (EVENT_POSIX_BRICK_NOT_IN_VOLUME, +                                        "brick=%s:%s", +                                       _private->hostname, _private->base_path); +                                ret = -1; +                                goto out; + +                }  else if ((size == -1) && (errno != ENODATA) && +                            (errno != ENOATTR)) { +                        /* Wrong 'volume-id' is set, it should be error */ +                        gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, +                                "brick=%s:%s", +                                _private->hostname, _private->base_path); +                        gf_msg (this->name, GF_LOG_WARNING, errno, +                                P_MSG_VOLUME_ID_FETCH_FAILED, +                                "%s: failed to fetch volume-id", +                                dir_data->data); +                        ret = -1; +                        goto out; +                } else { +                        ret = -1; +                        gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, +                                "brick=%s:%s", +                                _private->hostname, _private->base_path); +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_VOLUME_ID_FETCH_FAILED, +                                "failed to fetch proper volume id from export"); +                        goto out; +                } +        } + +        /* Now check if the export directory has some other 'gfid', +           other than that of root '/' */ +        size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); +        if (size == 16) { +                if (!__is_root_gfid (gfid)) { +                        gf_msg (this->name, GF_LOG_WARNING, errno, +                                P_MSG_GFID_SET_FAILED, +                                "%s: gfid (%s) is not that of glusterfs '/' ", +                                dir_data->data, uuid_utoa (gfid)); +                        ret = -1; +                        goto out; +                } +        } else if (size != -1) { +                /* Wrong 'gfid' is set, it should be error */ +                gf_msg (this->name, GF_LOG_WARNING, errno, +                        P_MSG_GFID_SET_FAILED, +                        "%s: wrong value set as gfid", +                        dir_data->data); +                ret = -1; +                goto out; +        } else if ((size == -1) && (errno != ENODATA) && +                   (errno != ENOATTR)) { +                /* Wrong 'gfid' is set, it should be error */ +                gf_msg (this->name, GF_LOG_WARNING, errno, +                        P_MSG_GFID_SET_FAILED, +                        "%s: failed to fetch gfid", +                        dir_data->data); +                ret = -1; +                goto out; +        } else { +                /* First time volume, set the GFID */ +                size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, +                                     16, XATTR_CREATE); +                if (size == -1) { +                        gf_msg (this->name, GF_LOG_ERROR, errno, +                                P_MSG_GFID_SET_FAILED, +                                "%s: failed to set gfid", +                                dir_data->data); +                        ret = -1; +                        goto out; +                } +        } + +        ret = 0; + +        size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, +                              NULL, 0); +        if ((size < 0) && (errno == ENOTSUP)) { +                gf_msg (this->name, GF_LOG_WARNING, errno, +                        P_MSG_ACL_NOTSUP, +                        "Posix access control list is not supported."); +                gf_event (EVENT_POSIX_ACL_NOT_SUPPORTED, +                        "brick=%s:%s", _private->hostname, _private->base_path); +        } + +        /* +         * _XOPEN_PATH_MAX is the longest file path len we MUST +         * support according to POSIX standard. When prepended +         * by the brick base path it may exceed backed filesystem +         * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If +         * this is the case, chdir() to the brick base path and +         * use relative paths when they are too long. See also +         * MAKE_REAL_PATH in posix-handle.h +          */ +        _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); +        if (_private->path_max != -1 && +            _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { +                ret = chdir(_private->base_path); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_BASEPATH_CHDIR_FAILED, +                                "chdir() to \"%s\" failed", +                                _private->base_path); +                        goto out; +                } +#ifdef __NetBSD__ +                /* +                 * At least on NetBSD, the chdir() above uncovers a +                 * race condition which cause file lookup to fail +                 * with ENODATA for a few seconds. The volume quickly +                 * reaches a sane state, but regression tests are fast +                 * enough to choke on it. The reason is obscure (as +                 * often with race conditions), but sleeping here for +                 * a second seems to workaround the problem. +                 */ +                sleep(1); +#endif +        } + + +        LOCK_INIT (&_private->lock); + +        _private->export_statfs = 1; +        tmp_data = dict_get (this->options, "export-statfs-size"); +        if (tmp_data) { +                if (gf_string2boolean (tmp_data->data, +                                       &_private->export_statfs) == -1) { +                        ret = -1; +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_INVALID_OPTION_VAL, +                                "'export-statfs-size' takes only boolean " +                                "options"); +                        goto out; +                } +                if (!_private->export_statfs) +                        gf_msg_debug (this->name, 0, +                                "'statfs()' returns dummy size"); +        } + +        _private->background_unlink = 0; +        tmp_data = dict_get (this->options, "background-unlink"); +        if (tmp_data) { +                if (gf_string2boolean (tmp_data->data, +                                       &_private->background_unlink) == -1) { +                        ret = -1; +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_INVALID_OPTION_VAL, "'background-unlink'" +                                " takes only boolean options"); +                        goto out; +                } + +                if (_private->background_unlink) +                        gf_msg_debug (this->name, 0, +                                "unlinks will be performed in background"); +        } + +        tmp_data = dict_get (this->options, "o-direct"); +        if (tmp_data) { +                if (gf_string2boolean (tmp_data->data, +                                       &_private->o_direct) == -1) { +                        ret = -1; +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_INVALID_OPTION_VAL, +                                "wrong option provided for 'o-direct'"); +                        goto out; +                } +                if (_private->o_direct) +                        gf_msg_debug (this->name, 0, "o-direct mode is enabled" +                                      " (O_DIRECT for every open)"); +        } + +        tmp_data = dict_get (this->options, "update-link-count-parent"); +        if (tmp_data) { +                if (gf_string2boolean (tmp_data->data, +                                       &_private->update_pgfid_nlinks) == -1) { +                        ret = -1; +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                P_MSG_INVALID_OPTION, "wrong value provided " +                                "for 'update-link-count-parent'"); +                        goto out; +                } +                if (_private->update_pgfid_nlinks) +                        gf_msg_debug (this->name, 0, "update-link-count-parent" +                                      " is enabled. Thus for each file an " +                                      "extended attribute representing the " +                                      "number of hardlinks for that file " +                                      "within the same parent directory is" +                                      " set."); +        } + +        ret = dict_get_str (this->options, "glusterd-uuid", &guuid); +        if (!ret) { +                if (gf_uuid_parse (guuid, _private->glusterd_uuid)) +                        gf_msg (this->name, GF_LOG_WARNING, 0, +                                P_MSG_INVALID_NODE_UUID, "Cannot parse " +                                "glusterd (node) UUID, node-uuid xattr " +                                "request would return - \"No such attribute\""); +        } else { +                gf_msg_debug (this->name, 0, "No glusterd (node) UUID passed -" +                              " node-uuid xattr request will return \"No such" +                              " attribute\""); +        } +        ret = 0; + +        GF_OPTION_INIT ("janitor-sleep-duration", +                        _private->janitor_sleep_duration, int32, out); + +        /* performing open dir on brick dir locks the brick dir +         * and prevents it from being unmounted +         */ +        _private->mount_lock = sys_opendir (dir_data->data); +        if (!_private->mount_lock) { +                ret = -1; +                op_errno = errno; +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        P_MSG_DIR_OPERATION_FAILED, +                        "Could not lock brick directory (%s)", +                        strerror (op_errno)); +                goto out; +        } +#ifndef GF_DARWIN_HOST_OS +        { +                struct rlimit lim; +                lim.rlim_cur = 1048576; +                lim.rlim_max = 1048576; + +                if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { +                        gf_msg (this->name, GF_LOG_WARNING, errno, +                                P_MSG_SET_ULIMIT_FAILED, +                                "Failed to set 'ulimit -n " +                                " 1048576'"); +                        lim.rlim_cur = 65536; +                        lim.rlim_max = 65536; + +                        if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { +                                gf_msg (this->name, GF_LOG_WARNING, errno, +                                        P_MSG_SET_FILE_MAX_FAILED, +                                        "Failed to set maximum allowed open " +                                        "file descriptors to 64k"); +                        } +                        else { +                                gf_msg (this->name, GF_LOG_INFO, 0, +                                        P_MSG_MAX_FILE_OPEN, "Maximum allowed " +                                        "open file descriptors set to 65536"); +                        } +                } +        } +#endif +        _private->shared_brick_count = 1; +        ret = dict_get_int32 (this->options, "shared-brick-count", +                              &_private->shared_brick_count); +        if (ret == -1) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        P_MSG_INVALID_OPTION_VAL, +                        "'shared-brick-count' takes only integer " +                        "values"); +                goto out; +        } + +        this->private = (void *)_private; + +        op_ret = posix_handle_init (this); +        if (op_ret == -1) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, +                        "Posix handle setup failed"); +                ret = -1; +                goto out; +        } + +        op_ret = posix_handle_trash_init (this); +        if (op_ret < 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH, +                        "Posix landfill setup failed"); +                ret = -1; +                goto out; +        } + +        op_ret = posix_create_unlink_dir (this); +        if (op_ret == -1) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        P_MSG_HANDLE_CREATE, +                        "Creation of unlink directory failed"); +                ret = -1; +                goto out; +        } + +        _private->aio_init_done = _gf_false; +        _private->aio_capable = _gf_false; + +        GF_OPTION_INIT ("brick-uid", uid, int32, out); +        GF_OPTION_INIT ("brick-gid", gid, int32, out); +        if (uid != -1 || gid != -1) +                posix_set_owner (this, uid, gid); + +        GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + +        if (_private->aio_configured) { +                op_ret = posix_aio_on (this); + +                if (op_ret == -1) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO, +                                "Posix AIO init failed"); +                        ret = -1; +                        goto out; +                } +        } + +        GF_OPTION_INIT ("node-uuid-pathinfo", +                        _private->node_uuid_pathinfo, bool, out); +        if (_private->node_uuid_pathinfo && +            (gf_uuid_is_null (_private->glusterd_uuid))) { +                        gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, +                                "glusterd uuid is NULL, pathinfo xattr would" +                                " fallback to <hostname>:<export>"); +        } + +        _private->disk_space_check_active = _gf_false; +        _private->disk_space_full          = 0; +        GF_OPTION_INIT ("reserve", +                        _private->disk_reserve, uint32, out); +        if (_private->disk_reserve) +                posix_spawn_disk_space_check_thread (this); + +        _private->health_check_active = _gf_false; +        GF_OPTION_INIT ("health-check-interval", +                        _private->health_check_interval, uint32, out); +        GF_OPTION_INIT ("health-check-timeout", +                        _private->health_check_timeout, uint32, out); +        if (_private->health_check_interval) +                posix_spawn_health_check_thread (this); + +        pthread_mutex_init (&_private->janitor_lock, NULL); +        pthread_cond_init (&_private->janitor_cond, NULL); +        INIT_LIST_HEAD (&_private->janitor_fds); + +        posix_spawn_janitor_thread (this); + +        pthread_mutex_init (&_private->fsync_mutex, NULL); +        pthread_cond_init (&_private->fsync_cond, NULL); +        INIT_LIST_HEAD (&_private->fsyncs); + +        ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this, +                                "posixfsy"); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, errno, +                        P_MSG_FSYNCER_THREAD_CREATE_FAILED, +                        "fsyncer thread creation failed"); +                goto out; +        } + +        GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + +        if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, +                        "Unknown mode string: %s", batch_fsync_mode_str); +                goto out; +        } + +        GF_OPTION_INIT ("gfid2path", _private->gfid2path, bool, out); + +        GF_OPTION_INIT ("gfid2path-separator", gfid2path_sep, str, out); +        if (set_gfid2path_separator (_private, gfid2path_sep) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, +                        "Length of separator exceeds 7: %s", gfid2path_sep); +                goto out; +        } + +#ifdef GF_DARWIN_HOST_OS + +        char  *xattr_user_namespace_mode_str = NULL; + +        GF_OPTION_INIT ("xattr-user-namespace-mode", +                        xattr_user_namespace_mode_str, str, out); + +        if (set_xattr_user_namespace_mode (_private, +                                           xattr_user_namespace_mode_str) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, +                        "Unknown xattr user namespace mode string: %s", +                        xattr_user_namespace_mode_str); +                goto out; +        } +#endif + +        GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, +                        uint32, out); +        GF_OPTION_INIT ("force-create-mode", force_create, int32, out); +        _private->force_create_mode = force_create; + +        GF_OPTION_INIT ("force-directory-mode", force_directory, int32, out); +        _private->force_directory_mode = force_directory; + +        GF_OPTION_INIT ("create-mask", +                        create_mask, int32, out); +        _private->create_mask = create_mask; + +        GF_OPTION_INIT ("create-directory-mask", +                        create_directory_mask, int32, out); +        _private->create_directory_mask = create_directory_mask; +out: +        if (ret) { +                if (_private) { +                        GF_FREE (_private->base_path); + +                        GF_FREE (_private->hostname); + +                        GF_FREE (_private->trash_path); + +                        GF_FREE (_private); +                } + +                this->private = NULL; +        } +        return ret; +} + +void +posix_fini (xlator_t *this) +{ +        struct posix_private *priv = this->private; +        if (!priv) +                return; +        this->private = NULL; +        /*unlock brick dir*/ +        if (priv->mount_lock) +                (void) sys_closedir (priv->mount_lock); + +        GF_FREE (priv->base_path); +        GF_FREE (priv->hostname); +        GF_FREE (priv->trash_path); +        GF_FREE (priv); + +        return; +} + +struct volume_options options[] = { +        { .key  = {"o-direct"}, +          .type = GF_OPTION_TYPE_BOOL }, +        { .key  = {"directory"}, +          .type = GF_OPTION_TYPE_PATH, +          .default_value = "{{brick.path}}" +        }, +        { .key  = {"hostname"}, +          .type = GF_OPTION_TYPE_ANY }, +        { .key  = {"export-statfs-size"}, +          .type = GF_OPTION_TYPE_BOOL }, +        { .key  = {"mandate-attribute"}, +          .type = GF_OPTION_TYPE_BOOL }, +        { .key  = {"background-unlink"}, +          .type = GF_OPTION_TYPE_BOOL }, +        { .key  = {"janitor-sleep-duration"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 1, +          .validate = GF_OPT_VALIDATE_MIN, +          .default_value = "10", +          .description = "Interval (in seconds) between times the internal " +                         "'landfill' directory is emptied." +        }, +        { .key  = {"volume-id"}, +          .type = GF_OPTION_TYPE_ANY, +          .default_value = "{{brick.volumeid}}" +        }, +        { .key  = {"glusterd-uuid"}, +          .type = GF_OPTION_TYPE_STR }, +        { +          .key  = {"linux-aio"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +          .description = "Support for native Linux AIO", +          .op_version = {1}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { +          .key = {"brick-uid"}, +          .type = GF_OPTION_TYPE_INT, +          .min = -1, +          .validate = GF_OPT_VALIDATE_MIN, +          .default_value = "-1", +          .description = "Support for setting uid of brick's owner", +          .op_version = {1}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { +          .key = {"brick-gid"}, +          .type = GF_OPTION_TYPE_INT, +          .min = -1, +          .validate = GF_OPT_VALIDATE_MIN, +          .default_value = "-1", +          .description = "Support for setting gid of brick's owner", +          .op_version = {1}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { .key = {"node-uuid-pathinfo"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +          .description = "return glusterd's node-uuid in pathinfo xattr" +                         " string instead of hostname", +          .op_version = {3}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { +          .key = {"health-check-interval"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .default_value = "30", +          .validate = GF_OPT_VALIDATE_MIN, +          .description = "Interval in seconds for a filesystem health check, " +                         "set to 0 to disable", +          .op_version = {3}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { +          .key = {"health-check-timeout"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .default_value = "10", +          .validate = GF_OPT_VALIDATE_MIN, +          .description = "Interval in seconds to wait aio_write finish for health check, " +                         "set to 0 to disable", +          .op_version = {GD_OP_VERSION_4_0_0}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { +          .key = {"reserve"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .default_value = "1", +          .validate = GF_OPT_VALIDATE_MIN, +          .description = "Percentage of disk space to be reserved." +           " Set to 0 to disable", +          .op_version = {GD_OP_VERSION_3_13_0}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { .key = {"batch-fsync-mode"}, +          .type = GF_OPTION_TYPE_STR, +          .default_value = "reverse-fsync", +          .description = "Possible values:\n" +          "\t- syncfs: Perform one syncfs() on behalf oa batch" +          "of fsyncs.\n" +          "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" +          " of fsyncs and one fsync() per batch.\n" +          "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" +          " of fsyncs and fsync() each file in the batch in reverse order.\n" +          " in reverse order.\n" +          "\t- reverse-fsync: Perform fsync() of each file in the batch in" +          " reverse order.", +          .op_version = {3}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { .key = {"batch-fsync-delay-usec"}, +          .type = GF_OPTION_TYPE_INT, +          .default_value = "0", +          .description = "Num of usecs to wait for aggregating fsync" +          " requests", +          .op_version = {3}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { .key = {"update-link-count-parent"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +          .description = "Enable placeholders for gfid to path conversion", +          .op_version = {GD_OP_VERSION_3_6_0}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +        { .key = {"gfid2path"}, +          .type = GF_OPTION_TYPE_BOOL, +#ifdef __NetBSD__ +          /* +           * NetBSD storage of extended attributes for UFS1 badly +           * scales when the list of extended attributes names rises. +           * This option can add as many extended attributes names +           * as we have files, hence we keep it disabled for performance +           * sake. +           */ +          .default_value = "off", +#else +          .default_value = "on", +#endif +          .description = "Enable logging metadata for gfid to path conversion", +          .op_version = {GD_OP_VERSION_3_12_0}, +          .flags = OPT_FLAG_SETTABLE +        }, +        { .key = {"gfid2path-separator"}, +          .type = GF_OPTION_TYPE_STR, +          .default_value = ":", +          .description = "Path separator for glusterfs.gfidtopath virt xattr", +          .op_version = {GD_OP_VERSION_3_12_0}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +#if GF_DARWIN_HOST_OS +        { .key = {"xattr-user-namespace-mode"}, +          .type = GF_OPTION_TYPE_STR, +          .default_value = "none", +          .description = "Option to control XATTR user namespace on the raw filesystem: " +          "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n" +          " The raw filesystem will not be compatible with OS X Finder.\n" +          "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n", +          .op_version = {GD_OP_VERSION_3_6_0}, +          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +#endif +        { .key  = {"shared-brick-count"}, +          .type = GF_OPTION_TYPE_INT, +          .default_value = "1", +          .description = "Number of bricks sharing the same backend export." +          " Useful for displaying the proper usable size through statvfs() " +          "call (df command)", +        }, +        { .key  = {"force-create-mode"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0000, +          .max = 0777, +          .default_value = "0000", +          .validate = GF_OPT_VALIDATE_MIN, +          .validate = GF_OPT_VALIDATE_MAX, +          .description = "Mode bit permission that will always be set on a file." +        }, +        { .key  = {"force-directory-mode"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0000, +          .max = 0777, +          .default_value = "0000", +          .validate = GF_OPT_VALIDATE_MIN, +          .validate = GF_OPT_VALIDATE_MAX, +          .description = "Mode bit permission that will be always set on directory" +        }, +        { .key  = {"create-mask"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0000, +          .max = 0777, +          .default_value = "0777", +          .validate = GF_OPT_VALIDATE_MIN, +          .validate = GF_OPT_VALIDATE_MAX, +          .description = "Any bit not set here will be removed from the" +          "modes set on a file when it is created" +        }, +        { .key  = {"create-directory-mask"}, +          .type = GF_OPTION_TYPE_INT, +          .min = 0000, +          .max = 0777, +          .default_value = "0777", +          .validate = GF_OPT_VALIDATE_MIN, +          .validate = GF_OPT_VALIDATE_MAX, +          .description = "Any bit not set here will be removed from the" +          "modes set on a directory when it is created" +        }, +        { .key  = {NULL} } +};  | 
