diff options
Diffstat (limited to 'xlators/storage')
| -rw-r--r-- | xlators/storage/posix/src/posix-aio.c | 2 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-aio.h | 3 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-common.c | 250 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-entry-ops.c | 414 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-gfid-path.c | 94 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-gfid-path.h | 11 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-handle.c | 205 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-handle.h | 33 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 716 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 706 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-inode-handle.h | 14 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-messages.h | 3 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-metadata.c | 480 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-metadata.h | 20 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.h | 148 |
15 files changed, 2038 insertions, 1061 deletions
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c index 971ace94754..d0cb0002bbf 100644 --- a/xlators/storage/posix/src/posix-aio.c +++ b/xlators/storage/posix/src/posix-aio.c @@ -7,8 +7,6 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#include <glusterfs/xlator.h> -#include <glusterfs/glusterfs.h> #include "posix.h" #include <sys/uio.h> #include "posix-messages.h" diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h index d2589bffc07..b316deb3229 100644 --- a/xlators/storage/posix/src/posix-aio.h +++ b/xlators/storage/posix/src/posix-aio.h @@ -10,9 +10,6 @@ #ifndef _POSIX_AIO_H #define _POSIX_AIO_H -#include <glusterfs/xlator.h> -#include <glusterfs/glusterfs.h> - // Maximum number of concurrently submitted IO events. The heaviest load // GlusterFS has been able to handle had 60-80 concurrent calls #define POSIX_AIO_MAX_NR_EVENTS 256 diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index f0d8e3fe0c2..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -36,15 +35,7 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include <glusterfs/glusterfs.h> -#include <glusterfs/checksum.h> -#include <glusterfs/dict.h> -#include <glusterfs/logging.h> -#include "posix.h" #include "posix-inode-handle.h" -#include <glusterfs/xlator.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> #include <glusterfs/compat-errno.h> #include <glusterfs/compat.h> #include <glusterfs/byte-order.h> @@ -53,7 +44,6 @@ #include <glusterfs/locking.h> #include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include <glusterfs/hashfn.h> #include "posix-aio.h" #include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" @@ -111,13 +101,13 @@ posix_priv(xlator_t *this) struct posix_private *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; + if (!this) + return 0; + (void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); gf_proc_dump_add_section("%s", key_prefix); - if (!this) - return 0; - priv = this->private; if (!priv) @@ -128,7 +118,6 @@ posix_priv(xlator_t *this) gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); gf_proc_dump_write("max_write", "%" PRId64, GF_ATOMIC_GET(priv->write_value)); - gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files)); return 0; } @@ -146,16 +135,55 @@ int32_t posix_notify(xlator_t *this, int32_t event, void *data, ...) { xlator_t *victim = data; + struct posix_private *priv = this->private; + int ret = 0; + struct timespec sleep_till = { + 0, + }; + glusterfs_ctx_t *ctx = this->ctx; switch (event) { case GF_EVENT_PARENT_UP: { - /* Tell the parent that posix xlator is up */ + /* Notify the parent that posix xlator is up */ default_notify(this, GF_EVENT_CHILD_UP, data); } break; case GF_EVENT_PARENT_DOWN: { if (!victim->cleanup_starting) break; + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); + { + priv->janitor_task_stop = _gf_true; + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, + priv->janitor); + if (!ret) { + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + /* Wait to set janitor_task flag to _gf_false by + * janitor_task_done */ + while (priv->janitor_task_stop) { + (void)pthread_cond_timedwait(&priv->janitor_cond, + &priv->janitor_mutex, + &sleep_till); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + } + } + } + pthread_mutex_unlock(&priv->janitor_mutex); + GF_FREE(priv->janitor); + } + priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", victim->name); default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); @@ -345,11 +373,20 @@ posix_reconfigure(xlator_t *this, dict_t *options) " fallback to <hostname>:<export>"); } - GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out); + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ + priv->disk_unit = 0; + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + if (priv->disk_reserve) { ret = posix_spawn_disk_space_check_thread(this); - if (ret) + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed"); goto out; + } } GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, @@ -515,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this) return 0; } +int +posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name) +{ + int ret = -1; + + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno == ENOENT) { + ret = sys_mkdirat(pdirfd, dir_name, 0700); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", dir_name); + goto out; + } + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-1 %s ", dir_name); + goto out; + } + } +out: + return ret; +} + /** * init - */ @@ -541,7 +602,7 @@ posix_init(xlator_t *this) uuid_t gfid = { 0, }; - uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; char *guuid = NULL; int32_t uid = -1; int32_t gid = -1; @@ -551,6 +612,15 @@ posix_init(xlator_t *this) int force_directory = -1; int create_mask = -1; int create_directory_mask = -1; + char dir_handle[PATH_MAX] = { + 0, + }; + int i; + char fhash[4] = { + 0, + }; + int hdirfd = -1; + char value; dir_data = dict_get(this->options, "directory"); @@ -591,7 +661,12 @@ posix_init(xlator_t *this) } _private->base_path = gf_strdup(dir_data->data); - _private->base_path_length = strlen(_private->base_path); + _private->base_path_length = dir_data->len - 1; + + _private->dirfd = -1; + _private->mount_lock = -1; + for (i = 0; i < 256; i++) + _private->arrdfd[i] = -1; ret = dict_get_str(this->options, "hostname", &_private->hostname); if (ret) { @@ -607,16 +682,11 @@ posix_init(xlator_t *this) } /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", - 8, 0); - if (op_ret != -1) { - ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); - if (ret) { - gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, - "failed to remove xattr: " - "trusted.glusterfs.test"); - } - } else { + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); tmp_data = dict_get(this->options, "mandate-attribute"); if (tmp_data) { if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { @@ -776,7 +846,6 @@ posix_init(xlator_t *this) } LOCK_INIT(&_private->lock); - GF_ATOMIC_INIT(_private->nr_files, 0); GF_ATOMIC_INIT(_private->read_value, 0); GF_ATOMIC_INIT(_private->write_value, 0); @@ -866,8 +935,9 @@ posix_init(xlator_t *this) /* performing open dir on brick dir locks the brick dir * and prevents it from being unmounted */ - _private->mount_lock = sys_opendir(dir_data->data); - if (!_private->mount_lock) { + _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY), + 0); + if (_private->mount_lock < 0) { ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, @@ -911,6 +981,28 @@ posix_init(xlator_t *this) } this->private = (void *)_private; + snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path, + GF_HIDDEN_PATH); + hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock, + dir_handle); + if (hdirfd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error open directory failed for dir %s", dir_handle); + ret = -1; + goto out; + } + _private->dirfd = hdirfd; + for (i = 0; i < 256; i++) { + snprintf(fhash, sizeof(fhash), "%02x", i); + _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd, + fhash); + if (_private->arrdfd[i] < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error openat failed for file %s", fhash); + ret = -1; + goto out; + } + } op_ret = posix_handle_init(this); if (op_ret == -1) { @@ -968,11 +1060,21 @@ posix_init(xlator_t *this) _private->disk_space_check_active = _gf_false; _private->disk_space_full = 0; - GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out); + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + if (_private->disk_reserve) { ret = posix_spawn_disk_space_check_thread(this); - if (ret) + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed "); goto out; + } } _private->health_check_active = _gf_false; @@ -989,7 +1091,11 @@ posix_init(xlator_t *this) pthread_mutex_init(&_private->fsync_mutex, NULL); pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; ret = posix_spawn_ctx_janitor_thread(this); if (ret) goto out; @@ -1066,9 +1172,27 @@ posix_init(xlator_t *this) out); GF_OPTION_INIT("ctime", _private->ctime, bool, out); + out: if (ret) { if (_private) { + if (_private->dirfd >= 0) { + sys_close(_private->dirfd); + _private->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (_private->arrdfd[i] >= 0) { + sys_close(_private->arrdfd[i]); + _private->arrdfd[i] = -1; + } + } + /*unlock brick dir*/ + if (_private->mount_lock >= 0) { + (void)sys_close(_private->mount_lock); + _private->mount_lock = -1; + } + GF_FREE(_private->base_path); GF_FREE(_private->hostname); @@ -1088,7 +1212,10 @@ posix_fini(xlator_t *this) { struct posix_private *priv = this->private; gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; int ret = 0; + int i = 0; if (!priv) return; @@ -1099,6 +1226,18 @@ posix_fini(xlator_t *this) } UNLOCK(&priv->lock); + if (priv->dirfd >= 0) { + sys_close(priv->dirfd); + priv->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (priv->arrdfd[i] >= 0) { + sys_close(priv->arrdfd[i]); + priv->arrdfd[i] = -1; + } + } + if (health_check) { (void)gf_thread_cleanup_xint(priv->health_check); priv->health_check = 0; @@ -1109,6 +1248,7 @@ posix_fini(xlator_t *this) (void)gf_thread_cleanup_xint(priv->disk_space_check); priv->disk_space_check = 0; } + if (priv->janitor) { /*TODO: Make sure the synctask is also complete */ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); @@ -1116,19 +1256,39 @@ posix_fini(xlator_t *this) gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, "Failed to delete janitor timer"); } + GF_FREE(priv->janitor); priv->janitor = NULL; } + + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; } /*unlock brick dir*/ - if (priv->mount_lock) - (void)sys_closedir(priv->mount_lock); + if (priv->mount_lock >= 0) { + (void)sys_close(priv->mount_lock); + priv->mount_lock = -1; + } GF_FREE(priv->base_path); LOCK_DESTROY(&priv->lock); pthread_mutex_destroy(&priv->fsync_mutex); + pthread_cond_destroy(&priv->fsync_cond); + pthread_mutex_destroy(&priv->janitor_mutex); + pthread_cond_destroy(&priv->janitor_cond); GF_FREE(priv->hostname); GF_FREE(priv->trash_path); GF_FREE(priv); @@ -1200,7 +1360,7 @@ struct volume_options posix_options[] = { {.key = {"health-check-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 0, - .default_value = "10", + .default_value = "20", .validate = GF_OPT_VALIDATE_MIN, .description = "Interval in seconds to wait aio_write finish for health check, " @@ -1208,11 +1368,11 @@ struct volume_options posix_options[] = { .op_version = {GD_OP_VERSION_4_0_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"reserve"}, - .type = GF_OPTION_TYPE_INT, + .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, .min = 0, .default_value = "1", .validate = GF_OPT_VALIDATE_MIN, - .description = "Percentage of disk space to be reserved." + .description = "Percentage/Size of disk space to be reserved." " Set to 0 to disable", .op_version = {GD_OP_VERSION_3_13_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, @@ -1306,24 +1466,21 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will always be set on a file."}, {.key = {"force-directory-mode"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will be always set on directory"}, {.key = {"create-mask"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a file when it is created"}, {.key = {"create-directory-mask"}, @@ -1331,8 +1488,7 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a directory when it is created"}, {.key = {"max-hardlinks"}, diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c index fbd83c4aed1..8cc3ccf8c00 100644 --- a/xlators/storage/posix/src/posix-entry-ops.c +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -36,15 +35,10 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include <glusterfs/glusterfs.h> -#include <glusterfs/checksum.h> #include <glusterfs/dict.h> #include <glusterfs/logging.h> #include "posix.h" #include "posix-handle.h" -#include <glusterfs/xlator.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> #include <glusterfs/compat-errno.h> #include <glusterfs/compat.h> #include <glusterfs/byte-order.h> @@ -53,7 +47,6 @@ #include <glusterfs/locking.h> #include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include <glusterfs/hashfn.h> #include "posix-aio.h" #include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" @@ -93,7 +86,7 @@ extern char *marker_xattrs[]; #endif -gf_boolean_t +static gf_boolean_t posix_symlinks_match(xlator_t *this, loc_t *loc, uuid_t gfid) { struct posix_private *priv = NULL; @@ -114,7 +107,7 @@ posix_symlinks_match(xlator_t *this, loc_t *loc, uuid_t gfid) loc->pargfid[0], loc->pargfid[1], uuid_utoa(loc->pargfid), loc->name); - MAKE_HANDLE_GFID_PATH(dir_handle, this, gfid, NULL); + MAKE_HANDLE_GFID_PATH(dir_handle, this, gfid); len = sys_readlink(dir_handle, linkname_actual, PATH_MAX); if (len < 0 || len == PATH_MAX) { if (len == PATH_MAX) { @@ -136,12 +129,12 @@ out: return ret; } -dict_t * +static dict_t * posix_dict_set_nlink(dict_t *req, dict_t *res, int32_t nlink) { int ret = -1; - if (req == NULL || !dict_get(req, GF_REQUEST_LINK_COUNT_XDATA)) + if (req == NULL || !dict_get_sizen(req, GF_REQUEST_LINK_COUNT_XDATA)) goto out; if (res == NULL) @@ -183,6 +176,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) struct posix_private *priv = NULL; posix_inode_ctx_t *ctx = NULL; int ret = 0; + int dfd = -1; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); @@ -204,7 +198,20 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) goto out; } - op_ret = dict_get_int32(xdata, GF_GFIDLESS_LOOKUP, &gfidless); +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && loc->name && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED, + "Lookup issued on .attribute," + " which is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif /* __NetBSD__ */ + + op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless); op_ret = -1; if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) { /* nameless lookup */ @@ -239,12 +246,12 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!op_errno) op_errno = ESTALE; loc_gfid(loc, gfid); - MAKE_HANDLE_ABSPATH(gfid_path, this, gfid); - ret = sys_stat(gfid_path, &statbuf); + MAKE_HANDLE_ABSPATH_FD(gfid_path, this, gfid, dfd); + ret = sys_fstatat(dfd, gfid_path, &statbuf, 0); if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR)) /*Don't unset if it was a symlink to a dir.*/ goto parent; - ret = sys_lstat(gfid_path, &statbuf); + ret = sys_fstatat(dfd, gfid_path, &statbuf, AT_SYMLINK_NOFOLLOW); if (ret == 0 && statbuf.st_nlink == 1) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_HANDLE_DELETE, @@ -263,7 +270,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, &xattr, _gf_true); - if (dict_get(xdata, GF_CLEAN_WRITE_PROTECTION)) { + if (dict_get_sizen(xdata, GF_CLEAN_WRITE_PROTECTION)) { ret = sys_lremovexattr(real_path, GF_PROTECT_FROM_EXTERNAL_WRITES); if (ret == -1 && (errno != ENODATA && errno != ENOATTR)) gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, @@ -272,6 +279,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) } } + posix_update_iatt_buf(&buf, -1, real_path, xdata); if (priv->update_pgfid_nlinks) { if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) { MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, @@ -336,6 +344,38 @@ out: return 0; } +static int32_t +posix_set_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, + const char *bname) +{ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; + char pgfid_bname[1024] = { + 0, + }; + char *key = NULL; + const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + + GF_XXH64_DIGEST_LENGTH * 2 + 1; + int ret = 0; + int len; + + len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), + bname); + gf_xxh64_wrapper((unsigned char *)pgfid_bname, len, + GF_XXHSUM64_DEFAULT_SEED, xxh64); + key = alloca(key_size); + snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); + + ret = sys_lsetxattr(path, key, pgfid_bname, len, XATTR_CREATE); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, + "setting gfid2path xattr failed on %s: key = %s ", path, key); + } + + return ret; +} + int posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) @@ -375,7 +415,8 @@ posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL); mode_bit = (priv->create_mask & mode) | priv->force_create_mode; @@ -407,16 +448,22 @@ posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, /* Check if the 'gfid' already exists, because this mknod may be an internal call from distribute for creating 'linkfile', and that linkfile may be for a hardlinked file */ - if (dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { - dict_del(xdata, GLUSTERFS_INTERNAL_FOP_KEY); - op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); - if (op_ret) { - gf_msg_debug(this->name, 0, - "failed to get the gfid from " - "dict for %s", - loc->path); - goto real_op; + if (dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY); + /* trash xlator did not bring the uuid_via the call + * to GFID_NULL_CHECK_AND_GOTO() above. + * Fetch it explicitly here. + */ + if (frame->root->pid == GF_SERVER_PID_TRASH) { + op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_msg_debug(this->name, 0, + "failed to get the gfid from dict for %s", + loc->path); + goto real_op; + } } + op_ret = posix_create_link_if_gfid_exists(this, uuid_req, real_path, loc->inode->table); if (!op_ret) { @@ -499,7 +546,7 @@ post_op: posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); } - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { if (errno != EEXIST) gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, @@ -574,6 +621,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, int32_t op_errno = 0; char *real_path = NULL, *gfid_path = NULL; char *par_path = NULL, *xattr_name = NULL; + int xattr_name_len; struct iatt stbuf = { 0, }; @@ -591,13 +639,9 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, }; ssize_t size = 0; dict_t *xdata_rsp = NULL; - void *disk_xattr = NULL; + char *disk_xattr = NULL; data_t *arg_data = NULL; char pgfid[GF_UUID_BUF_SIZE] = {0}; - char value_buf[4096] = { - 0, - }; - gf_boolean_t have_val = _gf_false; mode_t mode_bit = 0; DECLARE_OLD_FS_ID_VAR; @@ -619,9 +663,23 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED, + "mkdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL); @@ -631,11 +689,6 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } - if (loc->parent) - gf_uuid_unparse(loc->parent->gfid, pgfid); - else - gf_uuid_unparse(loc->pargfid, pgfid); - gid = frame->root->gid; op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false); @@ -647,8 +700,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode = posix_override_umask(mode, mode_bit); if (xdata) { - op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); - if (!op_ret && !gf_uuid_compare(stbuf.ia_gfid, uuid_req)) { + if (!gf_uuid_compare(stbuf.ia_gfid, uuid_req)) { op_ret = -1; op_errno = EEXIST; goto out; @@ -658,12 +710,13 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, if (!gf_uuid_is_null(uuid_req)) { op_ret = posix_istat(this, loc->inode, uuid_req, NULL, &stbuf); if ((op_ret == 0) && IA_ISDIR(stbuf.ia_type)) { - size = posix_handle_path(this, uuid_req, NULL, NULL, 0); - if (size > 0) - gfid_path = alloca(size); - - if (gfid_path) - posix_handle_path(this, uuid_req, NULL, gfid_path, size); + gfid_path = alloca(PATH_MAX); + size = posix_handle_path(this, uuid_req, NULL, gfid_path, PATH_MAX); + if (size <= 0) { + op_errno = ESTALE; + op_ret = -1; + goto out; + } if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_OF_SAME_ID, @@ -712,25 +765,53 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode |= S_ISGID; } - op_ret = dict_get_str(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); if (xattr_name != NULL) { - arg_data = dict_get(xdata, xattr_name); + xattr_name_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_name_len); if (arg_data) { - size = sys_lgetxattr(par_path, xattr_name, value_buf, - sizeof(value_buf) - 1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg(this->name, GF_LOG_INFO, errno, + if (loc->parent) + gf_uuid_unparse(loc->parent->gfid, pgfid); + else + gf_uuid_unparse(loc->pargfid, pgfid); + + size = 256; + disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!disk_xattr) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): GF_MALLOC failed during" + " preop of mkdir (%s)", + pgfid, loc->name, real_path); + goto out; + } + disk_xattr[size] = '\0'; + + size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size); + if (size == -1) { + if (disk_xattr) { + GF_FREE(disk_xattr); + disk_xattr = NULL; + } + if (errno != ERANGE) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): getxattr on key " - "(%s) path (%s) failed due to " - " buffer overflow", - pgfid, loc->name, xattr_name, par_path); - size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + "mkdir (%s/%s): getxattr failed during" + " preop of mkdir (%s).", + pgfid, loc->name, real_path); + goto out; } - if (size < 0) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on key " + "(%s) path (%s) failed due to " + " buffer overflow", + pgfid, loc->name, xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + if (size == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, @@ -740,23 +821,20 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, pgfid, loc->name, xattr_name, par_path); goto out; } - } - disk_xattr = alloca(size); - if (disk_xattr == NULL) { - op_ret = -1; - op_errno = errno; - gf_msg(this->name, GF_LOG_ERROR, errno, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): alloca failed during" - " preop of mkdir (%s)", - pgfid, loc->name, real_path); - goto out; - } - if (have_val) { - memcpy(disk_xattr, value_buf, size); - } else { + disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!disk_xattr) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): GF_MALLOC failed during" + " preop of mkdir (%s)", + pgfid, loc->name, real_path); + goto out; + } + disk_xattr[size] = '\0'; size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size); - if (size < 0) { + if (size == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, @@ -796,10 +874,10 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } - dict_del(xdata, xattr_name); + dict_deln(xdata, xattr_name, xattr_name_len); } - dict_del(xdata, GF_PREOP_PARENT_KEY); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); } op_ret = sys_mkdir(real_path, mode); @@ -827,7 +905,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, "setting ACLs on %s failed ", real_path); } - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "setting xattrs on %s failed", real_path); @@ -869,6 +947,9 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, out: SET_TO_OLD_FS_ID(); + if (disk_xattr) + GF_FREE(disk_xattr); + if (op_ret < 0) { if (entry_created) sys_rmdir(real_path); @@ -887,7 +968,7 @@ out: return 0; } -int +static int posix_add_unlink_to_ctx(inode_t *inode, xlator_t *this, char *unlink_path) { uint64_t ctx = GF_UNLINK_FALSE; @@ -895,7 +976,7 @@ posix_add_unlink_to_ctx(inode_t *inode, xlator_t *this, char *unlink_path) if (!unlink_path) { gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, - "Creation of unlink entry failed for gfid: %s", unlink_path); + "Creation of unlink entry failed"); ret = -1; goto out; } @@ -910,17 +991,17 @@ out: return ret; } -int32_t +static int32_t posix_move_gfid_to_unlink(xlator_t *this, uuid_t gfid, loc_t *loc) { char *unlink_path = NULL; char *gfid_path = NULL; - int ret = 0; + int ret = -1; struct posix_private *priv_posix = NULL; priv_posix = (struct posix_private *)this->private; - MAKE_HANDLE_GFID_PATH(gfid_path, this, gfid, NULL); + MAKE_HANDLE_GFID_PATH(gfid_path, this, gfid); POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, loc->inode->gfid, unlink_path); @@ -944,7 +1025,7 @@ out: return ret; } -int32_t +static int32_t posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, const char *real_path, struct iatt *stbuf, int32_t *op_errno, loc_t *loc, @@ -987,6 +1068,8 @@ posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, */ ret = posix_pstat(this, NULL, loc->gfid, real_path, &prebuf, _gf_true); if (ret) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "lstat on %s failed", real_path); goto err; @@ -995,6 +1078,12 @@ posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, /* Unlink the actual file */ ret = sys_unlink(real_path); + + if (locked) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; + } + if (ret == -1) { if (op_errno) *op_errno = errno; @@ -1003,11 +1092,6 @@ posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, goto err; } - if (locked) { - UNLOCK(&loc->inode->lock); - locked = _gf_false; - } - if (update_ctime) { posix_set_ctime(frame, this, NULL, -1, loc->inode, stbuf); } @@ -1027,10 +1111,10 @@ err: return -1; } -gf_boolean_t +static gf_boolean_t posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, - const char *linkto_xattr, struct iatt *stbuf, - const char *real_path) + const int keylen, const char *linkto_xattr, + struct iatt *stbuf, const char *real_path) { gf_boolean_t skip_unlink = _gf_false; gf_boolean_t is_dht_linkto_file = _gf_false; @@ -1038,7 +1122,7 @@ posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, ssize_t xattr_size = -1; int op_ret = -1; - op_ret = dict_get_int32(xdata, key, &unlink_if_linkto); + op_ret = dict_get_int32n(xdata, key, keylen, &unlink_if_linkto); if (!op_ret && unlink_if_linkto) { is_dht_linkto_file = IS_DHT_LINKFILE_MODE(stbuf); @@ -1049,11 +1133,11 @@ posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, xattr_size = sys_lgetxattr(real_path, linkto_xattr, NULL, 0); + UNLOCK(&loc->inode->lock); + if (xattr_size <= 0) skip_unlink = _gf_true; - UNLOCK(&loc->inode->lock); - gf_msg("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS, "linkto_xattr status: %" PRIu32 " for %s", skip_unlink, real_path); @@ -1061,6 +1145,38 @@ posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, return skip_unlink; } +static int32_t +posix_remove_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, + const char *bname) +{ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; + char pgfid_bname[1024] = { + 0, + }; + int ret = 0; + char *key = NULL; + const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + + GF_XXH64_DIGEST_LENGTH * 2 + 1; + int len; + + len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), + bname); + gf_xxh64_wrapper((unsigned char *)pgfid_bname, len, + GF_XXHSUM64_DEFAULT_SEED, xxh64); + key = alloca(key_size); + snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); + + ret = sys_lremovexattr(path, key); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, + "removing gfid2path xattr failed on %s: key = %s", path, key); + } + + return ret; +} + int32_t posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) @@ -1070,6 +1186,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, char *real_path = NULL; char *par_path = NULL; int32_t fd = -1; + int ret = -1; struct iatt stbuf = { 0, }; @@ -1089,9 +1206,6 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, int32_t skip_unlink = 0; int32_t fdstat_requested = 0; dict_t *unwind_dict = NULL; - void *uuid = NULL; - char uuid_str[GF_UUID_BUF_SIZE] = {0}; - char gfid_str[GF_UUID_BUF_SIZE] = {0}; gf_boolean_t get_link_count = _gf_false; posix_inode_ctx_t *ctx = NULL; @@ -1121,22 +1235,8 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, priv = this->private; - op_ret = dict_get_ptr(xdata, TIER_LINKFILE_GFID, &uuid); - - if (!op_ret && gf_uuid_compare(uuid, stbuf.ia_gfid)) { - op_errno = ENOENT; - op_ret = -1; - gf_uuid_unparse(uuid, uuid_str); - gf_uuid_unparse(stbuf.ia_gfid, gfid_str); - gf_msg_debug(this->name, op_errno, - "Mismatch in gfid for path " - "%s. Aborting the unlink. loc->gfid = %s, " - "stbuf->ia_gfid = %s", - real_path, uuid_str, gfid_str); - goto out; - } - - op_ret = dict_get_int32(xdata, DHT_SKIP_OPEN_FD_UNLINK, &check_open_fd); + op_ret = dict_get_int32_sizen(xdata, DHT_SKIP_OPEN_FD_UNLINK, + &check_open_fd); if (!op_ret && check_open_fd) { LOCK(&loc->inode->lock); @@ -1163,10 +1263,8 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, * we don't need to call second function, skip unlink. */ skip_unlink = posix_skip_non_linkto_unlink( - xdata, loc, DHT_SKIP_NON_LINKTO_UNLINK, DHT_LINKTO, &stbuf, real_path); - skip_unlink = skip_unlink || posix_skip_non_linkto_unlink( - xdata, loc, TIER_SKIP_NON_LINKTO_UNLINK, - TIER_LINKTO, &stbuf, real_path); + xdata, loc, DHT_SKIP_NON_LINKTO_UNLINK, + SLEN(DHT_SKIP_NON_LINKTO_UNLINK), DHT_LINKTO, &stbuf, real_path); if (skip_unlink) { op_ret = -1; op_errno = EBUSY; @@ -1174,7 +1272,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, } if (IA_ISREG(loc->inode->ia_type) && xdata && - dict_get(xdata, DHT_IATT_IN_XDATA_KEY)) { + dict_get_sizen(xdata, DHT_IATT_IN_XDATA_KEY)) { fdstat_requested = 1; } @@ -1234,7 +1332,15 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, goto out; } - if (xdata && dict_get(xdata, GET_LINK_COUNT)) + if (xdata && dict_get_sizen(xdata, GF_GET_FILE_BLOCK_COUNT)) { + ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT, + stbuf.ia_blocks); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT); + } + + if (xdata && dict_get_sizen(xdata, GET_LINK_COUNT)) get_link_count = _gf_true; op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf, &op_errno, loc, get_link_count, @@ -1336,6 +1442,19 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED, + "rmdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -1355,13 +1474,12 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, } if (flags) { - gfid_str = uuid_utoa(stbuf.ia_gfid); - op_ret = sys_mkdir(priv->trash_path, 0755); if (errno != EEXIST && op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED, "mkdir of %s failed", priv->trash_path); } else { + gfid_str = uuid_utoa(stbuf.ia_gfid); (void)snprintf(tmp_path, sizeof(tmp_path), "%s/%s", priv->trash_path, gfid_str); gf_msg_debug(this->name, 0, "Moving %s to %s", real_path, tmp_path); @@ -1441,6 +1559,9 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, char *pgfid_xattr_key = NULL; int32_t nlink_samepgfid = 0; gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + uuid_t uuid_req = { + 0, + }; DECLARE_OLD_FS_ID_VAR; @@ -1451,7 +1572,8 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -1519,7 +1641,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, } ignore: - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "setting xattrs on %s failed ", real_path); @@ -1624,7 +1746,6 @@ posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, priv = this->private; VALIDATE_OR_GOTO(priv, out); - DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL); @@ -2060,6 +2181,11 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, char *pgfid_xattr_key = NULL; gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; mode_t mode_bit = 0; + uuid_t uuid_req = { + 0, + }; + + dict_t *xdata_rsp = dict_ref(xdata); DECLARE_OLD_FS_ID_VAR; @@ -2071,7 +2197,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -2109,6 +2236,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, was_present = 0; } + if (!was_present) { + if (posix_is_layout_stale(xdata, par_path, this)) { + op_ret = -1; + op_errno = EIO; + if (!xdata_rsp) { + xdata_rsp = dict_new(); + if (!xdata_rsp) { + op_errno = ENOMEM; + goto out; + } + } + + if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == + -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, + "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); + } + + goto out; + } + } + if (priv->o_direct) _flags |= O_DIRECT; @@ -2157,7 +2306,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); } ignore: - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "setting xattrs on %s failed ", real_path); @@ -2210,8 +2359,6 @@ fill_stat: gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd context path=%s fd=%p", real_path, fd); - GF_ATOMIC_INC(priv->nr_files); - op_ret = 0; out: @@ -2230,7 +2377,10 @@ out: STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, (loc) ? loc->inode : NULL, &stbuf, &preparent, - &postparent, xdata); + &postparent, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); return 0; } diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c index 64b5c6c3f09..1b38e9b0479 100644 --- a/xlators/storage/posix/src/posix-gfid-path.c +++ b/xlators/storage/posix/src/posix-gfid-path.c @@ -8,8 +8,9 @@ cases as published by the Free Software Foundation. */ -#include <glusterfs/common-utils.h> -#include <glusterfs/xlator.h> +#include <stdint.h> + +#include <glusterfs/compat-errno.h> #include <glusterfs/syscall.h> #include <glusterfs/logging.h> #include "posix-messages.h" @@ -17,93 +18,14 @@ #include "posix-gfid-path.h" #include "posix.h" -int32_t -posix_set_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, - const char *bname) -{ - char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { - 0, - }; - char pgfid_bname[1024] = { - 0, - }; - char *key = NULL; - char *val = NULL; - size_t key_size = 0; - size_t val_size = 0; - int ret = 0; - - GF_VALIDATE_OR_GOTO("posix", this, err); - - snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), - bname); - gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname), - GF_XXHSUM64_DEFAULT_SEED, xxh64); - key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + GF_XXH64_DIGEST_LENGTH * 2 + - 1; - key = alloca(key_size); - snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); - - val_size = UUID_CANONICAL_FORM_LEN + NAME_MAX + 2; - val = alloca(val_size); - snprintf(val, val_size, "%s/%s", uuid_utoa(pgfid), bname); - - ret = sys_lsetxattr(path, key, val, strlen(val), XATTR_CREATE); - if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, - "setting gfid2path xattr failed on %s: key = %s ", path, key); - goto err; - } - return 0; -err: - return -1; -} - -int32_t -posix_remove_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, - const char *bname) -{ - char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { - 0, - }; - char pgfid_bname[1024] = { - 0, - }; - int ret = 0; - char *key = NULL; - size_t key_size = 0; - - GF_VALIDATE_OR_GOTO("posix", this, err); - - snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), - bname); - gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname), - GF_XXHSUM64_DEFAULT_SEED, xxh64); - key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + GF_XXH64_DIGEST_LENGTH * 2 + - 1; - key = alloca(key_size); - snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); - - ret = sys_lremovexattr(path, key); - if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, - "removing gfid2path xattr failed on %s: key = %s", path, key); - goto err; - } - return 0; -err: - return -1; -} - gf_boolean_t posix_is_gfid2path_xattr(const char *name) { if (name && strncmp(GFID2PATH_XATTR_KEY_PREFIX, name, - GFID2PATH_XATTR_KEY_PREFIX_LENGTH) == 0) { + GFID2PATH_XATTR_KEY_PREFIX_LENGTH) == 0) return _gf_true; - } else { - return _gf_false; - } + + return _gf_false; } static int gf_posix_xattr_enotsup_log; @@ -213,7 +135,8 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, remaining_size = size; list_offset = 0; while (remaining_size > 0) { - snprintf(keybuffer, sizeof(keybuffer), "%s", list + list_offset); + len = snprintf(keybuffer, sizeof(keybuffer), "%s", + list + list_offset); if (!posix_is_gfid2path_xattr(keybuffer)) { goto ignore; @@ -243,7 +166,6 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, i++; ignore: - len = strlen(keybuffer); remaining_size -= (len + 1); list_offset += (len + 1); } /* while (remaining_size > 0) */ diff --git a/xlators/storage/posix/src/posix-gfid-path.h b/xlators/storage/posix/src/posix-gfid-path.h index 323f11429a8..79096e5893f 100644 --- a/xlators/storage/posix/src/posix-gfid-path.h +++ b/xlators/storage/posix/src/posix-gfid-path.h @@ -11,16 +11,15 @@ #ifndef _POSIX_GFID_PATH_H #define _POSIX_GFID_PATH_H -#include <glusterfs/xlator.h> -#include <glusterfs/common-utils.h> #include <glusterfs/compat-errno.h> +#include <stdint.h> // for int32_t +#include "glusterfs/dict.h" // for dict_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t +#include "glusterfs/inode.h" // for inode_t +#include "uuid.h" // for uuid_t #define MAX_GFID2PATH_LINK_SUP 500 -int32_t -posix_set_gfid2path_xattr(xlator_t *, const char *, uuid_t, const char *); -int32_t -posix_remove_gfid2path_xattr(xlator_t *, const char *, uuid_t, const char *); gf_boolean_t posix_is_gfid2path_xattr(const char *name); int32_t diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c index dac55e17118..410b38da8cb 100644 --- a/xlators/storage/posix/src/posix-handle.c +++ b/xlators/storage/posix/src/posix-handle.c @@ -16,11 +16,8 @@ #include <alloca.h> #endif -#include <glusterfs/common-utils.h> - #include "posix-handle.h" #include "posix.h" -#include <glusterfs/xlator.h> #include <glusterfs/syscall.h> #include "posix-messages.h" #include "posix-metadata.h" @@ -28,7 +25,7 @@ #include <glusterfs/compat-errno.h> int -posix_handle_mkdir_hashes(xlator_t *this, const char *newpath); +posix_handle_mkdir_hashes(xlator_t *this, int dfd, uuid_t gfid); inode_t * posix_resolve(xlator_t *this, inode_table_t *itable, inode_t *parent, @@ -266,9 +263,7 @@ posix_handle_relpath(xlator_t *this, uuid_t gfid, const char *basename, char *uuid_str = NULL; int len = 0; - len = SLEN("../") + SLEN("../") + SLEN("00/") + SLEN("00/") + - SLEN(UUID0_STR) + 1 /* '\0' */ - ; + len = POSIX_GFID_HANDLE_RELSIZE; if (basename) { len += (strlen(basename) + 1); @@ -336,9 +331,23 @@ posix_handle_pump(xlator_t *this, char *buf, int len, int maxlen, int ret = 0; int blen = 0; int link_len = 0; + char tmpstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + char d2[3] = { + 0, + }; + int index = 0; + int dirfd = 0; + struct posix_private *priv = this->private; + + strncpy(tmpstr, (base_str + pfx_len + 3), 40); + strncpy(d2, (base_str + pfx_len), 2); + index = strtoul(d2, NULL, 16); + dirfd = priv->arrdfd[index]; /* is a directory's symlink-handle */ - ret = sys_readlink(base_str, linkname, 512); + ret = readlinkat(dirfd, tmpstr, linkname, 512); if (ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED, "internal readlink failed on %s ", base_str); @@ -403,6 +412,11 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, int pfx_len; int maxlen; char *buf; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; priv = this->private; @@ -416,12 +430,14 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, buf = alloca(maxlen); } + index = gfid[0]; + dfd = priv->arrdfd[index]; + base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45); base_str = alloca(base_len + 1); base_len = snprintf(base_str, base_len + 1, "%s/%s/%02x/%02x/%s", priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); - pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1; if (basename) { @@ -430,7 +446,8 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, len = snprintf(buf, maxlen, "%s", base_str); } - ret = sys_lstat(base_str, &stat); + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_str); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1)) goto out; @@ -443,7 +460,6 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, if (ret == -1) break; - ret = sys_lstat(buf, &stat); } while ((ret == -1) && errno == ELOOP); @@ -452,8 +468,7 @@ out: } int -posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t buflen) +posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t buflen) { struct posix_private *priv = NULL; char *uuid_str = NULL; @@ -461,16 +476,9 @@ posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, priv = this->private; - len = priv->base_path_length /* option directory "/export" */ - + SLEN("/") + SLEN(GF_HIDDEN_PATH) + SLEN("/") + SLEN("00/") + - SLEN("00/") + SLEN(UUID0_STR) + 1 /* '\0' */ - ; + len = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - if (basename) { - len += (strlen(basename) + 1); - } else { - len += 256; /* worst-case for directory's symlink-handle expansion */ - } + len += 256; /* worst-case for directory's symlink-handle expansion */ if ((buflen < len) || !buf) return len; @@ -478,22 +486,12 @@ posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, uuid_str = uuid_utoa(gfid); if (__is_root_gfid(gfid)) { - if (basename) { - len = snprintf(buf, buflen, "%s/%s", priv->base_path, basename); - } else { - len = snprintf(buf, buflen, "%s", priv->base_path); - } - goto out; - } - - if (basename) { - len = snprintf(buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path, - GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str, basename); + len = snprintf(buf, buflen, "%s", priv->base_path); } else { len = snprintf(buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); } -out: + return len; } @@ -507,7 +505,8 @@ posix_handle_init(xlator_t *this) struct stat rootbuf; struct stat exportbuf; char *rootstr = NULL; - uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int dfd = 0; priv = this->private; @@ -557,9 +556,8 @@ posix_handle_init(xlator_t *this) return -1; } - MAKE_HANDLE_ABSPATH(rootstr, this, gfid); - - ret = sys_stat(rootstr, &rootbuf); + MAKE_HANDLE_ABSPATH_FD(rootstr, this, gfid, dfd); + ret = sys_fstatat(dfd, rootstr, &rootbuf, 0); switch (ret) { case -1: if (errno != ENOENT) { @@ -567,15 +565,14 @@ posix_handle_init(xlator_t *this) "%s", priv->base_path); return -1; } - - ret = posix_handle_mkdir_hashes(this, rootstr); + ret = posix_handle_mkdir_hashes(this, dfd, gfid); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "mkdir %s failed", rootstr); return -1; } - ret = sys_symlink("../../..", rootstr); + ret = sys_symlinkat("../../..", dfd, rootstr); if (ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, "symlink %s creation failed", rootstr); @@ -704,30 +701,18 @@ out: } int -posix_handle_mkdir_hashes(xlator_t *this, const char *newpath) +posix_handle_mkdir_hashes(xlator_t *this, int dirfd, uuid_t gfid) { - char *duppath = NULL; - char *parpath = NULL; - int ret = 0; - - duppath = strdupa(newpath); - parpath = dirname(duppath); - parpath = dirname(duppath); - - ret = sys_mkdir(parpath, 0700); - if (ret == -1 && errno != EEXIST) { - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, - "error mkdir hash-1 %s ", parpath); - return -1; - } - - strcpy(duppath, newpath); - parpath = dirname(duppath); + int ret = -1; + char d2[3] = { + 0, + }; - ret = sys_mkdir(parpath, 0700); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_mkdirat(dirfd, d2, 0700); if (ret == -1 && errno != EEXIST) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, - "error mkdir hash-2 %s ", parpath); + "error mkdir hash-2 %s ", uuid_utoa(gfid)); return -1; } @@ -738,51 +723,59 @@ int posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf) { - char *newpath = NULL; struct stat newbuf; + struct stat hashbuf; int ret = -1; gf_boolean_t link_exists = _gf_false; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; - MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - ret = sys_lstat(newpath, &newbuf); if (ret == -1 && errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", - newpath); + uuid_utoa(gfid)); return -1; } if (ret == -1 && errno == ENOENT) { - ret = posix_handle_mkdir_hashes(this, newpath); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "mkdir %s failed ", newpath); - return -1; + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", uuid_utoa(gfid)); + return -1; + } } - - ret = sys_link(oldpath, newpath); + ret = sys_linkat(AT_FDCWD, oldpath, dfd, newstr); if (ret) { if (errno != EEXIST) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "link %s -> %s" "failed ", - oldpath, newpath); + oldpath, newstr); return -1; } else { link_exists = _gf_true; } } + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - ret = sys_lstat(newpath, &newbuf); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "lstat on %s failed", newpath); + "lstat on %s failed", uuid_utoa(gfid)); return -1; } if ((link_exists) && (!S_ISREG(newbuf.st_mode))) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_HANDLE_CREATE, - "%s - Expected regular file", newpath); + "%s - Expected regular file", uuid_utoa(gfid)); return -1; } } @@ -792,7 +785,8 @@ posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, "mismatching ino/dev between file %s (%lld/%lld) " "and handle %s (%lld/%lld)", oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev, - newpath, (long long)newbuf.st_ino, (long long)newbuf.st_dev); + uuid_utoa(gfid), (long long)newbuf.st_ino, + (long long)newbuf.st_dev); ret = -1; } @@ -806,15 +800,23 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, char *oldpath = NULL; char *newpath = NULL; struct stat newbuf; + struct stat hashbuf; int ret = -1; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); MAKE_HANDLE_RELPATH(oldpath, this, loc->pargfid, loc->name); - ret = sys_lstat(newpath, &newbuf); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + if (ret == -1 && errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", - newpath); + newstr); return -1; } @@ -824,24 +826,30 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, errno = EINVAL; return -1; } - ret = posix_handle_mkdir_hashes(this, newpath); + + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); + if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "mkdir %s failed ", newpath); - return -1; + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", newstr); + return -1; + } } - - ret = sys_symlink(oldpath, newpath); + ret = sys_symlinkat(oldpath, dfd, newstr); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "symlink %s -> %s failed", oldpath, newpath); + "symlink %s -> %s failed", oldpath, newstr); return -1; } - ret = sys_lstat(newpath, &newbuf); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "stat on %s failed ", newpath); + "stat on %s failed ", newstr); return -1; } } @@ -849,7 +857,7 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, ret = sys_stat(real_path, &newbuf); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "stat on %s failed ", newpath); + "stat on %s failed ", real_path); return -1; } @@ -871,26 +879,33 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, int posix_handle_unset_gfid(xlator_t *this, uuid_t gfid) { - char *path = NULL; int ret = 0; struct stat stat; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + struct posix_private *priv = this->private; - MAKE_HANDLE_GFID_PATH(path, this, gfid, NULL); + index = gfid[0]; + dfd = priv->arrdfd[index]; - ret = sys_lstat(path, &stat); + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_utoa(gfid)); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); if (ret == -1) { if (errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s", - path); + newstr); } goto out; } - ret = sys_unlink(path); - if (ret == -1) { + ret = sys_unlinkat(dfd, newstr); + if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, - "unlink %s failed ", path); + "unlink %s is failed", newstr); } out: diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h index c4d7cb14503..f33ed92620d 100644 --- a/xlators/storage/posix/src/posix-handle.h +++ b/xlators/storage/posix/src/posix-handle.h @@ -18,7 +18,7 @@ #define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) \ do { \ - var = alloca(strlen(prefix) + UUID_CANONICAL_FORM_LEN + 1); \ + var = alloca(SLEN(prefix) + UUID_CANONICAL_FORM_LEN + 1); \ strcpy(var, prefix); \ strcat(var, uuid_utoa(pgfid)); \ } while (0) @@ -111,22 +111,23 @@ } \ } while (0) -#define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) \ +#define MAKE_HANDLE_GFID_PATH(var, this, gfid) \ do { \ int __len = 0; \ - __len = posix_handle_gfid_path(this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ + struct posix_private *__priv = this->private; \ + __len = POSIX_GFID_HANDLE_SIZE(__priv->base_path_length); \ + __len += 256; \ var = alloca(__len); \ - __len = posix_handle_gfid_path(this, gfid, base, var, __len); \ + __len = posix_handle_gfid_path(this, gfid, var, __len); \ } while (0) #define MAKE_HANDLE_RELPATH(var, this, gfid, base) \ do { \ int __len; \ - __len = posix_handle_relpath(this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ + __len = POSIX_GFID_HANDLE_RELSIZE; \ + if (base) { \ + __len += (strlen(base) + 1); \ + } \ var = alloca(__len); \ __len = posix_handle_relpath(this, gfid, base, var, __len); \ } while (0) @@ -140,6 +141,16 @@ __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \ } while (0) +#define MAKE_HANDLE_ABSPATH_FD(var, this, gfid, dfd) \ + do { \ + struct posix_private *__priv = this->private; \ + int findex = gfid[0]; \ + int __len = POSIX_GFID_HASH2_LEN; \ + var = alloca(__len); \ + snprintf(var, __len, "%02x/%s", gfid[1], uuid_utoa(gfid)); \ + dfd = __priv->arrdfd[findex]; \ + } while (0) + #define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) \ do { \ char *__parp; \ @@ -183,9 +194,9 @@ /* expand ELOOP */ \ } while (0) +#define POSIX_GFID_HASH2_LEN 45 int -posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t len); +posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t len); int posix_handle_hard(xlator_t *this, const char *path, uuid_t gfid, diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 193afc5f3fa..67db3324083 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -33,17 +33,10 @@ #endif /* GF_BSD_HOST_OS */ #include <fnmatch.h> -#include <glusterfs/glusterfs.h> -#include <glusterfs/checksum.h> -#include <glusterfs/dict.h> -#include <glusterfs/logging.h> #include "posix.h" #include "posix-messages.h" #include "posix-metadata.h" #include "posix-handle.h" -#include <glusterfs/xlator.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> #include <glusterfs/compat-errno.h> #include <glusterfs/compat.h> #include <glusterfs/byte-order.h> @@ -52,11 +45,9 @@ #include <glusterfs/locking.h> #include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include <glusterfs/hashfn.h> #include <glusterfs/glusterfs-acl.h> #include "posix-gfid-path.h" #include <glusterfs/events.h> -#include "glusterfsd.h" #include "glusterfs/syncop.h" #include "timer-wheel.h" #include <sys/types.h> @@ -64,20 +55,20 @@ char *marker_xattrs[] = {"trusted.glusterfs.quota.*", "trusted.glusterfs.*.xtime", NULL}; -char *marker_contri_key = "trusted.*.*.contri"; +static char *marker_contri_key = "trusted.*.*.contri"; static char *posix_ignore_xattrs[] = {"gfid-req", + GLUSTERFS_INTERNAL_FOP_KEY, GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, GLUSTERFS_POSIXLK_COUNT, GLUSTERFS_PARENT_ENTRYLK, GF_GFIDLESS_LOOKUP, GLUSTERFS_INODELK_DOM_COUNT, - GLUSTERFS_INTERNAL_FOP_KEY, NULL}; -static char *list_xattr_ignore_xattrs[] = { - GF_SELINUX_XATTR_KEY, GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, NULL}; +static char *list_xattr_ignore_xattrs[] = {GFID_XATTR_KEY, GF_XATTR_VOL_ID_KEY, + GF_SELINUX_XATTR_KEY, NULL}; gf_boolean_t posix_special_xattr(char **pattern, char *key) @@ -146,9 +137,6 @@ posix_handle_georep_xattrs(call_frame_t *frame, const char *name, int *op_errno, static const char *georep_xattr[] = { "*.glusterfs.*.stime", "*.glusterfs.*.xtime", "*.glusterfs.*.entry_stime", "*.glusterfs.volume-mark.*", NULL}; - if (frame && frame->root) { - pid = frame->root->pid; - } if (!name) { /* No need to do anything here */ @@ -156,6 +144,10 @@ posix_handle_georep_xattrs(call_frame_t *frame, const char *name, int *op_errno, goto out; } + if (frame && frame->root) { + pid = frame->root->pid; + } + if (pid == GF_CLIENT_PID_GSYNCD && is_getxattr) { filter_xattr = _gf_false; @@ -221,14 +213,11 @@ posix_xattr_ignorable(char *key) static int _posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) { - ssize_t xattr_size = -1; - int ret = 0; + ssize_t xattr_size = 256; /* guesstimated initial size of xattr */ + int ret = -1; char *value = NULL; - char val_buf[256] = {0}; - gf_boolean_t have_val = _gf_false; if (!gf_is_valid_xattr_namespace(key)) { - ret = -1; goto out; } @@ -237,46 +226,54 @@ _posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) * of getxattr with NULL buf to find the length and then getxattr with * allocated buf to fill the data. This way we reduce lot of getxattrs. */ - if (filler->real_path) - xattr_size = sys_lgetxattr(filler->real_path, key, val_buf, - sizeof(val_buf) - 1); - else - xattr_size = sys_fgetxattr(filler->fdnum, key, val_buf, - sizeof(val_buf) - 1); - if (xattr_size >= 0) { - have_val = _gf_true; - } else if (xattr_size == -1 && errno != ERANGE) { - ret = -1; + value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char); + if (!value) { goto out; } - if (have_val) { - /*No need to do getxattr*/ - } else if (filler->real_path) { - xattr_size = sys_lgetxattr(filler->real_path, key, NULL, 0); - } else { - xattr_size = sys_fgetxattr(filler->fdnum, key, NULL, 0); - } + if (filler->real_path) + xattr_size = sys_lgetxattr(filler->real_path, key, value, xattr_size); + else + xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size); + + if (xattr_size == -1) { + if (value) { + GF_FREE(value); + value = NULL; + } + /* xattr_size == -1 - failed to fetch the xattr with + * current settings. + * If it was not because value was too small, abort + */ + if (errno != ERANGE) { + goto out; + } + + /* Get the real length needed */ + if (filler->real_path) { + xattr_size = sys_lgetxattr(filler->real_path, key, NULL, 0); + } else { + xattr_size = sys_fgetxattr(filler->fdnum, key, NULL, 0); + } + if (xattr_size == -1) { + goto out; + } - if (xattr_size != -1) { value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char); - if (!value) + if (!value) { goto out; + } - if (have_val) { - memcpy(value, val_buf, xattr_size); + if (filler->real_path) { + xattr_size = sys_lgetxattr(filler->real_path, key, value, + xattr_size); } else { - bzero(value, xattr_size + 1); - if (filler->real_path) { - xattr_size = sys_lgetxattr(filler->real_path, key, value, - xattr_size); - } else { - xattr_size = sys_fgetxattr(filler->fdnum, key, value, - xattr_size); - } + xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size); } if (xattr_size == -1) { + GF_FREE(value); + value = NULL; if (filler->real_path) gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, "getxattr failed. path: %s, key: %s", @@ -285,24 +282,25 @@ _posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, "getxattr failed. gfid: %s, key: %s", uuid_utoa(filler->fd->inode->gfid), key); - GF_FREE(value); goto out; } + } - value[xattr_size] = '\0'; - ret = dict_set_bin(filler->xattr, key, value, xattr_size); - if (ret < 0) { - if (filler->real_path) - gf_msg_debug(filler->this->name, 0, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - else - gf_msg_debug(filler->this->name, 0, - "dict set failed. gfid: %s, key: %s", - uuid_utoa(filler->fd->inode->gfid), key); + value[xattr_size] = '\0'; + ret = dict_set_bin(filler->xattr, key, value, xattr_size); + + if (ret < 0) { + if (value) GF_FREE(value); - goto out; - } + if (filler->real_path) + gf_msg_debug(filler->this->name, 0, + "dict set failed. path: %s, key: %s", + filler->real_path, key); + else + gf_msg_debug(filler->this->name, 0, + "dict set failed. gfid: %s, key: %s", + uuid_utoa(filler->fd->inode->gfid), key); + goto out; } ret = 0; out: @@ -367,11 +365,10 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) list_offset = 0; while (remaining_size > 0) { - snprintf(key, sizeof(key), "%s", list + list_offset); + len = snprintf(key, sizeof(key), "%s", list + list_offset); if (fnmatch(marker_contri_key, key, 0) == 0) { - ret = _posix_xattr_get_set_from_backend(filler, key); + (void)_posix_xattr_get_set_from_backend(filler, key); } - len = strlen(key); remaining_size -= (len + 1); list_offset += (len + 1); } @@ -390,6 +387,9 @@ _posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key) int i = 0, ret = 0; tmp_key = ptr = gf_strdup(key); + if (tmp_key == NULL) { + return -1; + } for (i = 0; i < 4; i++) { token = strtok_r(tmp_key, ".", &saveptr); tmp_key = NULL; @@ -423,7 +423,7 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, { posix_xattr_filler_t *filler = xattrargs; int ret = -1; - int len; + int len = 0; char *databuf = NULL; int _fd = -1; ssize_t req_size = 0; @@ -438,8 +438,11 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, if (posix_xattr_ignorable(key)) goto out; + + len = strlen(key); /* should size be put into the data_t ? */ - if (!strcmp(key, GF_CONTENT_KEY) && IA_ISREG(filler->stbuf->ia_type)) { + if ((filler->stbuf != NULL && IA_ISREG(filler->stbuf->ia_type)) && + (len == SLEN(GF_CONTENT_KEY) && !strcmp(key, GF_CONTENT_KEY))) { if (!filler->real_path) goto out; @@ -504,7 +507,8 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, sys_close(_fd); GF_FREE(databuf); } - } else if (!strcmp(key, GLUSTERFS_OPEN_FD_COUNT)) { + } else if (len == SLEN(GLUSTERFS_OPEN_FD_COUNT) && + !strcmp(key, GLUSTERFS_OPEN_FD_COUNT)) { inode = _get_filler_inode(filler); if (!inode || gf_uuid_is_null(inode->gfid)) goto out; @@ -513,7 +517,8 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set dictionary value for %s", key); } - } else if (!strcmp(key, GLUSTERFS_ACTIVE_FD_COUNT)) { + } else if (len == SLEN(GLUSTERFS_ACTIVE_FD_COUNT) && + !strcmp(key, GLUSTERFS_ACTIVE_FD_COUNT)) { inode = _get_filler_inode(filler); if (!inode || gf_uuid_is_null(inode->gfid)) goto out; @@ -522,7 +527,8 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set dictionary value for %s", key); } - } else if (!strcmp(key, GET_ANCESTRY_PATH_KEY)) { + } else if (len == SLEN(GET_ANCESTRY_PATH_KEY) && + !strcmp(key, GET_ANCESTRY_PATH_KEY)) { /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt * fetching it via path-based fops. Hence, leaving it as it is * for now. @@ -537,7 +543,7 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, goto out; } - ret = dict_set_dynstr(filler->xattr, GET_ANCESTRY_PATH_KEY, path); + ret = dict_set_dynstr_sizen(filler->xattr, GET_ANCESTRY_PATH_KEY, path); if (ret < 0) { GF_FREE(path); goto out; @@ -545,9 +551,10 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, } else if (fnmatch(marker_contri_key, key, 0) == 0) { ret = _posix_get_marker_quota_contributions(filler, key); - } else if (strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) { - ret = dict_set(filler->xattr, GF_REQUEST_LINK_COUNT_XDATA, data); - } else if (strcmp(key, GF_GET_SIZE) == 0) { + } else if (len == SLEN(GF_REQUEST_LINK_COUNT_XDATA) && + strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) { + ret = dict_set_sizen(filler->xattr, GF_REQUEST_LINK_COUNT_XDATA, data); + } else if (len == SLEN(GF_GET_SIZE) && strcmp(key, GF_GET_SIZE) == 0) { if (filler->stbuf && IA_ISREG(filler->stbuf->ia_type)) { ret = dict_set_uint64(filler->xattr, GF_GET_SIZE, filler->stbuf->ia_size); @@ -572,7 +579,7 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, /* ACL_TYPE_DEFAULT is not supported for non-directory, skip */ if (!IA_ISDIR(stbuf.ia_type) && - !strncmp(key, GF_POSIX_ACL_DEFAULT, strlen(GF_POSIX_ACL_DEFAULT))) + !strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT))) goto out; ret = posix_pacl_get(filler->real_path, filler->fdnum, key, &value); @@ -584,7 +591,7 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, goto out; } - ret = dict_set_dynstr(filler->xattr, (char *)key, value); + ret = dict_set_dynstrn(filler->xattr, (char *)key, len, value); if (ret < 0) { GF_FREE(value); gf_msg(filler->this->name, GF_LOG_ERROR, errno, @@ -817,6 +824,11 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, "lstat failed on %s", path); errno = op_errno; /*gf_msg could have changed errno*/ + } else { + op_errno = errno; + gf_msg_debug(this->name, 0, "lstat failed on %s (%s)", path, + strerror(errno)); + errno = op_errno; /*gf_msg could have changed errno*/ } goto out; } @@ -832,17 +844,26 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, iatt_from_stat(&stbuf, &lstatbuf); - if (inode && priv->ctime) { - if (!inode_locked) { - ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + if (priv->ctime) { + if (inode) { + if (!inode_locked) { + ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + } else { + ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + } + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on gfid: %s", + uuid_utoa(inode->gfid)); + goto out; + } } else { - ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); - } - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, - "posix get mdata failed on gfid: %s", - uuid_utoa(inode->gfid)); - goto out; + ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on path: %s", path); + goto out; + } } } @@ -885,18 +906,17 @@ out: } static void -_handle_list_xattr(dict_t *xattr_req, const char *real_path, int fdnum, - posix_xattr_filler_t *filler) +_handle_list_xattr(posix_xattr_filler_t *filler) { int32_t list_offset = 0; ssize_t remaining_size = 0; char *key = NULL; int len; - list_offset = 0; remaining_size = filler->list_size; while (remaining_size > 0) { key = filler->list + list_offset; + len = strlen(key); if (gf_get_index_by_elem(list_xattr_ignore_xattrs, key) >= 0) goto next; @@ -910,12 +930,11 @@ _handle_list_xattr(dict_t *xattr_req, const char *real_path, int fdnum, if (posix_is_gfid2path_xattr(key)) goto next; - if (dict_get(filler->xattr, key)) + if (dict_getn(filler->xattr, key, len)) goto next; (void)_posix_xattr_get_set_from_backend(filler, key); next: - len = strlen(key); remaining_size -= (len + 1); list_offset += (len + 1); @@ -933,8 +952,8 @@ posix_xattr_fill(xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd, }; gf_boolean_t list = _gf_false; - if (dict_get(xattr_req, "list-xattr")) { - dict_del(xattr_req, "list-xattr"); + if (dict_get_sizen(xattr_req, "list-xattr")) { + dict_del_sizen(xattr_req, "list-xattr"); list = _gf_true; } @@ -954,7 +973,7 @@ posix_xattr_fill(xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd, _get_list_xattr(&filler); dict_foreach(xattr_req, _posix_xattr_get_set, &filler); if (list) - _handle_list_xattr(xattr_req, real_path, fdnum, &filler); + _handle_list_xattr(&filler); GF_FREE(filler.list); out: @@ -1008,6 +1027,7 @@ posix_gfid_set(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req, if (sys_lstat(path, &stat) != 0) { ret = -1; + *op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "lstat on %s failed", path); goto out; @@ -1050,13 +1070,13 @@ verify_handle: ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); out: - if (!(*op_errno)) + if (ret && !(*op_errno)) *op_errno = errno; return ret; } #ifdef HAVE_SYS_ACL_H -int +static int posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s) { int ret = -1; @@ -1179,11 +1199,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key, #endif int -posix_handle_pair(xlator_t *this, const char *real_path, char *key, +posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, data_t *value, int flags, struct iatt *stbuf) { int sys_ret = -1; int ret = 0; + int op_errno = 0; + struct mdata_iatt mdata_iatt = { + 0, + }; #ifdef GF_DARWIN_HOST_OS const int error_code = EINVAL; #else @@ -1200,13 +1224,31 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key, if (stbuf && IS_DHT_LINKFILE_MODE(stbuf)) goto out; ret = posix_pacl_set(real_path, -1, key, value->data); - } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, strlen(key)) && stbuf && - IS_DHT_LINKFILE_MODE(stbuf)) { + } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, + SLEN(POSIX_ACL_ACCESS_XATTR)) && + stbuf && IS_DHT_LINKFILE_MODE(stbuf)) { goto out; - } else if (!strncmp(key, GF_INTERNAL_CTX_KEY, strlen(key))) { + } else if (!strncmp(key, GF_INTERNAL_CTX_KEY, SLEN(GF_INTERNAL_CTX_KEY))) { /* ignore this key value pair */ ret = 0; goto out; + } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) { + /* This is either by rebalance or self heal. Create the xattr if it's + * not present. Compare and update the larger value if the xattr is + * already present. + */ + if (loc == NULL) { + ret = -EINVAL; + goto out; + } + posix_mdata_iatt_from_disk(&mdata_iatt, + (posix_mdata_disk_t *)value->data); + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + ret = -op_errno; + } + goto out; } else { sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags); #ifdef GF_DARWIN_HOST_OS @@ -1253,8 +1295,9 @@ posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, } else if (posix_is_gfid2path_xattr(key)) { ret = -ENOTSUP; goto out; - } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, strlen(key)) && stbuf && - IS_DHT_LINKFILE_MODE(stbuf)) { + } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, + SLEN(POSIX_ACL_ACCESS_XATTR)) && + stbuf && IS_DHT_LINKFILE_MODE(stbuf)) { goto out; } @@ -1313,7 +1356,7 @@ del_stale_dir_handle(xlator_t *this, uuid_t gfid) 0, }; - MAKE_HANDLE_GFID_PATH(hpath, this, gfid, NULL); + MAKE_HANDLE_GFID_PATH(hpath, this, gfid); /* check that it is valid directory handle */ size = sys_lstat(hpath, &stbuf); @@ -1423,12 +1466,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data) this = data; priv = this->private; + pthread_mutex_lock(&priv->janitor_mutex); + { + if (priv->janitor_task_stop) { + priv->janitor_task_stop = _gf_false; + pthread_cond_signal(&priv->janitor_cond); + pthread_mutex_unlock(&priv->janitor_mutex); + goto out; + } + } + pthread_mutex_unlock(&priv->janitor_mutex); + LOCK(&priv->lock); { __posix_janitor_timer_start(this); } UNLOCK(&priv->lock); +out: return 0; } @@ -1447,7 +1502,10 @@ posix_janitor_task(void *data) old_this = THIS; THIS = this; - time(&now); + if (!priv) + goto out; + + now = gf_time(); if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { if (priv->disable_landfill_purge) { gf_msg_debug(this->name, 0, @@ -1466,6 +1524,7 @@ posix_janitor_task(void *data) THIS = old_this; +out: return 0; } @@ -1502,7 +1561,7 @@ __posix_janitor_timer_start(xlator_t *this) timer->expires = priv->janitor_sleep_duration; timer->function = posix_janitor_task_initator; timer->data = this; - gf_tw_add_timer(this->ctx->tw->timer_wheel, timer); + gf_tw_add_timer(glusterfs_ctx_tw_get(this->ctx), timer); return; } @@ -1534,114 +1593,116 @@ unlock: } static struct posix_fd * -janitor_get_next_fd(glusterfs_ctx_t *ctx, int32_t janitor_sleep) +janitor_get_next_fd(glusterfs_ctx_t *ctx) { struct posix_fd *pfd = NULL; - struct timespec timeout; + while (list_empty(&ctx->janitor_fds)) { + if (ctx->pxl_count == 0) { + return NULL; + } - pthread_mutex_lock(&ctx->janitor_lock); - { - if (list_empty(&ctx->janitor_fds)) { - time(&timeout.tv_sec); - timeout.tv_sec += janitor_sleep; - timeout.tv_nsec = 0; + pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); + } - pthread_cond_timedwait(&ctx->janitor_cond, &ctx->janitor_lock, - &timeout); - goto unlock; - } + pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); + list_del_init(&pfd->list); + + return pfd; +} - pfd = list_entry(ctx->janitor_fds.next, struct posix_fd, list); +static void +posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) +{ + THIS = xl; - list_del(ctx->janitor_fds.next); + if (pfd->dir == NULL) { + gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); + sys_close(pfd->fd); + } else { + gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); + sys_closedir(pfd->dir); } -unlock: - pthread_mutex_unlock(&ctx->janitor_lock); - return pfd; + GF_FREE(pfd); } static void * posix_ctx_janitor_thread_proc(void *data) { - xlator_t *this = NULL; + xlator_t *xl; struct posix_fd *pfd; glusterfs_ctx_t *ctx = NULL; - struct posix_private *priv = NULL; - int32_t sleep_duration = 0; + struct posix_private *priv_fd; - this = data; - ctx = THIS->ctx; - THIS = this; + ctx = data; - priv = this->private; - sleep_duration = priv->janitor_sleep_duration; - while (1) { - pfd = janitor_get_next_fd(ctx, sleep_duration); - if (pfd) { - if (pfd->dir == NULL) { - gf_msg_trace(this->name, 0, "janitor: closing file fd=%d", - pfd->fd); - sys_close(pfd->fd); - } else { - gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", - pfd->dir); - sys_closedir(pfd->dir); - } + pthread_mutex_lock(&ctx->fd_lock); - GF_FREE(pfd); - } + while ((pfd = janitor_get_next_fd(ctx)) != NULL) { + pthread_mutex_unlock(&ctx->fd_lock); + + xl = pfd->xl; + posix_close_pfd(xl, pfd); + + pthread_mutex_lock(&ctx->fd_lock); + + priv_fd = xl->private; + priv_fd->rel_fdcount--; + if (!priv_fd->rel_fdcount) + pthread_cond_signal(&priv_fd->fd_cond); } + pthread_mutex_unlock(&ctx->fd_lock); + return NULL; } int posix_spawn_ctx_janitor_thread(xlator_t *this) { - struct posix_private *priv = NULL; int ret = 0; glusterfs_ctx_t *ctx = NULL; - priv = this->private; - ctx = THIS->ctx; + ctx = this->ctx; - LOCK(&priv->lock); + pthread_mutex_lock(&ctx->fd_lock); { - if (!ctx->janitor) { - pthread_mutex_init(&ctx->janitor_lock, NULL); - pthread_cond_init(&ctx->janitor_cond, NULL); - INIT_LIST_HEAD(&ctx->janitor_fds); - + if (ctx->pxl_count++ == 0) { ret = gf_thread_create(&ctx->janitor, NULL, - posix_ctx_janitor_thread_proc, this, + posix_ctx_janitor_thread_proc, ctx, "posixctxjan"); if (ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, - "spawning janitor " - "thread failed"); - goto unlock; + "spawning janitor thread failed"); + ctx->pxl_count--; } } } -unlock: - UNLOCK(&priv->lock); + pthread_mutex_unlock(&ctx->fd_lock); + return ret; } static int -is_fresh_file(int64_t ctime_sec) +is_fresh_file(struct timespec *ts) { - struct timeval tv; + struct timespec now; + int64_t elapsed; - gettimeofday(&tv, NULL); + timespec_now_realtime(&now); + elapsed = (int64_t)gf_tsdiff(ts, &now); - if ((ctime_sec >= (tv.tv_sec - 1)) && (ctime_sec <= tv.tv_sec)) - return 1; + if (elapsed < 0) { + /* The file has been modified in the future !!! + * Is it fresh ? previous implementation considered this as a + * non-fresh file, so maintaining the same behavior. */ + return 0; + } - return 0; + /* If the file is newer than a second, we consider it fresh. */ + return elapsed < 1000000; } int @@ -1702,7 +1763,11 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) } ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); if (ret != 16) { - if (is_fresh_file(stbuf.ia_ctime)) { + /* TODO: This is a very hacky way of doing this, and very prone to + * errors and unexpected behavior. This should be changed. */ + struct timespec ts = {.tv_sec = stbuf.ia_ctime, + .tv_nsec = stbuf.ia_ctime_nsec}; + if (is_fresh_file(&ts)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1714,7 +1779,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) } ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); if (ret != 16) { - if (is_fresh_file(stat.st_ctime)) { + /* TODO: This is a very hacky way of doing this, and very prone to + * errors and unexpected behavior. This should be changed. */ + if (is_fresh_file(&stat.st_ctim)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1722,7 +1789,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) } } - posix_gfid_set(this, path, loc, xattr_req, GF_CLIENT_PID_MAX, &ret); + (void)posix_gfid_set(this, path, loc, xattr_req, GF_CLIENT_PID_MAX, &ret); return 0; } @@ -1785,8 +1852,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) return 0; } - ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE, - filler->stbuf); + ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, + XATTR_CREATE, filler->stbuf); if (ret < 0) { errno = -ret; return -1; @@ -1795,7 +1862,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) } int -posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict) +posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, + dict_t *dict) { int ret = -1; @@ -1809,6 +1877,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict) filler.this = this; filler.real_path = path; filler.stbuf = NULL; + filler.loc = loc; ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler); @@ -1841,8 +1910,8 @@ __posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd_p, if (!fd_is_anonymous(fd)) { gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READ_FAILED, "Failed to get fd context for a non-anonymous fd, " - "file: %s, gfid: %s", - real_path, uuid_utoa(fd->inode->gfid)); + "gfid: %s", + uuid_utoa(fd->inode->gfid)); op_errno = EINVAL; goto out; } @@ -1940,13 +2009,12 @@ posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, int *op_errno) return ret; } -int -posix_fs_health_check(xlator_t *this) +static int +posix_fs_health_check(xlator_t *this, char *file_path) { struct posix_private *priv = NULL; int ret = -1; - char *subvol_path = NULL; - char timestamp[256] = { + char timestamp[GF_TIMESTR_SIZE] = { 0, }; int fd = -1; @@ -1955,25 +2023,15 @@ posix_fs_health_check(xlator_t *this) 0, }; char buff[256] = {0}; - char file_path[PATH_MAX] = {0}; char *op = NULL; int op_errno = 0; - int cnt = 0; + int cnt; int timeout = 0; struct aiocb aiocb; - GF_VALIDATE_OR_GOTO(this->name, this, out); priv = this->private; - GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); - subvol_path = priv->base_path; timeout = priv->health_check_timeout; - snprintf(file_path, sizeof(file_path) - 1, "%s/%s/health_check", - subvol_path, GF_HIDDEN_PATH); - - time_sec = time(NULL); - gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); - timelen = strlen(timestamp); fd = open(file_path, O_CREAT | O_WRONLY | O_TRUNC, 0644); if (fd == -1) { @@ -1981,6 +2039,11 @@ posix_fs_health_check(xlator_t *this) op = "open_for_write"; goto out; } + + time_sec = gf_time(); + gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); + timelen = strlen(timestamp); + memset(&aiocb, 0, sizeof(struct aiocb)); aiocb.aio_fildes = fd; aiocb.aio_buf = timestamp; @@ -1992,6 +2055,7 @@ posix_fs_health_check(xlator_t *this) goto out; } + cnt = 0; /* Wait until write completion */ while ((aio_error(&aiocb) == EINPROGRESS) && (++cnt <= timeout)) sleep(1); @@ -2000,7 +2064,6 @@ posix_fs_health_check(xlator_t *this) if (ret != 0) { op_errno = errno; op = "aio_write_error"; - ret = -1; goto out; } @@ -2039,7 +2102,6 @@ posix_fs_health_check(xlator_t *this) if (ret != 0) { op_errno = errno; op = "aio_read_error"; - ret = -1; goto out; } @@ -2062,13 +2124,20 @@ out: if (fd != -1) { sys_close(fd); } + if (ret && file_path[0]) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED, - "%s() on %s returned", op, file_path); - gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, - "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, - file_path, strerror(op_errno), priv->hostname, priv->base_path, - timeout); + "%s() on %s returned ret is %d error is %s", op, file_path, ret, + ret != -1 ? strerror(ret) : strerror(op_errno)); + + if ((op_errno == EAGAIN) || (ret == EAGAIN)) { + ret = 0; + } else { + gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, + "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, + file_path, strerror(op_errno), priv->hostname, + priv->base_path, timeout); + } } return ret; } @@ -2076,30 +2145,30 @@ out: static void * posix_health_check_thread_proc(void *data) { - xlator_t *this = NULL; - struct posix_private *priv = NULL; - uint32_t interval = 0; + xlator_t *this = data; + struct posix_private *priv = this->private; + uint32_t interval = priv->health_check_interval; int ret = -1; xlator_t *top = NULL; xlator_t *victim = NULL; xlator_list_t **trav_p = NULL; int count = 0; gf_boolean_t victim_found = _gf_false; - glusterfs_ctx_t *ctx = NULL; - - this = data; - priv = this->private; - ctx = THIS->ctx; + glusterfs_ctx_t *ctx = THIS->ctx; + char file_path[PATH_MAX]; /* prevent races when the interval is updated */ - interval = priv->health_check_interval; if (interval == 0) goto out; + snprintf(file_path, sizeof(file_path) - 1, "%s/%s/health_check", + priv->base_path, GF_HIDDEN_PATH); + gf_msg_debug(this->name, 0, "health-check thread started, " + "on path %s, " "interval = %d seconds", - interval); + file_path, interval); while (1) { /* aborting sleep() is a request to exit this thread, sleep() * will normally not return when cancelled */ @@ -2110,7 +2179,7 @@ posix_health_check_thread_proc(void *data) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); /* Do the health-check.*/ - ret = posix_fs_health_check(this); + ret = posix_fs_health_check(this, file_path); if (ret < 0 && priv->health_check_active) goto abort; if (!priv->health_check_active) @@ -2237,17 +2306,17 @@ posix_disk_space_check(xlator_t *this) struct posix_private *priv = NULL; char *subvol_path = NULL; int op_ret = 0; - int percent = 0; + double size = 0; + double percent = 0; struct statvfs buf = {0}; - uint64_t totsz = 0; - uint64_t freesz = 0; + double totsz = 0; + double freesz = 0; - GF_VALIDATE_OR_GOTO(this->name, this, out); + GF_VALIDATE_OR_GOTO("posix-helpers", this, out); priv = this->private; GF_VALIDATE_OR_GOTO(this->name, priv, out); subvol_path = priv->base_path; - percent = priv->disk_reserve; op_ret = sys_statvfs(subvol_path, &buf); @@ -2256,10 +2325,17 @@ posix_disk_space_check(xlator_t *this) "statvfs failed on %s", subvol_path); goto out; } - totsz = (buf.f_blocks * buf.f_bsize); - freesz = (buf.f_bfree * buf.f_bsize); - if (freesz <= ((totsz * percent) / 100)) { + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + totsz = (buf.f_blocks * buf.f_bsize); + size = ((totsz * percent) / 100); + } else { + size = priv->disk_reserve; + } + + freesz = (buf.f_bfree * buf.f_bsize); + if (freesz <= size) { priv->disk_space_full = 1; } else { priv->disk_space_full = 0; @@ -2329,7 +2405,7 @@ posix_spawn_disk_space_check_thread(xlator_t *xl) ret = gf_thread_create(&priv->disk_space_check, NULL, posix_disk_space_check_thread_proc, xl, - "posix_reserve"); + "posixrsv"); if (ret) { priv->disk_space_check_active = _gf_false; gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, @@ -2381,7 +2457,7 @@ posix_fsyncer_process(xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync) return; } - if (do_fsync) { + if (do_fsync && pfd) { if (stub->args.datasync) ret = sys_fdatasync(pfd->fd); else @@ -2409,23 +2485,8 @@ posix_fsyncer_syncfs(xlator_t *this, struct list_head *head) stub = list_entry(head->prev, call_stub_t, list); ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL); - if (ret) - return; - -#ifdef GF_LINUX_HOST_OS - /* syncfs() is not "declared" in RHEL's glibc even though - the kernel has support. - */ -#include <sys/syscall.h> -#include <unistd.h> -#ifdef SYS_syncfs - syscall(SYS_syncfs, pfd->fd); -#else - sync(); -#endif -#else - sync(); -#endif + if (!ret) + (void)gf_syncfs(pfd->fd); } void * @@ -2446,7 +2507,7 @@ posix_fsyncer(void *d) count = posix_fsyncer_pick(this, &list); - usleep(priv->batch_fsync_delay_usec); + gf_nanosleep(priv->batch_fsync_delay_usec * GF_US_IN_NS); gf_msg_debug(this->name, 0, "picked %d fsyncs", count); @@ -2718,10 +2779,10 @@ posix_resolve_dirgfid_to_path(const uuid_t dirgfid, const char *brick_path, linkname[len] = '\0'; - pgfidstr = strtok_r(linkname + strlen("../../00/00/"), "/", &saveptr); + pgfidstr = strtok_r(linkname + SLEN("../../00/00/"), "/", &saveptr); dir_name = strtok_r(NULL, "/", &saveptr); - if (strlen(pre_dir_name) != 0) { /* Remove '/' at the end */ + if (pre_dir_name[0] != '\0') { /* Remove '/' at the end */ len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name); } else { len = snprintf(result, PATH_MAX, "%s", dir_name); @@ -2777,7 +2838,8 @@ __posix_inode_ctx_get(inode_t *inode, xlator_t *this) pthread_mutex_init(&ctx_p->write_atomic_lock, NULL); pthread_mutex_init(&ctx_p->pgfid_lock, NULL); - ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set(inode, this, &ctx_uint); if (ret < 0) { pthread_mutex_destroy(&ctx_p->xattrop_lock); pthread_mutex_destroy(&ctx_p->write_atomic_lock); @@ -2849,7 +2911,7 @@ posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, posix_inode_ctx_t **ctx) gf_boolean_t posix_is_bulk_removexattr(char *name, dict_t *xdata) { - if (name && (strlen(name) == 0) && xdata) + if (name && (name[0] == '\0') && xdata) return _gf_true; return _gf_false; } @@ -2929,7 +2991,7 @@ posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata) LOCK(&fd->inode->lock); { - val = dict_get(xdata, GF_PROTECT_FROM_EXTERNAL_WRITES); + val = dict_get_sizen(xdata, GF_PROTECT_FROM_EXTERNAL_WRITES); if (val) { ret = sys_fsetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES, val->data, val->len, 0); @@ -2942,7 +3004,7 @@ posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata) goto out; } - if (dict_get(xdata, GF_AVOID_OVERWRITE)) { + if (dict_get_sizen(xdata, GF_AVOID_OVERWRITE)) { xattrsize = sys_fgetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, 0); if ((xattrsize == -1) && @@ -3219,6 +3281,11 @@ posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state, char *value = NULL; size_t xattrsize = 0; + if (!rsp) { + ret = -1; + goto out; + } + if (!(*rsp)) { *rsp = dict_new(); if (!(*rsp)) { @@ -3312,14 +3379,20 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, { gf_cs_obj_state state = GF_CS_ERROR; int ret = 0; + gf_boolean_t is_cs_obj_status = _gf_false; + gf_boolean_t is_cs_obj_repair = _gf_false; + + if (dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS)) + is_cs_obj_status = _gf_true; + if (dict_get_sizen(xattr_req, GF_CS_OBJECT_REPAIR)) + is_cs_obj_repair = _gf_true; - if (!(dict_get(xattr_req, GF_CS_OBJECT_STATUS) || - dict_get(xattr_req, GF_CS_OBJECT_REPAIR))) + if (!(is_cs_obj_status || is_cs_obj_repair)) return 0; if (fd) { LOCK(&fd->inode->lock); - if (dict_get(xattr_req, GF_CS_OBJECT_STATUS)) { + if (is_cs_obj_status) { state = posix_cs_check_status(this, NULL, pfd, buf); gf_msg_debug(this->name, 0, "state : %d", state); ret = posix_cs_set_state(this, xattr_rsp, state, NULL, pfd); @@ -3339,7 +3412,7 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, } } - if (dict_get(xattr_req, GF_CS_OBJECT_REPAIR)) { + if (is_cs_obj_repair) { state = posix_cs_check_status(this, NULL, pfd, buf); gf_msg_debug(this->name, 0, "state : %d", state); @@ -3370,7 +3443,7 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, } LOCK(&loc->inode->lock); - if (dict_get(xattr_req, GF_CS_OBJECT_STATUS)) { + if (is_cs_obj_status) { state = posix_cs_check_status(this, realpath, NULL, buf); gf_msg_debug(this->name, 0, "state : %d", state); ret = posix_cs_set_state(this, xattr_rsp, state, realpath, NULL); @@ -3390,7 +3463,7 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, } } - if (dict_get(xattr_req, GF_CS_OBJECT_REPAIR)) { + if (is_cs_obj_repair) { state = posix_cs_check_status(this, realpath, NULL, buf); gf_msg_debug(this->name, 0, "state : %d", state); @@ -3444,3 +3517,150 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) out: return ret; } + +void +posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) +{ + int ret = 0; + char val[4096] = { + 0, + }; + + if (!xattr_req) + return; + + if (!dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS)) + return; + + if (fd != -1) { + ret = sys_fgetxattr(fd, GF_CS_OBJECT_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_size = atoll(val); + } else { + /* Safe to assume that the other 2 xattrs are also not set*/ + return; + } + ret = sys_fgetxattr(fd, GF_CS_BLOCK_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blksize = atoll(val); + } + ret = sys_fgetxattr(fd, GF_CS_NUM_BLOCKS, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blocks = atoll(val); + } + } else { + ret = sys_lgetxattr(loc, GF_CS_OBJECT_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_size = atoll(val); + } else { + /* Safe to assume that the other 2 xattrs are also not set*/ + return; + } + ret = sys_lgetxattr(loc, GF_CS_BLOCK_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blksize = atoll(val); + } + ret = sys_lgetxattr(loc, GF_CS_NUM_BLOCKS, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blocks = atoll(val); + } + } +} + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) +{ + int op_ret = 0; + ssize_t size = 0; + char value_buf[4096] = { + 0, + }; + gf_boolean_t have_val = _gf_false; + data_t *arg_data = NULL; + char *xattr_name = NULL; + size_t xattr_len = 0; + gf_boolean_t is_stale = _gf_false; + + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name == NULL) { + op_ret = 0; + return is_stale; + } + + xattr_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_len); + if (!arg_data) { + op_ret = 0; + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + return is_stale; + } + + size = sys_lgetxattr(par_path, xattr_name, value_buf, + sizeof(value_buf) - 1); + + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) path (%s) failed due to" + " buffer overflow", + xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + } + if (size < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed, path : %s", xattr_name, + par_path); + goto out; + } + } + + if (!have_val) { + size = sys_lgetxattr(par_path, xattr_name, value_buf, size); + if (size < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed (%s)", xattr_name, + strerror(errno)); + goto out; + } + } + + if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { + gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, + "failing preop as on-disk xattr value differs from argument " + "value for key %s", + xattr_name); + op_ret = -1; + } + +out: + dict_deln(xdata, xattr_name, xattr_len); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + + if (op_ret == -1) { + is_stale = _gf_true; + } + + return is_stale; +} + +/* Delete user xattr from the file at the file-path specified by data and from + * dict */ +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data) +{ + int ret; + char *real_path = data; + + ret = sys_lremovexattr(real_path, k); + if (ret) { + gf_msg("posix-helpers", GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, + "removexattr failed. key %s path %s", k, real_path); + } + + dict_del(dict, k); + + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 7dbbd3d6d61..6d54d37e5aa 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #include <regex.h> #ifndef GF_BSD_HOST_OS @@ -37,14 +36,10 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include <glusterfs/glusterfs.h> #include <glusterfs/checksum.h> #include <glusterfs/dict.h> #include <glusterfs/logging.h> -#include "posix.h" -#include <glusterfs/xlator.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> +#include "posix-handle.h" #include <glusterfs/compat-errno.h> #include <glusterfs/compat.h> #include <glusterfs/byte-order.h> @@ -53,14 +48,13 @@ #include <glusterfs/locking.h> #include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include <glusterfs/hashfn.h> -#include "posix-aio.h" #include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" #include "posix-metadata.h" #include <glusterfs/events.h> #include "posix-gfid-path.h" #include <glusterfs/compat-uuid.h> +#include <glusterfs/common-utils.h> extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -108,6 +102,61 @@ extern char *marker_xattrs[]; static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, NULL}; +void +posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, + char *loc) +{ + int ret = 0; + uuid_t uuid; + + if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS)) + return; + + if (!(*rsp)) { + *rsp = dict_new(); + if (!(*rsp)) { + return; + } + } + + if (fd != -1) { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for fd %d", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", + GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } + } else { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for loc %s", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", + GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } + } + return; +} + int32_t posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { @@ -136,11 +185,15 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (op_ret == -1) { op_errno = errno; if (op_errno == ENOENT) { - gf_msg_debug(this->name, 0, "lstat on %s failed: %s", - real_path ? real_path : "<null>", strerror(op_errno)); + gf_msg_debug(this->name, 0, + "lstat on gfid-handle %s (path: %s)" + "failed: %s", + real_path ? real_path : "<null>", loc->path, + strerror(op_errno)); } else { gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_path ? real_path : "<null>"); + "lstat on gfid-handle %s (path: %s) failed", + real_path ? real_path : "<null>", loc->path); } goto out; } @@ -150,8 +203,11 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, &xattr_rsp, _gf_true); + + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); } + posix_update_iatt_buf(&buf, -1, real_path, xdata); op_ret = 0; out: @@ -326,8 +382,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "setattr (lstat) on %s failed", - real_path ? real_path : "<null>"); + "setattr (lstat) on gfid-handle %s (path: %s) failed", + real_path ? real_path : "<null>", loc->path); goto out; } @@ -348,9 +404,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED, - "setattr (chmod) on %s " + "setattr (chmod) on gfid-handle %s (path: %s) " "failed", - real_path); + real_path, loc->path); goto out; } } @@ -360,31 +416,18 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, - "setattr (utimes) on %s " + "setattr (utimes) on gfid-handle %s (path: %s) " "failed", - real_path); + real_path, loc->path); goto out; } - posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf, - valid); + posix_update_utime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); } - if (valid & GF_SET_ATTR_CTIME && !priv->ctime) { - /* - * If ctime is not enabled, we have no means to associate an - * arbitrary ctime with the file, so as a fallback, we ignore - * the ctime payload and update the file ctime to current time - * (which is possible directly with the POSIX API). - */ - op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL); - if (op_ret == -1) { - op_errno = errno; - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, - "setattr (utimes) on %s " - "failed", - real_path); - goto out; - } + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); } if (!valid) { @@ -392,9 +435,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, - "lchown (%s, -1, -1) " + "lchown (gfid-handle: %s, path: %s, -1, -1) " "failed", - real_path); + real_path, loc->path); goto out; } @@ -405,23 +448,18 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "setattr (lstat) on %s failed", real_path); + "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path, + loc->path); goto out; } - if (valid & GF_SET_ATTR_CTIME && priv->ctime) { - /* - * If we got ctime payload, we override - * the ctime of statpost with that. - */ - statpost.ia_ctime = stbuf->ia_ctime; - statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec; - } posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost); if (xdata) xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &statpost); + posix_update_iatt_buf(&statpre, -1, real_path, xdata); + posix_update_iatt_buf(&statpost, -1, real_path, xdata); op_ret = 0; out: @@ -484,18 +522,19 @@ posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) struct stat stat = { 0, }; - - ret = sys_fstat(fd, &stat); - if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%d", - fd); - goto out; - } + gf_boolean_t fstat_executed = _gf_false; if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { tv[0].tv_sec = stbuf->ia_atime; tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; } else { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + fstat_executed = _gf_true; /* atime is not given, use current values */ tv[0].tv_sec = ST_ATIM_SEC(&stat); tv[0].tv_usec = ST_ATIM_NSEC(&stat) / 1000; @@ -505,6 +544,14 @@ posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) tv[1].tv_sec = stbuf->ia_mtime; tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; } else { + if (!fstat_executed) { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + } /* mtime is not given, use current values */ tv[1].tv_sec = ST_MTIM_SEC(&stat); tv[1].tv_usec = ST_MTIM_NSEC(&stat) / 1000; @@ -530,6 +577,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt statpost = { 0, }; + struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; dict_t *xattr_rsp = NULL; int32_t ret = -1; @@ -542,6 +590,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); @@ -590,8 +641,13 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, fd); goto out; } - posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf, - valid); + posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); + } + + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); } if (!valid) { @@ -642,6 +698,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, gf_boolean_t locked = _gf_false; posix_inode_ctx_t *ctx = NULL; struct posix_private *priv = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; DECLARE_OLD_FS_ID_VAR; @@ -661,7 +721,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, if (priv->disk_reserve) posix_disk_space_check(this); - DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +overwrite: + check_space_error = _gf_true; ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { @@ -685,7 +748,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fallocate (fstat) failed on fd=%p", fd); - goto out; + goto unlock; } if (xdata) { @@ -695,7 +758,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); ret = -EIO; - goto out; + goto unlock; } } @@ -706,7 +769,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, "fallocate failed on %s offset: %jd, " "len:%zu, flags: %d", uuid_utoa(fd->inode->gfid), offset, len, flags); - goto out; + goto unlock; } ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); @@ -714,16 +777,47 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fallocate (fstat) failed on fd=%p", fd); - goto out; + goto unlock; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); -out: +unlock: if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } + + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { +#ifdef FALLOC_FL_KEEP_SIZE + if (flags & FALLOC_FL_KEEP_SIZE) { + goto overwrite; + } +#endif + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +out: SET_TO_OLD_FS_ID(); if (ret == ENOSPC) ret = -ENOSPC; @@ -898,6 +992,7 @@ posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, } } + posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. * If it fails, fall back to _posix_do_zerofill() and an optional fsync. */ @@ -965,6 +1060,7 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt statpost = { 0, }; + dict_t *rsp_xdata = NULL; #ifdef FALLOC_FL_KEEP_SIZE if (keep_size) @@ -972,15 +1068,15 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, #endif /* FALLOC_FL_KEEP_SIZE */ ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, - &statpost, xdata, NULL); + &statpost, xdata, &rsp_xdata); if (ret < 0) goto err; - STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; err: - STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata); return 0; } @@ -1031,25 +1127,57 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int op_ret = -1; int op_errno = EINVAL; dict_t *rsp_xdata = NULL; + gf_boolean_t check_space_error = _gf_false; + struct posix_fd *pfd = NULL; + struct stat statbuf = { + 0, + }; - VALIDATE_OR_GOTO(frame, out); - VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); priv = this->private; DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); +overwrite: + check_space_error = _gf_true; ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, xdata, &rsp_xdata); if (ret < 0) { op_ret = -1; op_errno = -ret; - goto out; + goto unwind; } STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; out: + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, rsp_xdata); return 0; @@ -1180,7 +1308,8 @@ posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, if (dir == NULL) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, - "opendir failed on %s", real_path); + "opendir failed on gfid-handle: %s (path: %s)", real_path, + loc->path); goto out; } @@ -1188,7 +1317,8 @@ posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, if (op_ret < 0) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, - "dirfd() failed on %s", real_path); + "dirfd() failed (path: %s, gfid-handle: %s", loc->path, + real_path); goto out; } @@ -1206,8 +1336,9 @@ posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, if (op_ret) gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd" - "context path=%s fd=%p", - real_path, fd); + "context path=%s " + "gfid-handle= %s,fd=%p", + loc->path, real_path, fd); posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL); @@ -1230,13 +1361,28 @@ out: return 0; } +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, &ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + int32_t posix_releasedir(xlator_t *this, fd_t *fd) { struct posix_fd *pfd = NULL; uint64_t tmp_pfd = 0; int ret = 0; - glusterfs_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); @@ -1253,22 +1399,8 @@ posix_releasedir(xlator_t *this, fd_t *fd) "pfd->dir is NULL for fd=%p", fd); goto out; } + posix_add_fd_to_cleanup(this, pfd); - ctx = THIS->ctx; - - pthread_mutex_lock(&ctx->janitor_lock); - { - INIT_LIST_HEAD(&pfd->list); - list_add_tail(&pfd->list, &ctx->janitor_fds); - pthread_cond_signal(&ctx->janitor_cond); - } - pthread_mutex_unlock(&ctx->janitor_lock); - - /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); - - sys_closedir(pfd->dir); - GF_FREE(pfd); - */ out: return 0; } @@ -1306,7 +1438,8 @@ posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, - "readlink on %s failed", real_path); + "readlink on gfid-handle: %s (path: %s) failed", real_path, + loc->path); goto out; } @@ -1350,8 +1483,9 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on %s failed", - real_path ? real_path : "<null>"); + "pre-operation lstat on (path: %s gfid-handle: %s) " + "failed", + loc->path, real_path ? real_path : "<null>"); goto out; } @@ -1366,11 +1500,13 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, } } + posix_update_iatt_buf(&prebuf, -1, real_path, xdata); op_ret = sys_truncate(real_path, offset); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, - "truncate on %s failed", real_path); + "truncate on gfid-handle: %s (path: %s) failed", real_path, + loc->path); goto out; } @@ -1379,7 +1515,8 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_path); + "lstat on gfid-handle %s (path: %s) failed", real_path, + loc->path); goto out; } @@ -1405,6 +1542,10 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, int32_t _fd = -1; struct posix_fd *pfd = NULL; struct posix_private *priv = NULL; + struct iatt preop = { + 0, + }; + dict_t *rsp_xdata = NULL; struct iatt stbuf = { 0, }; @@ -1456,7 +1597,8 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, - "open on %s, flags: %d", real_path, flags); + "open on gfid-handle %s (path: %s), flags: %d", real_path, + loc->path, flags); goto out; } @@ -1471,12 +1613,25 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, pfd->flags = flags; pfd->fd = _fd; + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + GF_FREE(pfd); + goto out; + } + + posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, + &rsp_xdata, _gf_true); + } + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); if (op_ret) gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, - "failed to set the fd context path=%s fd=%p", real_path, fd); + "failed to set the fd context gfid-handle=%s path=%s fd=%p", + real_path, loc->path, fd); - GF_ATOMIC_INC(priv->nr_files); op_ret = 0; out: @@ -1488,7 +1643,7 @@ out: SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL); + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); return 0; } @@ -1573,6 +1728,7 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } } + posix_update_iatt_buf(&preop, _fd, NULL, xdata); op_ret = sys_pread(_fd, iobuf->ptr, size, offset); if (op_ret == -1) { op_errno = errno; @@ -1787,19 +1943,28 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_boolean_t write_append = _gf_false; gf_boolean_t update_atomic = _gf_false; posix_inode_ctx_t *ctx = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; + int totlen = 0; + int idx = 0; - VALIDATE_OR_GOTO(frame, out); - VALIDATE_OR_GOTO(this, out); - VALIDATE_OR_GOTO(fd, out); - VALIDATE_OR_GOTO(fd->inode, out); - VALIDATE_OR_GOTO(vector, out); - VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); + VALIDATE_OR_GOTO(fd, unwind); + VALIDATE_OR_GOTO(fd->inode, unwind); + VALIDATE_OR_GOTO(vector, unwind); + VALIDATE_OR_GOTO(this->private, unwind); priv = this->private; - VALIDATE_OR_GOTO(priv, out); + VALIDATE_OR_GOTO(priv, unwind); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); +overwrite: + + check_space_error = _gf_true; if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, "writev received on a block/char file (%s)", @@ -1878,6 +2043,7 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, } } + posix_update_iatt_buf(&preop, _fd, NULL, xdata); if (locked && write_append) { if (preop.ia_size == offset || (fd->flags & O_APPEND)) is_append = 1; @@ -1940,6 +2106,36 @@ out: locked = _gf_false; } + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto unwind; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto unwind; + } + + for (idx = 0; idx < count; idx++) { + totlen = vector[idx].iov_len; + } + + if ((offset + totlen <= statbuf.st_size) && + !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, rsp_xdata); @@ -1975,6 +2171,7 @@ posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, gf_boolean_t locked = _gf_false; gf_boolean_t update_atomic = _gf_false; posix_inode_ctx_t *ctx = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); @@ -2110,13 +2307,12 @@ posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, flags); if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; + op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, "copy_file_range failed: fd_in: %p (gfid: %s) ," " fd_out %p (gfid:%s)", - fd_in, uuid_utoa(fd_in->inode->gfid), fd_out, - uuid_utoa(fd_out->inode->gfid)); + fd_in, uuid_utoa_r(fd_in->inode->gfid, in_uuid_str), fd_out, + uuid_utoa_r(fd_out->inode->gfid, out_uuid_str)); goto out; } @@ -2223,7 +2419,7 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) }; struct posix_private *priv = NULL; int shared_by = 1; - int percent = 0; + double percent = 0; uint64_t reserved_blocks = 0; VALIDATE_OR_GOTO(frame, out); @@ -2245,12 +2441,20 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, - "statvfs failed on %s", real_path); + "statvfs failed on gfid-handle %s (path: %s)", real_path, + loc->path); goto out; } - percent = priv->disk_reserve; - reserved_blocks = (buf.f_blocks * percent) / 100; + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5); + } else { + if (buf.f_bsize) { + reserved_blocks = (priv->disk_reserve + buf.f_bsize - 1) / + buf.f_bsize; + } + } if (buf.f_bfree > reserved_blocks) { buf.f_bfree = (buf.f_bfree - reserved_blocks); @@ -2318,18 +2522,13 @@ out: int32_t posix_release(xlator_t *this, fd_t *fd) { - struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; int ret = -1; uint64_t tmp_pfd = 0; - glusterfs_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); - priv = this->private; - ctx = THIS->ctx; - ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, @@ -2343,18 +2542,8 @@ posix_release(xlator_t *this, fd_t *fd) "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } - pthread_mutex_lock(&ctx->janitor_lock); - { - INIT_LIST_HEAD(&pfd->list); - list_add_tail(&pfd->list, &ctx->janitor_fds); - pthread_cond_signal(&ctx->janitor_cond); - } - pthread_mutex_unlock(&ctx->janitor_lock); - - if (!priv) - goto out; + posix_add_fd_to_cleanup(this, pfd); - GF_ATOMIC_DEC(priv->nr_files); out: return 0; } @@ -2488,7 +2677,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) filler = tmp; - return posix_handle_pair(filler->this, filler->real_path, k, v, + return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, filler->flags, filler->stbuf); } @@ -2523,6 +2712,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t ret = 0; ssize_t acl_size = 0; dict_t *xattr = NULL; + dict_t *subvol_xattrs = NULL; posix_xattr_filler_t filler = { 0, }; @@ -2531,12 +2721,17 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, 0, }; data_t *tdata = NULL; - char stime[4096]; - char sxattr[4096]; + char *cs_var = NULL; gf_cs_obj_state state = -1; - char remotepath[4096] = {0}; int i = 0; int len; + struct mdata_iatt mdata_iatt = { + 0, + }; + int8_t sync_backend_xattrs = _gf_false; + data_pair_t *custom_xattrs; + data_t *keyval = NULL; + char **xattrs_to_heal = get_xattrs_to_heal(); DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); @@ -2557,6 +2752,20 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto out; } + ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); + if (ret == 0) { + /* This is initiated by lookup when ctime feature is enabled to create + * "trusted.glusterfs.mdata" xattr if not present. These are the files + * which were created when ctime feature is disabled. + */ + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + op_ret = -1; + } + goto out; + } + posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false); op_ret = -1; @@ -2588,10 +2797,11 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto unlock; } - sprintf(stime, "%" PRId64, tmp_stbuf.ia_mtime); + cs_var = alloca(4096); + sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); /*TODO: may be should consider nano-second also */ - if (strncmp(stime, tdata->data, tdata->len) != 0) { + if (strncmp(cs_var, tdata->data, tdata->len) > 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "mtime " "passed is different from seen by file now." @@ -2601,31 +2811,54 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto unlock; } - len = sprintf(sxattr, "%" PRIu64, tmp_stbuf.ia_size); + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); - ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, sxattr, len, + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, flags); if (ret) { + op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, 0, "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); + + ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_NUM_BLOCKS, ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); + + ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, + flags); + if (ret) { op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret); goto unlock; } + memset(cs_var, 0, 4096); if (loc->path[0] == '/') { for (i = 1; i < strlen(loc->path); i++) { - remotepath[i - 1] = loc->path[i]; + cs_var[i - 1] = loc->path[i]; } - remotepath[i] = '\0'; - gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", - remotepath); + cs_var[i] = '\0'; + gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); } - ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, remotepath, - strlen(loc->path), flags); + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, + strlen(cs_var), flags); if (ret) { + op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "setxattr failed - %s" " %d", @@ -2635,13 +2868,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, ret = sys_truncate(real_path, 0); if (ret) { + op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "truncate failed - %s" " %d", GF_CS_OBJECT_SIZE, ret); - op_errno = errno; ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); if (ret) { + op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "removexattr " "failed post processing- %s" @@ -2659,12 +2893,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, } unlock: UNLOCK(&loc->inode->lock); + op_ret = ret; goto out; } filler.real_path = real_path; filler.this = this; filler.stbuf = &preop; + filler.loc = loc; #ifdef GF_DARWIN_HOST_OS filler.flags = map_xattr_flags(flags); @@ -2678,6 +2914,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto out; } + ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); + } + + if (sync_backend_xattrs) { + /* List all custom xattrs */ + subvol_xattrs = dict_new(); + if (!subvol_xattrs) + goto out; + + ret = dict_set_int32_sizen(xdata, "list-xattr", 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "Unable to set list-xattr in dict "); + goto out; + } + + subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + NULL); + + /* Remove all user xattrs from the file */ + dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, + real_path); + + /* Remove all custom xattrs from the file */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); + if (keyval) { + ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, + errno, "removexattr failed. key %s path %s", + xattrs_to_heal[i], loc->path); + goto out; + } + + dict_del(subvol_xattrs, xattrs_to_heal[i]); + keyval = NULL; + } + } + + /* Set custom xattrs based on info provided by DHT */ + custom_xattrs = dict->members_list; + + while (custom_xattrs != NULL) { + ret = sys_lsetxattr(real_path, custom_xattrs->key, + custom_xattrs->value->data, + custom_xattrs->value->len, flags); + if (ret) { + op_errno = errno; + gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", + custom_xattrs->key, ret); + goto out; + } + + custom_xattrs = custom_xattrs->next; + } + } + xattr = dict_new(); if (!xattr) goto out; @@ -2785,6 +3081,9 @@ out: if (xattr) dict_unref(xattr); + if (subvol_xattrs) + dict_unref(subvol_xattrs); + return 0; } @@ -2811,7 +3110,9 @@ posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, } if (op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "posix_xattr_get_real_filename (lstat) on %s failed", real_path); + "posix_xattr_get_real_filename (lstat) on " + "gfid-handle %s (path: %s) failed", + real_path, loc->path); return -errno; } @@ -2840,7 +3141,7 @@ posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, (void)sys_closedir(fd); if (!found) - return -ENOENT; + return -ENOATTR; ret = dict_set_dynstr(dict, (char *)key, found); if (ret) { @@ -3114,7 +3415,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, } while (remaining_size > 0) { - snprintf(key, sizeof(key), "%s", list + list_offset); + len = snprintf(key, sizeof(key), "%s", list + list_offset); if (strncmp(key, PGFID_XATTR_KEY_PREFIX, SLEN(PGFID_XATTR_KEY_PREFIX)) != 0) goto next; @@ -3162,7 +3463,6 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, } next: - len = strlen(key); remaining_size -= (len + 1); list_offset += (len + 1); } /* while (remaining_size > 0) */ @@ -3278,8 +3578,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, "could not get acl (%s) for" - "%s", - name, real_path); + "gfid-handle %s (path: %s)", + name, real_path, loc->path); op_ret = -1; goto out; } @@ -3288,9 +3588,9 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { GF_FREE(value); gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, - "could not set acl (%s) for" - "%s in dictionary", - name, real_path); + "could not set acl (%s) for %s " + "(gfid-handle: %s) in dictionary", + name, loc->path, real_path); op_ret = -1; op_errno = ENOMEM; goto out; @@ -3308,7 +3608,7 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { op_ret = -1; op_errno = -ret; - if (op_errno == ENOENT) { + if (op_errno == ENOATTR) { gf_msg_debug(this->name, 0, "Failed to get " "real filename (%s, %s)", @@ -3491,9 +3791,9 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (strncmp(key, "user.", 5) == 0) { key += 5; gf_msg_debug(this->name, 0, - "getxattr for file %s" + "getxattr for file %s (gfid-handle: %s)" " stripping user key: %s -> %s", - real_path, keybuffer, key); + loc->path, real_path, keybuffer, key); } } #endif @@ -3504,8 +3804,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "getxattr failed due to overflow of buffer" - " on %s: %s ", - real_path, key); + " on gfid-handle %s (path: %s) : %s ", + real_path, loc->path, key); size = sys_lgetxattr(real_path, key, NULL, 0); } if (size == -1) { @@ -3520,12 +3820,14 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, } if ((op_errno == ENOATTR) || (op_errno == ENODATA)) { gf_msg_debug(this->name, 0, - "No such attribute:%s for file %s", key, - real_path); + "No such attribute:%s for file %s (path: %s)", + key, real_path, loc->path); } else { gf_msg(this->name, GF_LOG_ERROR, op_errno, - P_MSG_XATTR_FAILED, "getxattr failed on %s: %s ", - real_path, key); + P_MSG_XATTR_FAILED, + "getxattr failed on " + "%s (path: %s): %s ", + real_path, loc->path, key); } goto out; } @@ -3545,7 +3847,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "getxattr failed on %s: key = %s", real_path, key); + "getxattr failed on %s (path: %s): key = %s", real_path, + loc->path, key); GF_FREE(value); goto out; } @@ -3556,8 +3859,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = -op_ret; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, "dict set operation " - "on %s for the key %s failed.", - real_path, key); + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, key); GF_FREE(value); goto out; } @@ -3573,8 +3876,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "listxattr failed due to overflow of buffer" - " on %s ", - real_path); + " on %s (path: %s) ", + real_path, loc->path); size = sys_llistxattr(real_path, NULL, 0); } if (size == -1) { @@ -3588,7 +3891,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, "flag)"); } else { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "listxattr failed on %s", real_path); + "listxattr failed on %s (path: %s)", real_path, + loc->path); } goto out; } @@ -3639,16 +3943,16 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED, "getxattr failed due to overflow of" - " buffer on %s: %s ", - real_path, keybuffer); + " buffer on %s (path: %s): %s ", + real_path, loc->path, keybuffer); size = sys_lgetxattr(real_path, keybuffer, NULL, 0); } if (size == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on" - " %s: key = %s ", - real_path, keybuffer); + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); goto out; } } @@ -3666,8 +3970,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on" - " %s: key = %s ", - real_path, keybuffer); + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); GF_FREE(value); goto out; } @@ -3685,8 +3989,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = -op_ret; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, "dict set operation " - "on %s for the key %s failed.", - real_path, keybuffer); + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, keybuffer); GF_FREE(value); goto out; } @@ -3952,7 +4256,6 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, size = sys_fgetxattr(_fd, key, NULL, 0); } if (size == -1) { - op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr failed " @@ -3973,7 +4276,6 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, bzero(value, size + 1); size = sys_fgetxattr(_fd, key, value, size); if (size == -1) { - op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr failed o" @@ -4251,10 +4553,19 @@ posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, goto out; } - if (loc) + if (loc) { ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false); - else + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } + } else { ret = posix_fdstat(this, inode, _fd, &preop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path ? real_path : ""); + } + } if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, @@ -4310,9 +4621,17 @@ posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, posix_set_ctime(frame, this, real_path, -1, inode, NULL); ret = posix_pstat(this, inode, loc->gfid, real_path, &postop, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } } else { posix_set_ctime(frame, this, NULL, _fd, inode, NULL); ret = posix_fdstat(this, inode, _fd, &postop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path); + } } if (ret) goto out; @@ -4695,6 +5014,7 @@ unlock: op_ret = -1; op_errno = EINVAL; GF_FREE(array); + array = NULL; goto out; } array = NULL; @@ -4927,6 +5247,7 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, } } + posix_update_iatt_buf(&preop, _fd, NULL, xdata); op_ret = sys_ftruncate(_fd, offset); if (op_ret == -1) { @@ -4944,6 +5265,8 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, goto out; } + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); + op_ret = 0; out: @@ -5008,8 +5331,10 @@ posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); } + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); } + posix_update_iatt_buf(&buf, _fd, NULL, xdata); op_ret = 0; out: @@ -5145,20 +5470,13 @@ posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size, } if (skip_dirs) { - len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); if (len <= 0) { errno = ESTALE; count = -1; goto out; } - hpath = alloca(len + 256); /* NAME_MAX */ - - if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) { - errno = ESTALE; - count = -1; - goto out; - } - len = strlen(hpath); hpath[len] = '/'; } @@ -5326,22 +5644,14 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, itable = fd->inode->table; - len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); if (len <= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, "Failed to create handle path, fd=%p, gfid=%s", fd, uuid_utoa(fd->inode->gfid)); return -1; } - - hpath = alloca(len + 256); /* NAME_MAX */ - if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, - "Failed to create handle path, fd=%p, gfid=%s", fd, - uuid_utoa(fd->inode->gfid)); - return -1; - } - len = strlen(hpath); hpath[len] = '/'; @@ -5363,6 +5673,8 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, continue; } + posix_update_iatt_buf(&stbuf, -1, hpath, dict); + if (!inode) inode = inode_find(itable, stbuf.ia_gfid); diff --git a/xlators/storage/posix/src/posix-inode-handle.h b/xlators/storage/posix/src/posix-inode-handle.h index 2009421cdba..36c47f2bebc 100644 --- a/xlators/storage/posix/src/posix-inode-handle.h +++ b/xlators/storage/posix/src/posix-inode-handle.h @@ -12,7 +12,6 @@ #include <limits.h> #include <sys/types.h> -#include <glusterfs/xlator.h> #include <glusterfs/gf-dirent.h> #include "posix.h" @@ -45,14 +44,13 @@ #define MAKE_HANDLE_PATH(var, this, gfid, base) \ do { \ - int __len; \ - __len = posix_handle_path(this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ - var = alloca(__len); \ - __len = posix_handle_path(this, gfid, base, var, __len); \ - if (__len <= 0) \ + int __len = 0; \ + int tot = PATH_MAX; \ + var = alloca(tot); \ + __len = posix_handle_path(this, gfid, base, var, tot); \ + if (__len <= 0) { \ var = NULL; \ + } \ } while (0) /* TODO: it is not a good idea to change a variable which diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index 32292756bc4..f5bede266da 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -68,6 +68,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED, P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED, P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED, P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED, - P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED); + P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED, P_MSG_NOMEM, + P_MSG_PSTAT_FAILED, P_MSG_FDSTAT_FAILED); #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index 0ea90992714..b1889052f11 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -56,22 +56,32 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) out->atime.tv_nsec = be64toh(in->atime.tv_nsec); } +void +posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in) +{ + out->ia_ctime = be64toh(in->ctime.tv_sec); + out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec); + + out->ia_mtime = be64toh(in->mtime.tv_sec); + out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec); + + out->ia_atime = be64toh(in->atime.tv_sec); + out->ia_atime_nsec = be64toh(in->atime.tv_nsec); +} + /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ static int posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, inode_t *inode, posix_mdata_t *metadata, int *op_errno) { - size_t size = -1; + size_t size = 256; int op_ret = -1; char *value = NULL; gf_boolean_t fd_based_fop = _gf_false; char gfid_str[64] = {0}; char *real_path = NULL; - char *key = GF_XATTR_MDATA_KEY; - if (!metadata) { - op_ret = -1; goto out; } @@ -79,84 +89,101 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, fd_based_fop = _gf_true; } if (!(fd_based_fop || real_path_arg)) { + GF_VALIDATE_OR_GOTO(this->name, inode, out); MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); if (!real_path) { + *op_errno = errno; uuid_utoa_r(inode->gfid, gfid_str); - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_LSTAT_FAILED, "lstat on gfid %s failed", gfid_str); - op_ret = -1; - *op_errno = errno; goto out; } } + value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char); + if (!value) { + *op_errno = ENOMEM; + goto out; + } + if (fd_based_fop) { - size = sys_fgetxattr(_fd, key, NULL, 0); + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size); } else if (real_path_arg) { - size = sys_lgetxattr(real_path_arg, key, NULL, 0); + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, value, size); } else if (real_path) { - size = sys_lgetxattr(real_path, key, NULL, 0); + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, value, size); } if (size == -1) { *op_errno = errno; + if (value) { + GF_FREE(value); + value = NULL; + } if ((*op_errno == ENOTSUP) || (*op_errno == ENOSYS)) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " + "Extended attributes not supported" + " (try remounting brick with 'user xattr' " "flag)"); } else if (*op_errno == ENOATTR || *op_errno == ENODATA) { gf_msg_debug(this->name, 0, - "No such attribute:%s for file %s " - "gfid: %s", - key, + "No such attribute:%s for file %s gfid: %s", + GF_XATTR_MDATA_KEY, real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid)); - } else { - gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED, - "getxattr failed" - " on %s gfid: %s key: %s ", + inode ? uuid_utoa(inode->gfid) : "null"); + goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, NULL, 0); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, NULL, 0); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, NULL, 0); + } + + if (size == -1) { /* give up now and exist with an error */ + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s gfid: %s key: %s ", real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid), key); + inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); + goto out; } - op_ret = -1; - goto out; - } - value = GF_CALLOC(size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } + value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char); + if (!value) { + *op_errno = ENOMEM; + goto out; + } - if (fd_based_fop) { - size = sys_fgetxattr(_fd, key, value, size); - } else if (real_path_arg) { - size = sys_lgetxattr(real_path_arg, key, value, size); - } else if (real_path) { - size = sys_lgetxattr(real_path, key, value, size); - } - if (size == -1) { - op_ret = -1; - *op_errno = errno; - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "getxattr failed on " - " on %s gfid: %s key: %s ", - real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid), key); - goto out; + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, value, + size); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, value, size); + } + if (size == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); + goto out; + } } - posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); op_ret = 0; out: - GF_FREE(value); + if (value) + GF_FREE(value); return op_ret; } @@ -229,16 +256,29 @@ int __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, inode_t *inode, struct iatt *stbuf) { + uint64_t ctx; posix_mdata_t *mdata = NULL; int ret = -1; int op_errno = 0; - GF_VALIDATE_OR_GOTO(this->name, inode, out); + /* Handle readdirp: inode might be null, time attributes should be served + * from xattr not from backend's file attributes */ + if (inode) { + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + } + } else { + ret = -1; + } - ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); if (ret == -1 || !mdata) { mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. file: %s: gfid: %s", + real_path ? real_path : "null", + inode ? uuid_utoa(inode->gfid) : "null"); ret = -1; goto out; } @@ -251,21 +291,14 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, * is hit when in-memory status is lost due to brick * down scenario */ - __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + if (inode) { + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } } else { /* Failed to get mdata from disk, xattr missing. - * This happens on two cases. - * 1. File is created before ctime is enabled. - * 2. On new file creation. - * - * Do nothing, just return success. It is as - * good as ctime feature is not enabled for this - * file. For files created before ctime is enabled, - * time attributes gets updated into ctime structure - * once the metadata modification fop happens and - * time attributes become consistent eventually. - * For new files, it would obviously get updated - * before the fop completion. + * This happens when the file is created before + * ctime is enabled. */ if (stbuf && op_errno != ENOENT) { ret = 0; @@ -278,7 +311,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, */ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ", - real_path ? real_path : "null", uuid_utoa(inode->gfid), + real_path ? real_path : "null", + inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); GF_FREE(mdata); ret = 0; @@ -297,6 +331,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, stbuf->ia_atime = mdata->atime.tv_sec; stbuf->ia_atime_nsec = mdata->atime.tv_nsec; } + /* Not set in inode context, hence free mdata */ + if (!inode) { + GF_FREE(mdata); + } out: return ret; @@ -332,14 +370,110 @@ posix_compare_timespec(struct timespec *first, struct timespec *second) return first->tv_sec - second->tv_sec; } +int +posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + const char *realpath, + struct mdata_iatt *mdata_iatt, int *op_errno) +{ + uint64_t ctx; + posix_mdata_t *mdata = NULL; + posix_mdata_t imdata = { + 0, + }; + int ret = 0; + gf_boolean_t mdata_already_set = _gf_false; + + GF_VALIDATE_OR_GOTO("posix", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0 && ctx) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + mdata_already_set = _gf_true; + } else { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. gfid: %s", + uuid_utoa(inode->gfid)); + ret = -1; + *op_errno = ENOMEM; + goto unlock; + } + + ret = posix_fetch_mdata_xattr(this, realpath, -1, inode, + (void *)mdata, op_errno); + if (ret == 0) { + /* Got mdata from disk. This is a race, another client + * has healed the xattr during lookup. So set it in inode + * ctx */ + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + mdata_already_set = _gf_true; + } else { + *op_errno = 0; + mdata->version = 1; + mdata->flags = 0; + mdata->ctime.tv_sec = mdata_iatt->ia_ctime; + mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; + mdata->atime.tv_sec = mdata_iatt->ia_atime; + mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec; + mdata->mtime.tv_sec = mdata_iatt->ia_mtime; + mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } + } + + if (mdata_already_set) { + /* Compare and update the larger time */ + imdata.ctime.tv_sec = mdata_iatt->ia_ctime; + imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; + imdata.atime.tv_sec = mdata_iatt->ia_atime; + imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec; + imdata.mtime.tv_sec = mdata_iatt->ia_mtime; + imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + + if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) { + mdata->ctime = imdata.ctime; + } + if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) { + mdata->mtime = imdata.mtime; + } + if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) { + mdata->atime = imdata.atime; + } + } + + ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, + "gfid: %s key:%s ", uuid_utoa(inode->gfid), + GF_XATTR_MDATA_KEY); + *op_errno = errno; + goto unlock; + } + } +unlock: + UNLOCK(&inode->lock); +out: + return ret; +} + /* posix_set_mdata_xattr updates the posix_mdata_t based on the flag * in inode context and stores it on disk */ static int posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct timespec *time, struct iatt *stbuf, - posix_mdata_flag_t *flag, gf_boolean_t update_utime) + inode_t *inode, struct timespec *time, + struct timespec *u_atime, struct timespec *u_mtime, + struct iatt *stbuf, posix_mdata_flag_t *flag, + gf_boolean_t update_utime) { + uint64_t ctx; posix_mdata_t *mdata = NULL; int ret = -1; int op_errno = 0; @@ -348,9 +482,17 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, GF_VALIDATE_OR_GOTO(this->name, inode, out); GF_VALIDATE_OR_GOTO(this->name, time, out); + if (update_utime && (flag->atime && !u_atime) && + (flag->mtime && !u_mtime)) { + goto out; + } + LOCK(&inode->lock); { - ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + } if (ret == -1 || !mdata) { /* * Do we need to fetch the data from xattr @@ -359,6 +501,9 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, */ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. file: %s: gfid: %s", + real_path ? real_path : "null", uuid_utoa(inode->gfid)); ret = -1; goto unlock; } @@ -370,39 +515,32 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, * is hit when in-memory status is lost due to brick * down scenario */ - __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); } else { /* - * This is the first time creating the time - * attr. This happens when you activate this - * feature, and the legacy file will not have - * any xattr set. - * - * New files will create extended attributes. + * This is the first time creating the time attr. This happens + * when you activate this feature. On this code path, only new + * files will create mdata xattr. The legacy files (files + * created before ctime enabled) will not have any xattr set. + * The xattr on legacy file will be set via lookup. */ - /* - * TODO: This is wrong approach, because before - * creating fresh xattr, we should consult - * to all replica and/or distribution set. - * - * We should contact the time management - * xlators, and ask them to create an xattr. - */ - /* We should not be relying on backend file's - * time attributes to load the initial ctime - * time attribute structure. This is incorrect - * as each replica set would have witnessed the - * file creation at different times. - * - * For new file creation, ctime, atime and mtime - * should be same, hence initiate the ctime - * structure with the time from the frame. But - * for the files which were created before ctime - * feature is enabled, this is not accurate but - * still fine as the times would get eventually - * accurate. + /* Don't create xattr with utimes/utimensat, only update if + * present. This otherwise causes issues during inservice + * upgrade. It causes inconsistent xattr values with in replica + * set. The scenario happens during upgrade where clients are + * older versions (without the ctime feature) and the server is + * upgraded to the new version (with the ctime feature which + * is enabled by default). */ + + if (update_utime) { + UNLOCK(&inode->lock); + GF_FREE(mdata); + return 0; + } + mdata->version = 1; mdata->flags = 0; mdata->ctime.tv_sec = time->tv_sec; @@ -412,36 +550,35 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, mdata->mtime.tv_sec = time->tv_sec; mdata->mtime.tv_nsec = time->tv_nsec; - __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); } } - /* Earlier, mdata was updated only if the existing time is less - * than the time to be updated. This would fail the scenarios - * where mtime can be set to any time using the syscall. Hence - * just updating without comparison. But the ctime is not - * allowed to changed to older date. - */ - - if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) { - mdata->ctime = *time; - } - /* In distributed systems, there could be races with fops * updating mtime/atime which could result in different * mtime/atime for same file. So this makes sure, only the * highest time is retained. If the mtime/atime update comes * from the explicit utime syscall, it is allowed to set to - * previous time + * previous or future time but the ctime is always set to + * current time. */ if (update_utime) { + if (flag->ctime && + posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } if (flag->mtime) { - mdata->mtime = *time; + mdata->mtime = *u_mtime; } if (flag->atime) { - mdata->atime = *time; + mdata->atime = *u_atime; } } else { + if (flag->ctime && + posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } if (flag->mtime && posix_compare_timespec(time, &mdata->mtime) > 0) { mdata->mtime = *time; @@ -486,7 +623,6 @@ out: stbuf->ia_atime_nsec = mdata->atime.tv_nsec; } - return ret; } @@ -495,15 +631,22 @@ out: */ void posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct iatt *stbuf, int valid) + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid) { int32_t ret = 0; #if defined(HAVE_UTIMENSAT) - struct timespec tv = { + struct timespec tv_atime = { + 0, + }; + struct timespec tv_mtime = { 0, }; #else - struct timeval tv = { + struct timeval tv_atime = { + 0, + }; + struct timeval tv_mtime = { 0, }; #endif @@ -515,37 +658,35 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, priv = this->private; + /* NOTE: + * This routine (utimes) is intentionally allowed for all internal and + * external clients even if ctime is not set. This is because AFR and + * WORM uses time attributes for it's internal operations + */ if (inode && priv->ctime) { if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { - tv.tv_sec = stbuf->ia_atime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec); + tv_atime.tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec); - flag.ctime = 0; - flag.mtime = 0; + flag.ctime = 1; flag.atime = 1; - ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, - &flag, _gf_true); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, - "posix set mdata atime failed on file:" - " %s gfid:%s", - real_path, uuid_utoa(inode->gfid)); - } } if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { - tv.tv_sec = stbuf->ia_mtime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); + tv_mtime.tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec); flag.ctime = 1; flag.mtime = 1; - flag.atime = 0; + } - ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, - &flag, _gf_true); + if (flag.mtime || flag.atime) { + ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime, + &tv_atime, &tv_mtime, NULL, &flag, + _gf_true); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, - "posix set mdata mtime failed on file:" + "posix set mdata atime failed on file:" " %s gfid:%s", real_path, uuid_utoa(inode->gfid)); } @@ -554,6 +695,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, return; } +/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs + * to be modified + */ +void +posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid) +{ + int32_t ret = 0; +#if defined(HAVE_UTIMENSAT) + struct timespec tv_ctime = { + 0, + }; +#else + struct timeval tv_ctime = { + 0, + }; +#endif + posix_mdata_flag_t flag = { + 0, + }; + + struct posix_private *priv = NULL; + priv = this->private; + + if (inode && priv->ctime) { + tv_ctime.tv_sec = stbuf->ia_ctime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec); + flag.ctime = 1; + + ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL, + NULL, NULL, &flag, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } + } + return; +} + static void posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) { @@ -607,17 +790,9 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { goto out; } - - if (frame->root->ctime.tv_sec == 0) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, - "posix set mdata failed, No ctime : %s gfid:%s", real_path, - inode ? uuid_utoa(inode->gfid) : "No inode"); - goto out; - } - ret = posix_set_mdata_xattr(this, real_path, fd, inode, - &frame->root->ctime, stbuf, &flag, - _gf_false); + &frame->root->ctime, NULL, NULL, stbuf, + &flag, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, "posix set mdata failed on file: %s gfid:%s", real_path, @@ -647,8 +822,8 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, goto out; } ret = posix_set_mdata_xattr(this, real_path, fd, inode, - &frame->root->ctime, stbuf, &flag, - _gf_false); + &frame->root->ctime, NULL, NULL, stbuf, + &flag, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, "posix set mdata failed on file: %s gfid:%s", real_path, @@ -673,6 +848,7 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, }; int ret = 0; struct posix_private *priv = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; priv = this->private; @@ -687,9 +863,11 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, "posix set mdata failed, No ctime : in: %s gfid_in:%s " "out: %s gfid_out:%s", real_path_in, - inode_in ? uuid_utoa(inode_in->gfid) : "No inode", + (inode_in ? uuid_utoa_r(inode_in->gfid, in_uuid_str) + : "No inode"), real_path_out, - inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + (inode_out ? uuid_utoa_r(inode_out->gfid, out_uuid_str) + : "No inode")); goto out; } @@ -706,8 +884,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, flag_dup.atime = 0; ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, - &frame->root->ctime, stbuf_out, &flag_dup, - _gf_false); + &frame->root->ctime, NULL, NULL, stbuf_out, + &flag_dup, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, "posix set mdata failed on file: %s gfid:%s", real_path_out, @@ -725,8 +903,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, flag_dup.ctime = 0; ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out, - &frame->root->ctime, stbuf_out, &flag_dup, - _gf_false); + &frame->root->ctime, NULL, NULL, stbuf_out, + &flag_dup, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, "posix set mdata failed on file: %s gfid:%s", real_path_in, diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h index 3416148ea97..d37014af93e 100644 --- a/xlators/storage/posix/src/posix-metadata.h +++ b/xlators/storage/posix/src/posix-metadata.h @@ -15,13 +15,15 @@ /* In memory representation posix metadata xattr */ typedef struct { - /* version of structure, bumped up if any new member is added */ - uint8_t version; /* flags indicates valid fields in the structure */ uint64_t flags; struct timespec ctime; struct timespec mtime; struct timespec atime; + /* version of structure, bumped up if any new member is added */ + uint8_t version; + + char _pad[7]; /* manual padding */ } posix_mdata_t; typedef struct { @@ -40,7 +42,12 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, inode_t *inode, struct iatt *stbuf); void posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct iatt *stbuf, int valid); + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); +void +posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); void posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, int fd, inode_t *inode, struct iatt *stbuf); @@ -53,5 +60,12 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, const char *real_path_in, int fd_in, inode_t *inode_in, struct iatt *stbuf_in, const char *read_path_put, int fd_out, inode_t *inode_out, struct iatt *stbuf_out); +int +posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + const char *realpath, + struct mdata_iatt *mdata_iatt, + int *op_errno); +void +posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in); #endif /* _POSIX_METADATA_H */ diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index d5ba08ca593..b8db146eef2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -16,12 +16,8 @@ #include <dirent.h> #include <time.h> -#ifdef linux -#ifdef __GLIBC__ +#ifdef HAVE_SET_FSID #include <sys/fsuid.h> -#else -#include <unistd.h> -#endif #endif #ifdef HAVE_SYS_XATTR_H @@ -32,11 +28,9 @@ #include <sys/extattr.h> #endif -#include <glusterfs/xlator.h> #include <glusterfs/compat.h> #include <glusterfs/timer.h> #include "posix-mem-types.h" -#include "posix-handle.h" #include <glusterfs/call-stub.h> #ifdef HAVE_LIBAIO @@ -53,21 +47,21 @@ #define ACL_BUFFER_MAX 4096 /* size of character buffer */ #define DHT_LINKTO "trusted.glusterfs.dht.linkto" -/* - * TIER_MODE need to be changed when we stack tiers - */ -#define TIER_LINKTO "trusted.tier.tier-dht.linkto" #define POSIX_GFID_HANDLE_SIZE(base_path_len) \ (base_path_len + SLEN("/") + SLEN(GF_HIDDEN_PATH) + SLEN("/") + \ SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */; + +#define POSIX_GFID_HANDLE_RELSIZE \ + SLEN("../") + SLEN("../") + SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1; + #define GF_UNLINK_TRUE 0x0000000000000001 #define GF_UNLINK_FALSE 0x0000000000000000 #define DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out) \ do { \ if (frame->root->pid >= 0 && priv->disk_space_full && \ - !dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \ + !dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \ op_ret = -1; \ op_errno = ENOSPC; \ gf_msg_debug("posix", ENOSPC, \ @@ -95,9 +89,8 @@ #endif #define GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xattr_req, op_ret, \ - op_errno, out) \ + op_errno, _uuid_req, out) \ do { \ - uuid_t _uuid_req; \ int _ret = 0; \ /* TODO: Remove pid check once trash implements client side \ * logic to assign gfid for entry creations inside .trashcan \ @@ -107,9 +100,7 @@ _ret = dict_get_gfuuid(xattr_req, "gfid-req", &_uuid_req); \ if (_ret) { \ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, \ - "failed to get the gfid from" \ - " dict for %s", \ - loc->path); \ + "failed to get the gfid from dict for %s", loc->path); \ op_ret = -1; \ op_errno = EINVAL; \ goto out; \ @@ -128,12 +119,14 @@ */ struct posix_fd { - int fd; /* fd returned by the kernel */ - int32_t flags; /* flags for open/creat */ - DIR *dir; /* handle returned by the kernel */ - off_t dir_eof; /* offset at dir EOF */ - int odirect; + int fd; /* fd returned by the kernel */ + int32_t flags; /* flags for open/creat */ + DIR *dir; /* handle returned by the kernel */ + off_t dir_eof; /* offset at dir EOF */ struct list_head list; /* to add to the janitor list */ + int odirect; + xlator_t *xl; + char _pad[4]; /* manual padding */ }; struct posix_private { @@ -144,66 +137,38 @@ struct posix_private { gf_lock_t lock; char *hostname; - /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; time_t last_landfill_check; - int32_t janitor_sleep_duration; gf_atomic_t read_value; /* Total read, from init */ gf_atomic_t write_value; /* Total write, from init */ - gf_atomic_t nr_files; - /* - In some cases, two exported volumes may reside on the same - partition on the server. Sending statvfs info for both - the volumes will lead to erroneous df output at the client, - since free space on the partition will be counted twice. - - In such cases, user can disable exporting statvfs info - on one of the volumes by setting this option. - */ - gf_boolean_t export_statfs; - - gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ - - /* - decide whether posix_unlink does open (file), unlink (file), close (fd) - instead of just unlink (file). with the former approach there is no - lockout of access to parent directory during removal of very large files - for the entire duration of freeing of data blocks. - */ - gf_boolean_t background_unlink; /* janitor task which cleans up /.trash (created by replicate) */ struct gf_tw_timer_list *janitor; char *trash_path; /* lock for brick dir */ - DIR *mount_lock; + int mount_lock; struct stat handledir; /* uuid of glusterd that swapned the brick process */ uuid_t glusterd_uuid; - gf_boolean_t aio_configured; - gf_boolean_t aio_init_done; - gf_boolean_t aio_capable; #ifdef HAVE_LIBAIO io_context_t ctxp; pthread_t aiothread; #endif - /* node-uuid in pathinfo xattr */ - gf_boolean_t node_uuid_pathinfo; - pthread_t fsyncer; struct list_head fsyncs; pthread_mutex_t fsync_mutex; pthread_cond_t fsync_cond; + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; int fsync_queue_count; + int32_t janitor_sleep_duration; enum { BATCH_NONE = 0, @@ -214,8 +179,6 @@ struct posix_private { } batch_fsync_mode; uint32_t batch_fsync_delay_usec; - gf_boolean_t update_pgfid_nlinks; - gf_boolean_t gfid2path; char gfid2path_sep[8]; /* seconds to sleep between health checks */ @@ -223,12 +186,10 @@ struct posix_private { /* seconds to sleep to wait for aio write finish for health checks */ uint32_t health_check_timeout; pthread_t health_check; - gf_boolean_t health_check_active; - uint32_t disk_reserve; - uint32_t disk_space_full; + double disk_reserve; pthread_t disk_space_check; - gf_boolean_t disk_space_check_active; + uint32_t disk_space_full; #ifdef GF_DARWIN_HOST_OS enum { @@ -243,9 +204,6 @@ struct posix_private { same backend. Very much usable in brick-splitting feature. */ int32_t shared_brick_count; - /* This option is used for either to call a landfill_purge or not */ - gf_boolean_t disable_landfill_purge; - /*Option to set mode bit permission that will always be set on file/directory. */ mode_t force_create_mode; @@ -253,9 +211,47 @@ struct posix_private { mode_t create_mask; mode_t create_directory_mask; uint32_t max_hardlinks; + int32_t arrdfd[256]; + int dirfd; + + /* This option is used for either to call a landfill_purge or not */ + gf_boolean_t disable_landfill_purge; gf_boolean_t fips_mode_rchecksum; gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; + + gf_boolean_t disk_space_check_active; + char disk_unit; + gf_boolean_t health_check_active; + gf_boolean_t update_pgfid_nlinks; + gf_boolean_t gfid2path; + /* node-uuid in pathinfo xattr */ + gf_boolean_t node_uuid_pathinfo; + /* + In some cases, two exported volumes may reside on the same + partition on the server. Sending statvfs info for both + the volumes will lead to erroneous df output at the client, + since free space on the partition will be counted twice. + + In such cases, user can disable exporting statvfs info + on one of the volumes by setting this option. + */ + gf_boolean_t export_statfs; + + gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ + + /* + decide whether posix_unlink does open (file), unlink (file), close (fd) + instead of just unlink (file). with the former approach there is no + lockout of access to parent directory during removal of very large files + for the entire duration of freeing of data blocks. + */ + gf_boolean_t background_unlink; + gf_boolean_t aio_configured; + gf_boolean_t aio_init_done; + gf_boolean_t aio_capable; + uint32_t rel_fdcount; }; typedef struct { @@ -269,9 +265,11 @@ typedef struct { fd_t *fd; int fdnum; int flags; - int32_t op_errno; char *list; size_t list_size; + int32_t op_errno; + + char _pad[4]; /* manual padding */ } posix_xattr_filler_t; typedef struct { @@ -295,7 +293,7 @@ typedef struct { char gfid_str[64] = {0}; \ uuid_utoa_r(gfid, gfid_str); \ path_len = strlen(base_path) + 1 + SLEN(GF_UNLINK_PATH) + 1 + \ - strlen(gfid_str) + 1; \ + UUID_CANONICAL_FORM_LEN + 1; \ unlink_path = alloca(path_len); \ if (!unlink_path) { \ gf_msg("posix", GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, \ @@ -331,11 +329,12 @@ posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename, int posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *real_path, struct iatt *iatt, gf_boolean_t inode_locked); + dict_t * posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd, int fdnum, dict_t *xattr, struct iatt *buf); int -posix_handle_pair(xlator_t *this, const char *real_path, char *key, +posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, data_t *value, int flags, struct iatt *stbuf); int posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, @@ -348,7 +347,8 @@ int posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req); int -posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict); +posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, + dict_t *dict); int posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, @@ -384,9 +384,6 @@ void posix_gfid_unset(xlator_t *this, dict_t *xdata); int -posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s); - -int posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s); int32_t @@ -664,4 +661,13 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); int posix_spawn_ctx_janitor_thread(xlator_t *this); +void +posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); + +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data); + #endif /* _POSIX_H */ |
