diff options
Diffstat (limited to 'xlators/storage/posix/src')
-rw-r--r-- | xlators/storage/posix/src/posix-common.c | 163 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-entry-ops.c | 75 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-handle.c | 170 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-handle.h | 11 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 260 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 121 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-metadata.h | 6 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.h | 106 |
8 files changed, 643 insertions, 269 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index f4003942f4e..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -140,6 +140,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) struct timespec sleep_till = { 0, }; + glusterfs_ctx_t *ctx = this->ctx; switch (event) { case GF_EVENT_PARENT_UP: { @@ -150,8 +151,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) case GF_EVENT_PARENT_DOWN: { if (!victim->cleanup_starting) break; - gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", - victim->name); if (priv->janitor) { pthread_mutex_lock(&priv->janitor_mutex); @@ -160,7 +159,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); if (!ret) { - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait to set janitor_task flag to _gf_false by * janitor_task_done */ @@ -168,7 +167,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) (void)pthread_cond_timedwait(&priv->janitor_cond, &priv->janitor_mutex, &sleep_till); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; } } @@ -177,6 +176,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) GF_FREE(priv->janitor); } priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); } break; default: @@ -543,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this) return 0; } +int +posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name) +{ + int ret = -1; + + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno == ENOENT) { + ret = sys_mkdirat(pdirfd, dir_name, 0700); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", dir_name); + goto out; + } + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-1 %s ", dir_name); + goto out; + } + } +out: + return ret; +} + /** * init - */ @@ -579,6 +612,15 @@ posix_init(xlator_t *this) int force_directory = -1; int create_mask = -1; int create_directory_mask = -1; + char dir_handle[PATH_MAX] = { + 0, + }; + int i; + char fhash[4] = { + 0, + }; + int hdirfd = -1; + char value; dir_data = dict_get(this->options, "directory"); @@ -621,6 +663,11 @@ posix_init(xlator_t *this) _private->base_path = gf_strdup(dir_data->data); _private->base_path_length = dir_data->len - 1; + _private->dirfd = -1; + _private->mount_lock = -1; + for (i = 0; i < 256; i++) + _private->arrdfd[i] = -1; + ret = dict_get_str(this->options, "hostname", &_private->hostname); if (ret) { _private->hostname = GF_CALLOC(256, sizeof(char), gf_common_mt_char); @@ -635,16 +682,11 @@ posix_init(xlator_t *this) } /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", - 8, 0); - if (op_ret != -1) { - ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); - if (ret) { - gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, - "failed to remove xattr: " - "trusted.glusterfs.test"); - } - } else { + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); tmp_data = dict_get(this->options, "mandate-attribute"); if (tmp_data) { if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { @@ -893,8 +935,9 @@ posix_init(xlator_t *this) /* performing open dir on brick dir locks the brick dir * and prevents it from being unmounted */ - _private->mount_lock = sys_opendir(dir_data->data); - if (!_private->mount_lock) { + _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY), + 0); + if (_private->mount_lock < 0) { ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, @@ -938,6 +981,28 @@ posix_init(xlator_t *this) } this->private = (void *)_private; + snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path, + GF_HIDDEN_PATH); + hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock, + dir_handle); + if (hdirfd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error open directory failed for dir %s", dir_handle); + ret = -1; + goto out; + } + _private->dirfd = hdirfd; + for (i = 0; i < 256; i++) { + snprintf(fhash, sizeof(fhash), "%02x", i); + _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd, + fhash); + if (_private->arrdfd[i] < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error openat failed for file %s", fhash); + ret = -1; + goto out; + } + } op_ret = posix_handle_init(this); if (op_ret == -1) { @@ -1028,7 +1093,9 @@ posix_init(xlator_t *this) pthread_cond_init(&_private->fsync_cond, NULL); pthread_mutex_init(&_private->janitor_mutex, NULL); pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; ret = posix_spawn_ctx_janitor_thread(this); if (ret) goto out; @@ -1105,9 +1172,27 @@ posix_init(xlator_t *this) out); GF_OPTION_INIT("ctime", _private->ctime, bool, out); + out: if (ret) { if (_private) { + if (_private->dirfd >= 0) { + sys_close(_private->dirfd); + _private->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (_private->arrdfd[i] >= 0) { + sys_close(_private->arrdfd[i]); + _private->arrdfd[i] = -1; + } + } + /*unlock brick dir*/ + if (_private->mount_lock >= 0) { + (void)sys_close(_private->mount_lock); + _private->mount_lock = -1; + } + GF_FREE(_private->base_path); GF_FREE(_private->hostname); @@ -1127,7 +1212,10 @@ posix_fini(xlator_t *this) { struct posix_private *priv = this->private; gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; int ret = 0; + int i = 0; if (!priv) return; @@ -1138,6 +1226,18 @@ posix_fini(xlator_t *this) } UNLOCK(&priv->lock); + if (priv->dirfd >= 0) { + sys_close(priv->dirfd); + priv->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (priv->arrdfd[i] >= 0) { + sys_close(priv->arrdfd[i]); + priv->arrdfd[i] = -1; + } + } + if (health_check) { (void)gf_thread_cleanup_xint(priv->health_check); priv->health_check = 0; @@ -1160,13 +1260,28 @@ posix_fini(xlator_t *this) priv->janitor = NULL; } + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; } /*unlock brick dir*/ - if (priv->mount_lock) - (void)sys_closedir(priv->mount_lock); + if (priv->mount_lock >= 0) { + (void)sys_close(priv->mount_lock); + priv->mount_lock = -1; + } GF_FREE(priv->base_path); LOCK_DESTROY(&priv->lock); @@ -1351,24 +1466,21 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will always be set on a file."}, {.key = {"force-directory-mode"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will be always set on directory"}, {.key = {"create-mask"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a file when it is created"}, {.key = {"create-directory-mask"}, @@ -1376,8 +1488,7 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a directory when it is created"}, {.key = {"max-hardlinks"}, diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c index 1f1e05f1dc9..8cc3ccf8c00 100644 --- a/xlators/storage/posix/src/posix-entry-ops.c +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -176,6 +176,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) struct posix_private *priv = NULL; posix_inode_ctx_t *ctx = NULL; int ret = 0; + int dfd = -1; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); @@ -197,6 +198,19 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && loc->name && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED, + "Lookup issued on .attribute," + " which is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif /* __NetBSD__ */ + op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless); op_ret = -1; if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) { @@ -232,12 +246,12 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!op_errno) op_errno = ESTALE; loc_gfid(loc, gfid); - MAKE_HANDLE_ABSPATH(gfid_path, this, gfid); - ret = sys_stat(gfid_path, &statbuf); + MAKE_HANDLE_ABSPATH_FD(gfid_path, this, gfid, dfd); + ret = sys_fstatat(dfd, gfid_path, &statbuf, 0); if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR)) /*Don't unset if it was a symlink to a dir.*/ goto parent; - ret = sys_lstat(gfid_path, &statbuf); + ret = sys_fstatat(dfd, gfid_path, &statbuf, AT_SYMLINK_NOFOLLOW); if (ret == 0 && statbuf.st_nlink == 1) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_HANDLE_DELETE, @@ -649,6 +663,19 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED, + "mkdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; VALIDATE_OR_GOTO(priv, out); GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, @@ -1415,6 +1442,19 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED, + "rmdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -2145,6 +2185,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, 0, }; + dict_t *xdata_rsp = dict_ref(xdata); + DECLARE_OLD_FS_ID_VAR; VALIDATE_OR_GOTO(frame, out); @@ -2194,6 +2236,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, was_present = 0; } + if (!was_present) { + if (posix_is_layout_stale(xdata, par_path, this)) { + op_ret = -1; + op_errno = EIO; + if (!xdata_rsp) { + xdata_rsp = dict_new(); + if (!xdata_rsp) { + op_errno = ENOMEM; + goto out; + } + } + + if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == + -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, + "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); + } + + goto out; + } + } + if (priv->o_direct) _flags |= O_DIRECT; @@ -2313,7 +2377,10 @@ out: STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, (loc) ? loc->inode : NULL, &stbuf, &preparent, - &postparent, xdata); + &postparent, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); return 0; } diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c index f58f5416ff0..410b38da8cb 100644 --- a/xlators/storage/posix/src/posix-handle.c +++ b/xlators/storage/posix/src/posix-handle.c @@ -25,7 +25,7 @@ #include <glusterfs/compat-errno.h> int -posix_handle_mkdir_hashes(xlator_t *this, const char *newpath); +posix_handle_mkdir_hashes(xlator_t *this, int dfd, uuid_t gfid); inode_t * posix_resolve(xlator_t *this, inode_table_t *itable, inode_t *parent, @@ -331,9 +331,23 @@ posix_handle_pump(xlator_t *this, char *buf, int len, int maxlen, int ret = 0; int blen = 0; int link_len = 0; + char tmpstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + char d2[3] = { + 0, + }; + int index = 0; + int dirfd = 0; + struct posix_private *priv = this->private; + + strncpy(tmpstr, (base_str + pfx_len + 3), 40); + strncpy(d2, (base_str + pfx_len), 2); + index = strtoul(d2, NULL, 16); + dirfd = priv->arrdfd[index]; /* is a directory's symlink-handle */ - ret = sys_readlink(base_str, linkname, 512); + ret = readlinkat(dirfd, tmpstr, linkname, 512); if (ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED, "internal readlink failed on %s ", base_str); @@ -398,6 +412,11 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, int pfx_len; int maxlen; char *buf; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; priv = this->private; @@ -411,12 +430,14 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, buf = alloca(maxlen); } + index = gfid[0]; + dfd = priv->arrdfd[index]; + base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45); base_str = alloca(base_len + 1); base_len = snprintf(base_str, base_len + 1, "%s/%s/%02x/%02x/%s", priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); - pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1; if (basename) { @@ -425,7 +446,8 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, len = snprintf(buf, maxlen, "%s", base_str); } - ret = sys_lstat(base_str, &stat); + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_str); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1)) goto out; @@ -438,7 +460,6 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, if (ret == -1) break; - ret = sys_lstat(buf, &stat); } while ((ret == -1) && errno == ELOOP); @@ -485,6 +506,7 @@ posix_handle_init(xlator_t *this) struct stat exportbuf; char *rootstr = NULL; static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int dfd = 0; priv = this->private; @@ -534,9 +556,8 @@ posix_handle_init(xlator_t *this) return -1; } - MAKE_HANDLE_ABSPATH(rootstr, this, gfid); - - ret = sys_stat(rootstr, &rootbuf); + MAKE_HANDLE_ABSPATH_FD(rootstr, this, gfid, dfd); + ret = sys_fstatat(dfd, rootstr, &rootbuf, 0); switch (ret) { case -1: if (errno != ENOENT) { @@ -544,15 +565,14 @@ posix_handle_init(xlator_t *this) "%s", priv->base_path); return -1; } - - ret = posix_handle_mkdir_hashes(this, rootstr); + ret = posix_handle_mkdir_hashes(this, dfd, gfid); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "mkdir %s failed", rootstr); return -1; } - ret = sys_symlink("../../..", rootstr); + ret = sys_symlinkat("../../..", dfd, rootstr); if (ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, "symlink %s creation failed", rootstr); @@ -681,30 +701,18 @@ out: } int -posix_handle_mkdir_hashes(xlator_t *this, const char *newpath) +posix_handle_mkdir_hashes(xlator_t *this, int dirfd, uuid_t gfid) { - char *duppath = NULL; - char *parpath = NULL; - int ret = 0; - - duppath = strdupa(newpath); - parpath = dirname(duppath); - parpath = dirname(duppath); - - ret = sys_mkdir(parpath, 0700); - if (ret == -1 && errno != EEXIST) { - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, - "error mkdir hash-1 %s ", parpath); - return -1; - } - - strcpy(duppath, newpath); - parpath = dirname(duppath); + int ret = -1; + char d2[3] = { + 0, + }; - ret = sys_mkdir(parpath, 0700); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_mkdirat(dirfd, d2, 0700); if (ret == -1 && errno != EEXIST) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, - "error mkdir hash-2 %s ", parpath); + "error mkdir hash-2 %s ", uuid_utoa(gfid)); return -1; } @@ -715,51 +723,59 @@ int posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf) { - char *newpath = NULL; struct stat newbuf; + struct stat hashbuf; int ret = -1; gf_boolean_t link_exists = _gf_false; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; - MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - ret = sys_lstat(newpath, &newbuf); if (ret == -1 && errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", - newpath); + uuid_utoa(gfid)); return -1; } if (ret == -1 && errno == ENOENT) { - ret = posix_handle_mkdir_hashes(this, newpath); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "mkdir %s failed ", newpath); - return -1; + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", uuid_utoa(gfid)); + return -1; + } } - - ret = sys_link(oldpath, newpath); + ret = sys_linkat(AT_FDCWD, oldpath, dfd, newstr); if (ret) { if (errno != EEXIST) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "link %s -> %s" "failed ", - oldpath, newpath); + oldpath, newstr); return -1; } else { link_exists = _gf_true; } } + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - ret = sys_lstat(newpath, &newbuf); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "lstat on %s failed", newpath); + "lstat on %s failed", uuid_utoa(gfid)); return -1; } if ((link_exists) && (!S_ISREG(newbuf.st_mode))) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_HANDLE_CREATE, - "%s - Expected regular file", newpath); + "%s - Expected regular file", uuid_utoa(gfid)); return -1; } } @@ -769,7 +785,8 @@ posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, "mismatching ino/dev between file %s (%lld/%lld) " "and handle %s (%lld/%lld)", oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev, - newpath, (long long)newbuf.st_ino, (long long)newbuf.st_dev); + uuid_utoa(gfid), (long long)newbuf.st_ino, + (long long)newbuf.st_dev); ret = -1; } @@ -783,15 +800,23 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, char *oldpath = NULL; char *newpath = NULL; struct stat newbuf; + struct stat hashbuf; int ret = -1; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); MAKE_HANDLE_RELPATH(oldpath, this, loc->pargfid, loc->name); - ret = sys_lstat(newpath, &newbuf); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + if (ret == -1 && errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", - newpath); + newstr); return -1; } @@ -801,24 +826,30 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, errno = EINVAL; return -1; } - ret = posix_handle_mkdir_hashes(this, newpath); + + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); + if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "mkdir %s failed ", newpath); - return -1; + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", newstr); + return -1; + } } - - ret = sys_symlink(oldpath, newpath); + ret = sys_symlinkat(oldpath, dfd, newstr); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "symlink %s -> %s failed", oldpath, newpath); + "symlink %s -> %s failed", oldpath, newstr); return -1; } - ret = sys_lstat(newpath, &newbuf); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "stat on %s failed ", newpath); + "stat on %s failed ", newstr); return -1; } } @@ -826,7 +857,7 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, ret = sys_stat(real_path, &newbuf); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "stat on %s failed ", newpath); + "stat on %s failed ", real_path); return -1; } @@ -848,26 +879,33 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, int posix_handle_unset_gfid(xlator_t *this, uuid_t gfid) { - char *path = NULL; - int ret = -1; + int ret = 0; struct stat stat; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + struct posix_private *priv = this->private; - MAKE_HANDLE_GFID_PATH(path, this, gfid); + index = gfid[0]; + dfd = priv->arrdfd[index]; - ret = sys_lstat(path, &stat); + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_utoa(gfid)); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); if (ret == -1) { if (errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s", - path); + newstr); } goto out; } - ret = sys_unlink(path); - if (ret == -1) { + ret = sys_unlinkat(dfd, newstr); + if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, - "unlink %s failed ", path); + "unlink %s is failed", newstr); } out: diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h index 70c68c3d89c..f33ed92620d 100644 --- a/xlators/storage/posix/src/posix-handle.h +++ b/xlators/storage/posix/src/posix-handle.h @@ -141,6 +141,16 @@ __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \ } while (0) +#define MAKE_HANDLE_ABSPATH_FD(var, this, gfid, dfd) \ + do { \ + struct posix_private *__priv = this->private; \ + int findex = gfid[0]; \ + int __len = POSIX_GFID_HASH2_LEN; \ + var = alloca(__len); \ + snprintf(var, __len, "%02x/%s", gfid[1], uuid_utoa(gfid)); \ + dfd = __priv->arrdfd[findex]; \ + } while (0) + #define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) \ do { \ char *__parp; \ @@ -184,6 +194,7 @@ /* expand ELOOP */ \ } while (0) +#define POSIX_GFID_HASH2_LEN 45 int posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t len); diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index cbc271481a6..67db3324083 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -824,6 +824,11 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, "lstat failed on %s", path); errno = op_errno; /*gf_msg could have changed errno*/ + } else { + op_errno = errno; + gf_msg_debug(this->name, 0, "lstat failed on %s (%s)", path, + strerror(errno)); + errno = op_errno; /*gf_msg could have changed errno*/ } goto out; } @@ -1065,7 +1070,7 @@ verify_handle: ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); out: - if (!(*op_errno)) + if (ret && !(*op_errno)) *op_errno = errno; return ret; } @@ -1500,7 +1505,7 @@ posix_janitor_task(void *data) if (!priv) goto out; - time(&now); + now = gf_time(); if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { if (priv->disable_landfill_purge) { gf_msg_debug(this->name, 0, @@ -1588,113 +1593,107 @@ unlock: } static struct posix_fd * -janitor_get_next_fd(glusterfs_ctx_t *ctx, int32_t janitor_sleep) +janitor_get_next_fd(glusterfs_ctx_t *ctx) { struct posix_fd *pfd = NULL; - struct timespec timeout; + while (list_empty(&ctx->janitor_fds)) { + if (ctx->pxl_count == 0) { + return NULL; + } - pthread_mutex_lock(&ctx->janitor_lock); - { - if (list_empty(&ctx->janitor_fds)) { - time(&timeout.tv_sec); - timeout.tv_sec += janitor_sleep; - timeout.tv_nsec = 0; + pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); + } - pthread_cond_timedwait(&ctx->janitor_cond, &ctx->janitor_lock, - &timeout); - goto unlock; - } + pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); + list_del_init(&pfd->list); - pfd = list_entry(ctx->janitor_fds.next, struct posix_fd, list); + return pfd; +} - list_del(ctx->janitor_fds.next); +static void +posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) +{ + THIS = xl; + + if (pfd->dir == NULL) { + gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); + sys_close(pfd->fd); + } else { + gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); + sys_closedir(pfd->dir); } -unlock: - pthread_mutex_unlock(&ctx->janitor_lock); - return pfd; + GF_FREE(pfd); } static void * posix_ctx_janitor_thread_proc(void *data) { - xlator_t *this = NULL; + xlator_t *xl; struct posix_fd *pfd; glusterfs_ctx_t *ctx = NULL; - struct posix_private *priv = NULL; - int32_t sleep_duration = 0; + struct posix_private *priv_fd; - this = data; - ctx = THIS->ctx; - THIS = this; + ctx = data; - priv = this->private; - sleep_duration = priv->janitor_sleep_duration; - while (1) { - pfd = janitor_get_next_fd(ctx, sleep_duration); - if (pfd) { - if (pfd->dir == NULL) { - gf_msg_trace(this->name, 0, "janitor: closing file fd=%d", - pfd->fd); - sys_close(pfd->fd); - } else { - gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", - pfd->dir); - sys_closedir(pfd->dir); - } + pthread_mutex_lock(&ctx->fd_lock); - GF_FREE(pfd); - } + while ((pfd = janitor_get_next_fd(ctx)) != NULL) { + pthread_mutex_unlock(&ctx->fd_lock); + + xl = pfd->xl; + posix_close_pfd(xl, pfd); + + pthread_mutex_lock(&ctx->fd_lock); + + priv_fd = xl->private; + priv_fd->rel_fdcount--; + if (!priv_fd->rel_fdcount) + pthread_cond_signal(&priv_fd->fd_cond); } + pthread_mutex_unlock(&ctx->fd_lock); + return NULL; } int posix_spawn_ctx_janitor_thread(xlator_t *this) { - struct posix_private *priv = NULL; int ret = 0; glusterfs_ctx_t *ctx = NULL; - priv = this->private; - ctx = THIS->ctx; + ctx = this->ctx; - LOCK(&priv->lock); + pthread_mutex_lock(&ctx->fd_lock); { - if (!ctx->janitor) { - pthread_mutex_init(&ctx->janitor_lock, NULL); - pthread_cond_init(&ctx->janitor_cond, NULL); - INIT_LIST_HEAD(&ctx->janitor_fds); - + if (ctx->pxl_count++ == 0) { ret = gf_thread_create(&ctx->janitor, NULL, - posix_ctx_janitor_thread_proc, this, + posix_ctx_janitor_thread_proc, ctx, "posixctxjan"); if (ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, - "spawning janitor " - "thread failed"); - goto unlock; + "spawning janitor thread failed"); + ctx->pxl_count--; } } } -unlock: - UNLOCK(&priv->lock); + pthread_mutex_unlock(&ctx->fd_lock); + return ret; } static int -is_fresh_file(int64_t sec, int64_t ns) +is_fresh_file(struct timespec *ts) { - struct timeval tv; + struct timespec now; int64_t elapsed; - gettimeofday(&tv, NULL); + timespec_now_realtime(&now); + elapsed = (int64_t)gf_tsdiff(ts, &now); - elapsed = (tv.tv_sec - sec) * 1000000L; - elapsed += tv.tv_usec - (ns / 1000L); if (elapsed < 0) { /* The file has been modified in the future !!! * Is it fresh ? previous implementation considered this as a @@ -1703,11 +1702,7 @@ is_fresh_file(int64_t sec, int64_t ns) } /* If the file is newer than a second, we consider it fresh. */ - if (elapsed < 1000000) { - return 1; - } - - return 0; + return elapsed < 1000000; } int @@ -1770,7 +1765,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) if (ret != 16) { /* TODO: This is a very hacky way of doing this, and very prone to * errors and unexpected behavior. This should be changed. */ - if (is_fresh_file(stbuf.ia_ctime, stbuf.ia_ctime_nsec)) { + struct timespec ts = {.tv_sec = stbuf.ia_ctime, + .tv_nsec = stbuf.ia_ctime_nsec}; + if (is_fresh_file(&ts)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1784,7 +1781,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) if (ret != 16) { /* TODO: This is a very hacky way of doing this, and very prone to * errors and unexpected behavior. This should be changed. */ - if (is_fresh_file(stat.st_ctim.tv_sec, stat.st_ctim.tv_nsec)) { + if (is_fresh_file(&stat.st_ctim)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -2017,7 +2014,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) { struct posix_private *priv = NULL; int ret = -1; - char timestamp[256] = { + char timestamp[GF_TIMESTR_SIZE] = { 0, }; int fd = -1; @@ -2032,9 +2029,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) int timeout = 0; struct aiocb aiocb; - GF_VALIDATE_OR_GOTO(this->name, this, out); priv = this->private; - GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); timeout = priv->health_check_timeout; @@ -2045,7 +2040,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) goto out; } - time_sec = time(NULL); + time_sec = gf_time(); gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); timelen = strlen(timestamp); @@ -2317,7 +2312,7 @@ posix_disk_space_check(xlator_t *this) double totsz = 0; double freesz = 0; - GF_VALIDATE_OR_GOTO(this->name, this, out); + GF_VALIDATE_OR_GOTO("posix-helpers", this, out); priv = this->private; GF_VALIDATE_OR_GOTO(this->name, priv, out); @@ -2410,7 +2405,7 @@ posix_spawn_disk_space_check_thread(xlator_t *xl) ret = gf_thread_create(&priv->disk_space_check, NULL, posix_disk_space_check_thread_proc, xl, - "posix_reserve"); + "posixrsv"); if (ret) { priv->disk_space_check_active = _gf_false; gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, @@ -2490,23 +2485,8 @@ posix_fsyncer_syncfs(xlator_t *this, struct list_head *head) stub = list_entry(head->prev, call_stub_t, list); ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL); - if (ret) - return; - -#ifdef GF_LINUX_HOST_OS - /* syncfs() is not "declared" in RHEL's glibc even though - the kernel has support. - */ -#include <sys/syscall.h> -#include <unistd.h> -#ifdef SYS_syncfs - syscall(SYS_syncfs, pfd->fd); -#else - sync(); -#endif -#else - sync(); -#endif + if (!ret) + (void)gf_syncfs(pfd->fd); } void * @@ -3586,3 +3566,101 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) } } } + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) +{ + int op_ret = 0; + ssize_t size = 0; + char value_buf[4096] = { + 0, + }; + gf_boolean_t have_val = _gf_false; + data_t *arg_data = NULL; + char *xattr_name = NULL; + size_t xattr_len = 0; + gf_boolean_t is_stale = _gf_false; + + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name == NULL) { + op_ret = 0; + return is_stale; + } + + xattr_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_len); + if (!arg_data) { + op_ret = 0; + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + return is_stale; + } + + size = sys_lgetxattr(par_path, xattr_name, value_buf, + sizeof(value_buf) - 1); + + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) path (%s) failed due to" + " buffer overflow", + xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + } + if (size < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed, path : %s", xattr_name, + par_path); + goto out; + } + } + + if (!have_val) { + size = sys_lgetxattr(par_path, xattr_name, value_buf, size); + if (size < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed (%s)", xattr_name, + strerror(errno)); + goto out; + } + } + + if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { + gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, + "failing preop as on-disk xattr value differs from argument " + "value for key %s", + xattr_name); + op_ret = -1; + } + +out: + dict_deln(xdata, xattr_name, xattr_len); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + + if (op_ret == -1) { + is_stale = _gf_true; + } + + return is_stale; +} + +/* Delete user xattr from the file at the file-path specified by data and from + * dict */ +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data) +{ + int ret; + char *real_path = data; + + ret = sys_lremovexattr(real_path, k); + if (ret) { + gf_msg("posix-helpers", GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, + "removexattr failed. key %s path %s", k, real_path); + } + + dict_del(dict, k); + + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index a6c2b512ef1..6d54d37e5aa 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -54,6 +54,7 @@ #include <glusterfs/events.h> #include "posix-gfid-path.h" #include <glusterfs/compat-uuid.h> +#include <glusterfs/common-utils.h> extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -1360,13 +1361,28 @@ out: return 0; } +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, &ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + int32_t posix_releasedir(xlator_t *this, fd_t *fd) { struct posix_fd *pfd = NULL; uint64_t tmp_pfd = 0; int ret = 0; - glusterfs_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); @@ -1383,22 +1399,8 @@ posix_releasedir(xlator_t *this, fd_t *fd) "pfd->dir is NULL for fd=%p", fd); goto out; } + posix_add_fd_to_cleanup(this, pfd); - ctx = THIS->ctx; - - pthread_mutex_lock(&ctx->janitor_lock); - { - INIT_LIST_HEAD(&pfd->list); - list_add_tail(&pfd->list, &ctx->janitor_fds); - pthread_cond_signal(&ctx->janitor_cond); - } - pthread_mutex_unlock(&ctx->janitor_lock); - - /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); - - sys_closedir(pfd->dir); - GF_FREE(pfd); - */ out: return 0; } @@ -2305,8 +2307,7 @@ posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, flags); if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; + op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, "copy_file_range failed: fd_in: %p (gfid: %s) ," " fd_out %p (gfid:%s)", @@ -2521,18 +2522,13 @@ out: int32_t posix_release(xlator_t *this, fd_t *fd) { - struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; int ret = -1; uint64_t tmp_pfd = 0; - glusterfs_ctx_t *ctx = NULL; VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); - priv = this->private; - ctx = THIS->ctx; - ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, @@ -2546,16 +2542,7 @@ posix_release(xlator_t *this, fd_t *fd) "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } - pthread_mutex_lock(&ctx->janitor_lock); - { - INIT_LIST_HEAD(&pfd->list); - list_add_tail(&pfd->list, &ctx->janitor_fds); - pthread_cond_signal(&ctx->janitor_cond); - } - pthread_mutex_unlock(&ctx->janitor_lock); - - if (!priv) - goto out; + posix_add_fd_to_cleanup(this, pfd); out: return 0; @@ -2725,6 +2712,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t ret = 0; ssize_t acl_size = 0; dict_t *xattr = NULL; + dict_t *subvol_xattrs = NULL; posix_xattr_filler_t filler = { 0, }; @@ -2740,6 +2728,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, struct mdata_iatt mdata_iatt = { 0, }; + int8_t sync_backend_xattrs = _gf_false; + data_pair_t *custom_xattrs; + data_t *keyval = NULL; + char **xattrs_to_heal = get_xattrs_to_heal(); DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); @@ -2922,6 +2914,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto out; } + ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); + } + + if (sync_backend_xattrs) { + /* List all custom xattrs */ + subvol_xattrs = dict_new(); + if (!subvol_xattrs) + goto out; + + ret = dict_set_int32_sizen(xdata, "list-xattr", 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "Unable to set list-xattr in dict "); + goto out; + } + + subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + NULL); + + /* Remove all user xattrs from the file */ + dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, + real_path); + + /* Remove all custom xattrs from the file */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); + if (keyval) { + ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, + errno, "removexattr failed. key %s path %s", + xattrs_to_heal[i], loc->path); + goto out; + } + + dict_del(subvol_xattrs, xattrs_to_heal[i]); + keyval = NULL; + } + } + + /* Set custom xattrs based on info provided by DHT */ + custom_xattrs = dict->members_list; + + while (custom_xattrs != NULL) { + ret = sys_lsetxattr(real_path, custom_xattrs->key, + custom_xattrs->value->data, + custom_xattrs->value->len, flags); + if (ret) { + op_errno = errno; + gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", + custom_xattrs->key, ret); + goto out; + } + + custom_xattrs = custom_xattrs->next; + } + } + xattr = dict_new(); if (!xattr) goto out; @@ -3029,6 +3081,9 @@ out: if (xattr) dict_unref(xattr); + if (subvol_xattrs) + dict_unref(subvol_xattrs); + return 0; } diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h index 63e8771d3b1..d37014af93e 100644 --- a/xlators/storage/posix/src/posix-metadata.h +++ b/xlators/storage/posix/src/posix-metadata.h @@ -15,13 +15,15 @@ /* In memory representation posix metadata xattr */ typedef struct { - /* version of structure, bumped up if any new member is added */ - uint8_t version; /* flags indicates valid fields in the structure */ uint64_t flags; struct timespec ctime; struct timespec mtime; struct timespec atime; + /* version of structure, bumped up if any new member is added */ + uint8_t version; + + char _pad[7]; /* manual padding */ } posix_mdata_t; typedef struct { diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index ce4e1193639..b8db146eef2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -119,12 +119,14 @@ */ struct posix_fd { - int fd; /* fd returned by the kernel */ - int32_t flags; /* flags for open/creat */ - DIR *dir; /* handle returned by the kernel */ - off_t dir_eof; /* offset at dir EOF */ - int odirect; + int fd; /* fd returned by the kernel */ + int32_t flags; /* flags for open/creat */ + DIR *dir; /* handle returned by the kernel */ + off_t dir_eof; /* offset at dir EOF */ struct list_head list; /* to add to the janitor list */ + int odirect; + xlator_t *xl; + char _pad[4]; /* manual padding */ }; struct posix_private { @@ -135,67 +137,38 @@ struct posix_private { gf_lock_t lock; char *hostname; - /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; time_t last_landfill_check; - int32_t janitor_sleep_duration; gf_atomic_t read_value; /* Total read, from init */ gf_atomic_t write_value; /* Total write, from init */ - /* - In some cases, two exported volumes may reside on the same - partition on the server. Sending statvfs info for both - the volumes will lead to erroneous df output at the client, - since free space on the partition will be counted twice. - - In such cases, user can disable exporting statvfs info - on one of the volumes by setting this option. - */ - gf_boolean_t export_statfs; - - gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ - - /* - decide whether posix_unlink does open (file), unlink (file), close (fd) - instead of just unlink (file). with the former approach there is no - lockout of access to parent directory during removal of very large files - for the entire duration of freeing of data blocks. - */ - gf_boolean_t background_unlink; /* janitor task which cleans up /.trash (created by replicate) */ struct gf_tw_timer_list *janitor; char *trash_path; /* lock for brick dir */ - DIR *mount_lock; + int mount_lock; struct stat handledir; /* uuid of glusterd that swapned the brick process */ uuid_t glusterd_uuid; - gf_boolean_t aio_configured; - gf_boolean_t aio_init_done; - gf_boolean_t aio_capable; #ifdef HAVE_LIBAIO io_context_t ctxp; pthread_t aiothread; #endif - /* node-uuid in pathinfo xattr */ - gf_boolean_t node_uuid_pathinfo; - pthread_t fsyncer; struct list_head fsyncs; pthread_mutex_t fsync_mutex; pthread_cond_t fsync_cond; pthread_mutex_t janitor_mutex; pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; int fsync_queue_count; + int32_t janitor_sleep_duration; enum { BATCH_NONE = 0, @@ -206,8 +179,6 @@ struct posix_private { } batch_fsync_mode; uint32_t batch_fsync_delay_usec; - gf_boolean_t update_pgfid_nlinks; - gf_boolean_t gfid2path; char gfid2path_sep[8]; /* seconds to sleep between health checks */ @@ -215,13 +186,10 @@ struct posix_private { /* seconds to sleep to wait for aio write finish for health checks */ uint32_t health_check_timeout; pthread_t health_check; - gf_boolean_t health_check_active; double disk_reserve; - char disk_unit; - uint32_t disk_space_full; pthread_t disk_space_check; - gf_boolean_t disk_space_check_active; + uint32_t disk_space_full; #ifdef GF_DARWIN_HOST_OS enum { @@ -236,9 +204,6 @@ struct posix_private { same backend. Very much usable in brick-splitting feature. */ int32_t shared_brick_count; - /* This option is used for either to call a landfill_purge or not */ - gf_boolean_t disable_landfill_purge; - /*Option to set mode bit permission that will always be set on file/directory. */ mode_t force_create_mode; @@ -246,10 +211,47 @@ struct posix_private { mode_t create_mask; mode_t create_directory_mask; uint32_t max_hardlinks; + int32_t arrdfd[256]; + int dirfd; + + /* This option is used for either to call a landfill_purge or not */ + gf_boolean_t disable_landfill_purge; gf_boolean_t fips_mode_rchecksum; gf_boolean_t ctime; gf_boolean_t janitor_task_stop; + + gf_boolean_t disk_space_check_active; + char disk_unit; + gf_boolean_t health_check_active; + gf_boolean_t update_pgfid_nlinks; + gf_boolean_t gfid2path; + /* node-uuid in pathinfo xattr */ + gf_boolean_t node_uuid_pathinfo; + /* + In some cases, two exported volumes may reside on the same + partition on the server. Sending statvfs info for both + the volumes will lead to erroneous df output at the client, + since free space on the partition will be counted twice. + + In such cases, user can disable exporting statvfs info + on one of the volumes by setting this option. + */ + gf_boolean_t export_statfs; + + gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ + + /* + decide whether posix_unlink does open (file), unlink (file), close (fd) + instead of just unlink (file). with the former approach there is no + lockout of access to parent directory during removal of very large files + for the entire duration of freeing of data blocks. + */ + gf_boolean_t background_unlink; + gf_boolean_t aio_configured; + gf_boolean_t aio_init_done; + gf_boolean_t aio_capable; + uint32_t rel_fdcount; }; typedef struct { @@ -263,9 +265,11 @@ typedef struct { fd_t *fd; int fdnum; int flags; - int32_t op_errno; char *list; size_t list_size; + int32_t op_errno; + + char _pad[4]; /* manual padding */ } posix_xattr_filler_t; typedef struct { @@ -325,6 +329,7 @@ posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename, int posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *real_path, struct iatt *iatt, gf_boolean_t inode_locked); + dict_t * posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd, int fdnum, dict_t *xattr, struct iatt *buf); @@ -658,4 +663,11 @@ posix_spawn_ctx_janitor_thread(xlator_t *this); void posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); + +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data); + #endif /* _POSIX_H */ |