summary | refs | log | tree | commit | diff | stats
path: root/xlators/storage/posix/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/posix/src')
-rw-r--r-- xlators/storage/posix/src/posix-common.c 163
-rw-r--r-- xlators/storage/posix/src/posix-entry-ops.c 75
-rw-r--r-- xlators/storage/posix/src/posix-handle.c 170
-rw-r--r-- xlators/storage/posix/src/posix-handle.h 11
-rw-r--r-- xlators/storage/posix/src/posix-helpers.c 260
-rw-r--r-- xlators/storage/posix/src/posix-inode-fd-ops.c 121
-rw-r--r-- xlators/storage/posix/src/posix-metadata.h 6
-rw-r--r-- xlators/storage/posix/src/posix.h 106
8 files changed, 643 insertions, 269 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index f4003942f4e..f10722ec3fb 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -140,6 +140,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
struct timespec sleep_till = {
0,
};
+ glusterfs_ctx_t *ctx = this->ctx;
switch (event) {
case GF_EVENT_PARENT_UP: {
@@ -150,8 +151,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_PARENT_DOWN: {
if (!victim->cleanup_starting)
break;
- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
- victim->name);
if (priv->janitor) {
pthread_mutex_lock(&priv->janitor_mutex);
@@ -160,7 +159,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
priv->janitor);
if (!ret) {
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait to set janitor_task flag to _gf_false by
* janitor_task_done */
@@ -168,7 +167,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
(void)pthread_cond_timedwait(&priv->janitor_cond,
&priv->janitor_mutex,
&sleep_till);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
}
}
@@ -177,6 +176,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
GF_FREE(priv->janitor);
}
priv->janitor = NULL;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ while (priv->rel_fdcount > 0) {
+ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+ victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
} break;
default:
@@ -543,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this)
return 0;
}
+/*
+ * Open the directory dir_name relative to the directory fd pdirfd, creating
+ * it with mode 0700 first when the initial open fails with ENOENT.
+ *
+ * Returns the value of the last sys_openat()/sys_mkdirat() call: an open
+ * directory fd on success, a negative value on failure. On failure errno is
+ * left as set by the failing call so the caller can log it.
+ */
+int
+posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name)
+{
+    int ret = -1;
+    int saved_errno = 0;
+
+    ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0);
+    if (ret >= 0 || errno != ENOENT)
+        goto out;
+
+    /* EEXIST here only means the directory appeared after the open attempt
+     * above, which is fine: fall through and open it. */
+    ret = sys_mkdirat(pdirfd, dir_name, 0700);
+    if (ret < 0 && errno != EEXIST) {
+        saved_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_HANDLE_CREATE,
+               "Creating directory %s failed", dir_name);
+        errno = saved_errno; /* gf_msg could have changed errno */
+        goto out;
+    }
+
+    ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0);
+    if (ret < 0) {
+        saved_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_HANDLE_CREATE,
+               "Opening directory %s failed", dir_name);
+        errno = saved_errno; /* gf_msg could have changed errno */
+    }
+out:
+    return ret;
+}
+
/**
* init -
*/
@@ -579,6 +612,15 @@ posix_init(xlator_t *this)
int force_directory = -1;
int create_mask = -1;
int create_directory_mask = -1;
+ char dir_handle[PATH_MAX] = {
+ 0,
+ };
+ int i;
+ char fhash[4] = {
+ 0,
+ };
+ int hdirfd = -1;
+ char value;
dir_data = dict_get(this->options, "directory");
@@ -621,6 +663,11 @@ posix_init(xlator_t *this)
_private->base_path = gf_strdup(dir_data->data);
_private->base_path_length = dir_data->len - 1;
+ _private->dirfd = -1;
+ _private->mount_lock = -1;
+ for (i = 0; i < 256; i++)
+ _private->arrdfd[i] = -1;
+
ret = dict_get_str(this->options, "hostname", &_private->hostname);
if (ret) {
_private->hostname = GF_CALLOC(256, sizeof(char), gf_common_mt_char);
@@ -635,16 +682,11 @@ posix_init(xlator_t *this)
}
/* Check for Extended attribute support, if not present, log it */
- op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working",
- 8, 0);
- if (op_ret != -1) {
- ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test");
- if (ret) {
- gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION,
- "failed to remove xattr: "
- "trusted.glusterfs.test");
- }
- } else {
+ size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value));
+
+ if ((size == -1) && (errno == EOPNOTSUPP)) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR,
+ "getxattr returned %zd", size);
tmp_data = dict_get(this->options, "mandate-attribute");
if (tmp_data) {
if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) {
@@ -893,8 +935,9 @@ posix_init(xlator_t *this)
/* performing open dir on brick dir locks the brick dir
* and prevents it from being unmounted
*/
- _private->mount_lock = sys_opendir(dir_data->data);
- if (!_private->mount_lock) {
+ _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY),
+ 0);
+ if (_private->mount_lock < 0) {
ret = -1;
op_errno = errno;
gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED,
@@ -938,6 +981,28 @@ posix_init(xlator_t *this)
}
this->private = (void *)_private;
+ snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path,
+ GF_HIDDEN_PATH);
+ hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock,
+ dir_handle);
+ if (hdirfd < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "error open directory failed for dir %s", dir_handle);
+ ret = -1;
+ goto out;
+ }
+ _private->dirfd = hdirfd;
+ for (i = 0; i < 256; i++) {
+ snprintf(fhash, sizeof(fhash), "%02x", i);
+ _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd,
+ fhash);
+ if (_private->arrdfd[i] < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
+ "error openat failed for file %s", fhash);
+ ret = -1;
+ goto out;
+ }
+ }
op_ret = posix_handle_init(this);
if (op_ret == -1) {
@@ -1028,7 +1093,9 @@ posix_init(xlator_t *this)
pthread_cond_init(&_private->fsync_cond, NULL);
pthread_mutex_init(&_private->janitor_mutex, NULL);
pthread_cond_init(&_private->janitor_cond, NULL);
+ pthread_cond_init(&_private->fd_cond, NULL);
INIT_LIST_HEAD(&_private->fsyncs);
+ _private->rel_fdcount = 0;
ret = posix_spawn_ctx_janitor_thread(this);
if (ret)
goto out;
@@ -1105,9 +1172,27 @@ posix_init(xlator_t *this)
out);
GF_OPTION_INIT("ctime", _private->ctime, bool, out);
+
out:
if (ret) {
if (_private) {
+ if (_private->dirfd >= 0) {
+ sys_close(_private->dirfd);
+ _private->dirfd = -1;
+ }
+
+ for (i = 0; i < 256; i++) {
+ if (_private->arrdfd[i] >= 0) {
+ sys_close(_private->arrdfd[i]);
+ _private->arrdfd[i] = -1;
+ }
+ }
+ /*unlock brick dir*/
+ if (_private->mount_lock >= 0) {
+ (void)sys_close(_private->mount_lock);
+ _private->mount_lock = -1;
+ }
+
GF_FREE(_private->base_path);
GF_FREE(_private->hostname);
@@ -1127,7 +1212,10 @@ posix_fini(xlator_t *this)
{
struct posix_private *priv = this->private;
gf_boolean_t health_check = _gf_false;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count;
int ret = 0;
+ int i = 0;
if (!priv)
return;
@@ -1138,6 +1226,18 @@ posix_fini(xlator_t *this)
}
UNLOCK(&priv->lock);
+ if (priv->dirfd >= 0) {
+ sys_close(priv->dirfd);
+ priv->dirfd = -1;
+ }
+
+ for (i = 0; i < 256; i++) {
+ if (priv->arrdfd[i] >= 0) {
+ sys_close(priv->arrdfd[i]);
+ priv->arrdfd[i] = -1;
+ }
+ }
+
if (health_check) {
(void)gf_thread_cleanup_xint(priv->health_check);
priv->health_check = 0;
@@ -1160,13 +1260,28 @@ posix_fini(xlator_t *this)
priv->janitor = NULL;
}
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ count = --ctx->pxl_count;
+ if (count == 0) {
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ if (count == 0) {
+ pthread_join(ctx->janitor, NULL);
+ }
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
}
/*unlock brick dir*/
- if (priv->mount_lock)
- (void)sys_closedir(priv->mount_lock);
+ if (priv->mount_lock >= 0) {
+ (void)sys_close(priv->mount_lock);
+ priv->mount_lock = -1;
+ }
GF_FREE(priv->base_path);
LOCK_DESTROY(&priv->lock);
@@ -1351,24 +1466,21 @@ struct volume_options posix_options[] = {
.min = 0000,
.max = 0777,
.default_value = "0000",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Mode bit permission that will always be set on a file."},
{.key = {"force-directory-mode"},
.type = GF_OPTION_TYPE_INT,
.min = 0000,
.max = 0777,
.default_value = "0000",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Mode bit permission that will be always set on directory"},
{.key = {"create-mask"},
.type = GF_OPTION_TYPE_INT,
.min = 0000,
.max = 0777,
.default_value = "0777",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Any bit not set here will be removed from the"
"modes set on a file when it is created"},
{.key = {"create-directory-mask"},
@@ -1376,8 +1488,7 @@ struct volume_options posix_options[] = {
.min = 0000,
.max = 0777,
.default_value = "0777",
- .validate = GF_OPT_VALIDATE_MIN,
- .validate = GF_OPT_VALIDATE_MAX,
+ .validate = GF_OPT_VALIDATE_BOTH,
.description = "Any bit not set here will be removed from the"
"modes set on a directory when it is created"},
{.key = {"max-hardlinks"},
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index 1f1e05f1dc9..8cc3ccf8c00 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -176,6 +176,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
struct posix_private *priv = NULL;
posix_inode_ctx_t *ctx = NULL;
int ret = 0;
+ int dfd = -1;
VALIDATE_OR_GOTO(frame, out);
VALIDATE_OR_GOTO(this, out);
@@ -197,6 +198,19 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
goto out;
}
+#ifdef __NetBSD__
+ /* Same for NetBSD's .attribute directory */
+ if (__is_root_gfid(loc->pargfid) && loc->name &&
+ (strcmp(loc->name, ".attribute") == 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED,
+ "Lookup issued on .attribute,"
+ " which is not permitted");
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+#endif /* __NetBSD__ */
+
op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless);
op_ret = -1;
if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) {
@@ -232,12 +246,12 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (!op_errno)
op_errno = ESTALE;
loc_gfid(loc, gfid);
- MAKE_HANDLE_ABSPATH(gfid_path, this, gfid);
- ret = sys_stat(gfid_path, &statbuf);
+ MAKE_HANDLE_ABSPATH_FD(gfid_path, this, gfid, dfd);
+ ret = sys_fstatat(dfd, gfid_path, &statbuf, 0);
if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR))
/*Don't unset if it was a symlink to a dir.*/
goto parent;
- ret = sys_lstat(gfid_path, &statbuf);
+ ret = sys_fstatat(dfd, gfid_path, &statbuf, AT_SYMLINK_NOFOLLOW);
if (ret == 0 && statbuf.st_nlink == 1) {
gf_msg(this->name, GF_LOG_WARNING, op_errno,
P_MSG_HANDLE_DELETE,
@@ -649,6 +663,19 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto out;
}
+#ifdef __NetBSD__
+    /* Same for NetBSD's .attribute directory: refuse to create it directly
+     * under the root gfid. loc->name is checked first so strcmp() never sees
+     * a NULL pointer (mirrors the guard in posix_lookup). */
+    if (__is_root_gfid(loc->pargfid) && loc->name &&
+        (strcmp(loc->name, ".attribute") == 0)) {
+        gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED,
+               "mkdir issued on .attribute, which"
+               " is not permitted");
+        op_errno = EPERM;
+        op_ret = -1;
+        goto out;
+    }
+#endif
+
priv = this->private;
VALIDATE_OR_GOTO(priv, out);
GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno,
@@ -1415,6 +1442,19 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
goto out;
}
+#ifdef __NetBSD__
+    /* Same for NetBSD's .attribute directory: refuse to remove it directly
+     * under the root gfid. loc->name is checked first so strcmp() never sees
+     * a NULL pointer (mirrors the guard in posix_lookup). */
+    if (__is_root_gfid(loc->pargfid) && loc->name &&
+        (strcmp(loc->name, ".attribute") == 0)) {
+        gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED,
+               "rmdir issued on .attribute, which"
+               " is not permitted");
+        op_errno = EPERM;
+        op_ret = -1;
+        goto out;
+    }
+#endif
+
priv = this->private;
MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf);
@@ -2145,6 +2185,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
0,
};
+ dict_t *xdata_rsp = dict_ref(xdata);
+
DECLARE_OLD_FS_ID_VAR;
VALIDATE_OR_GOTO(frame, out);
@@ -2194,6 +2236,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
was_present = 0;
}
+ if (!was_present) {
+ if (posix_is_layout_stale(xdata, par_path, this)) {
+ op_ret = -1;
+ op_errno = EIO;
+ if (!xdata_rsp) {
+ xdata_rsp = dict_new();
+ if (!xdata_rsp) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) ==
+ -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED,
+ "setting key %s in dict failed", GF_PREOP_CHECK_FAILED);
+ }
+
+ goto out;
+ }
+ }
+
if (priv->o_direct)
_flags |= O_DIRECT;
@@ -2313,7 +2377,10 @@ out:
STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd,
(loc) ? loc->inode : NULL, &stbuf, &preparent,
- &postparent, xdata);
+ &postparent, xdata_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
return 0;
}
diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c
index f58f5416ff0..410b38da8cb 100644
--- a/xlators/storage/posix/src/posix-handle.c
+++ b/xlators/storage/posix/src/posix-handle.c
@@ -25,7 +25,7 @@
#include <glusterfs/compat-errno.h>
int
-posix_handle_mkdir_hashes(xlator_t *this, const char *newpath);
+posix_handle_mkdir_hashes(xlator_t *this, int dfd, uuid_t gfid);
inode_t *
posix_resolve(xlator_t *this, inode_table_t *itable, inode_t *parent,
@@ -331,9 +331,23 @@ posix_handle_pump(xlator_t *this, char *buf, int len, int maxlen,
int ret = 0;
int blen = 0;
int link_len = 0;
+ char tmpstr[POSIX_GFID_HASH2_LEN] = {
+ 0,
+ };
+ char d2[3] = {
+ 0,
+ };
+ int index = 0;
+ int dirfd = 0;
+ struct posix_private *priv = this->private;
+
+ strncpy(tmpstr, (base_str + pfx_len + 3), 40);
+ strncpy(d2, (base_str + pfx_len), 2);
+ index = strtoul(d2, NULL, 16);
+ dirfd = priv->arrdfd[index];
/* is a directory's symlink-handle */
- ret = sys_readlink(base_str, linkname, 512);
+ ret = readlinkat(dirfd, tmpstr, linkname, 512);
if (ret == -1) {
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED,
"internal readlink failed on %s ", base_str);
@@ -398,6 +412,11 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf,
int pfx_len;
int maxlen;
char *buf;
+ int index = 0;
+ int dfd = 0;
+ char newstr[POSIX_GFID_HASH2_LEN] = {
+ 0,
+ };
priv = this->private;
@@ -411,12 +430,14 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf,
buf = alloca(maxlen);
}
+ index = gfid[0];
+ dfd = priv->arrdfd[index];
+
base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45);
base_str = alloca(base_len + 1);
base_len = snprintf(base_str, base_len + 1, "%s/%s/%02x/%02x/%s",
priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1],
uuid_str);
-
pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1;
if (basename) {
@@ -425,7 +446,8 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf,
len = snprintf(buf, maxlen, "%s", base_str);
}
- ret = sys_lstat(base_str, &stat);
+ snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_str);
+ ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW);
if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1))
goto out;
@@ -438,7 +460,6 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf,
if (ret == -1)
break;
-
ret = sys_lstat(buf, &stat);
} while ((ret == -1) && errno == ELOOP);
@@ -485,6 +506,7 @@ posix_handle_init(xlator_t *this)
struct stat exportbuf;
char *rootstr = NULL;
static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ int dfd = 0;
priv = this->private;
@@ -534,9 +556,8 @@ posix_handle_init(xlator_t *this)
return -1;
}
- MAKE_HANDLE_ABSPATH(rootstr, this, gfid);
-
- ret = sys_stat(rootstr, &rootbuf);
+ MAKE_HANDLE_ABSPATH_FD(rootstr, this, gfid, dfd);
+ ret = sys_fstatat(dfd, rootstr, &rootbuf, 0);
switch (ret) {
case -1:
if (errno != ENOENT) {
@@ -544,15 +565,14 @@ posix_handle_init(xlator_t *this)
"%s", priv->base_path);
return -1;
}
-
- ret = posix_handle_mkdir_hashes(this, rootstr);
+ ret = posix_handle_mkdir_hashes(this, dfd, gfid);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
"mkdir %s failed", rootstr);
return -1;
}
- ret = sys_symlink("../../..", rootstr);
+ ret = sys_symlinkat("../../..", dfd, rootstr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
"symlink %s creation failed", rootstr);
@@ -681,30 +701,18 @@ out:
}
int
-posix_handle_mkdir_hashes(xlator_t *this, const char *newpath)
+posix_handle_mkdir_hashes(xlator_t *this, int dirfd, uuid_t gfid)
{
- char *duppath = NULL;
- char *parpath = NULL;
- int ret = 0;
-
- duppath = strdupa(newpath);
- parpath = dirname(duppath);
- parpath = dirname(duppath);
-
- ret = sys_mkdir(parpath, 0700);
- if (ret == -1 && errno != EEXIST) {
- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
- "error mkdir hash-1 %s ", parpath);
- return -1;
- }
-
- strcpy(duppath, newpath);
- parpath = dirname(duppath);
+ int ret = -1;
+ char d2[3] = {
+ 0,
+ };
- ret = sys_mkdir(parpath, 0700);
+ snprintf(d2, sizeof(d2), "%02x", gfid[1]);
+ ret = sys_mkdirat(dirfd, d2, 0700);
if (ret == -1 && errno != EEXIST) {
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE,
- "error mkdir hash-2 %s ", parpath);
+ "error mkdir hash-2 %s ", uuid_utoa(gfid));
return -1;
}
@@ -715,51 +723,59 @@ int
posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid,
struct stat *oldbuf)
{
- char *newpath = NULL;
struct stat newbuf;
+ struct stat hashbuf;
int ret = -1;
gf_boolean_t link_exists = _gf_false;
+ char d2[3] = {
+ 0,
+ };
+ int dfd = -1;
+ char *newstr = NULL;
- MAKE_HANDLE_ABSPATH(newpath, this, gfid);
+ MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd);
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
- ret = sys_lstat(newpath, &newbuf);
if (ret == -1 && errno != ENOENT) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s",
- newpath);
+ uuid_utoa(gfid));
return -1;
}
if (ret == -1 && errno == ENOENT) {
- ret = posix_handle_mkdir_hashes(this, newpath);
+ snprintf(d2, sizeof(d2), "%02x", gfid[1]);
+ ret = sys_fstatat(dfd, d2, &hashbuf, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
- "mkdir %s failed ", newpath);
- return -1;
+ ret = posix_handle_mkdir_hashes(this, dfd, gfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "mkdir %s failed ", uuid_utoa(gfid));
+ return -1;
+ }
}
-
- ret = sys_link(oldpath, newpath);
+ ret = sys_linkat(AT_FDCWD, oldpath, dfd, newstr);
if (ret) {
if (errno != EEXIST) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
"link %s -> %s"
"failed ",
- oldpath, newpath);
+ oldpath, newstr);
return -1;
} else {
link_exists = _gf_true;
}
}
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
- ret = sys_lstat(newpath, &newbuf);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
- "lstat on %s failed", newpath);
+ "lstat on %s failed", uuid_utoa(gfid));
return -1;
}
if ((link_exists) && (!S_ISREG(newbuf.st_mode))) {
gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_HANDLE_CREATE,
- "%s - Expected regular file", newpath);
+ "%s - Expected regular file", uuid_utoa(gfid));
return -1;
}
}
@@ -769,7 +785,8 @@ posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid,
"mismatching ino/dev between file %s (%lld/%lld) "
"and handle %s (%lld/%lld)",
oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev,
- newpath, (long long)newbuf.st_ino, (long long)newbuf.st_dev);
+ uuid_utoa(gfid), (long long)newbuf.st_ino,
+ (long long)newbuf.st_dev);
ret = -1;
}
@@ -783,15 +800,23 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc,
char *oldpath = NULL;
char *newpath = NULL;
struct stat newbuf;
+ struct stat hashbuf;
int ret = -1;
+ char d2[3] = {
+ 0,
+ };
+ int dfd = -1;
+ char *newstr = NULL;
MAKE_HANDLE_ABSPATH(newpath, this, gfid);
+ MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd);
MAKE_HANDLE_RELPATH(oldpath, this, loc->pargfid, loc->name);
- ret = sys_lstat(newpath, &newbuf);
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
+
if (ret == -1 && errno != ENOENT) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s",
- newpath);
+ newstr);
return -1;
}
@@ -801,24 +826,30 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc,
errno = EINVAL;
return -1;
}
- ret = posix_handle_mkdir_hashes(this, newpath);
+
+ snprintf(d2, sizeof(d2), "%02x", gfid[1]);
+ ret = sys_fstatat(dfd, d2, &hashbuf, 0);
+
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
- "mkdir %s failed ", newpath);
- return -1;
+ ret = posix_handle_mkdir_hashes(this, dfd, gfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
+ "mkdir %s failed ", newstr);
+ return -1;
+ }
}
-
- ret = sys_symlink(oldpath, newpath);
+ ret = sys_symlinkat(oldpath, dfd, newstr);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
- "symlink %s -> %s failed", oldpath, newpath);
+ "symlink %s -> %s failed", oldpath, newstr);
return -1;
}
- ret = sys_lstat(newpath, &newbuf);
+ ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW);
+
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
- "stat on %s failed ", newpath);
+ "stat on %s failed ", newstr);
return -1;
}
}
@@ -826,7 +857,7 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc,
ret = sys_stat(real_path, &newbuf);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE,
- "stat on %s failed ", newpath);
+ "stat on %s failed ", real_path);
return -1;
}
@@ -848,26 +879,33 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc,
int
posix_handle_unset_gfid(xlator_t *this, uuid_t gfid)
{
- char *path = NULL;
- int ret = -1;
+ int ret = 0;
struct stat stat;
+ int index = 0;
+ int dfd = 0;
+ char newstr[POSIX_GFID_HASH2_LEN] = {
+ 0,
+ };
+ struct posix_private *priv = this->private;
- MAKE_HANDLE_GFID_PATH(path, this, gfid);
+ index = gfid[0];
+ dfd = priv->arrdfd[index];
- ret = sys_lstat(path, &stat);
+ snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_utoa(gfid));
+ ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW);
if (ret == -1) {
if (errno != ENOENT) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s",
- path);
+ newstr);
}
goto out;
}
- ret = sys_unlink(path);
- if (ret == -1) {
+ ret = sys_unlinkat(dfd, newstr);
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE,
- "unlink %s failed ", path);
+ "unlink %s is failed", newstr);
}
out:
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
index 70c68c3d89c..f33ed92620d 100644
--- a/xlators/storage/posix/src/posix-handle.h
+++ b/xlators/storage/posix/src/posix-handle.h
@@ -141,6 +141,16 @@
__priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \
} while (0)
+/*
+ * Build the handle name of gfid relative to its first-level hash directory.
+ *   var  - receives an alloca()ed "<gfid[1] as %02x>/<uuid_utoa(gfid)>" string
+ *   dfd  - receives the directory fd stored in priv->arrdfd[gfid[0]]
+ * The string is allocated with alloca(), so it is only usable inside the
+ * calling function. Macro locals carry a "__" prefix so that caller
+ * arguments cannot be captured by them.
+ */
+#define MAKE_HANDLE_ABSPATH_FD(var, this, gfid, dfd)                           \
+    do {                                                                       \
+        struct posix_private *__priv = this->private;                          \
+        int __findex = gfid[0];                                                \
+        int __len = POSIX_GFID_HASH2_LEN;                                      \
+        var = alloca(__len);                                                   \
+        snprintf(var, __len, "%02x/%s", gfid[1], uuid_utoa(gfid));             \
+        dfd = __priv->arrdfd[__findex];                                        \
+    } while (0)
+
#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) \
do { \
char *__parp; \
@@ -184,6 +194,7 @@
/* expand ELOOP */ \
} while (0)
+#define POSIX_GFID_HASH2_LEN 45 /* size of the buffers that hold a "%02x/%s" (gfid[1], gfid string) handle name */
int
posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t len);
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index cbc271481a6..67db3324083 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -824,6 +824,11 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path,
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
"lstat failed on %s", path);
errno = op_errno; /*gf_msg could have changed errno*/
+ } else {
+ op_errno = errno;
+ gf_msg_debug(this->name, 0, "lstat failed on %s (%s)", path,
+ strerror(errno));
+ errno = op_errno; /*gf_msg could have changed errno*/
}
goto out;
}
@@ -1065,7 +1070,7 @@ verify_handle:
ret = posix_handle_soft(this, path, loc, uuid_curr, &stat);
out:
- if (!(*op_errno))
+ if (ret && !(*op_errno))
*op_errno = errno;
return ret;
}
@@ -1500,7 +1505,7 @@ posix_janitor_task(void *data)
if (!priv)
goto out;
- time(&now);
+ now = gf_time();
if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
if (priv->disable_landfill_purge) {
gf_msg_debug(this->name, 0,
@@ -1588,113 +1593,107 @@ unlock:
}
static struct posix_fd *
-janitor_get_next_fd(glusterfs_ctx_t *ctx, int32_t janitor_sleep)
+janitor_get_next_fd(glusterfs_ctx_t *ctx) /* pop the next entry of ctx->janitor_fds; the janitor thread calls this with ctx->fd_lock held */
{
struct posix_fd *pfd = NULL;
- struct timespec timeout;
+ while (list_empty(&ctx->janitor_fds)) { /* nothing queued yet */
+ if (ctx->pxl_count == 0) { /* count reached 0 (see posix_fini): report "no more work" */
+ return NULL;
+ }
- pthread_mutex_lock(&ctx->janitor_lock);
- {
- if (list_empty(&ctx->janitor_fds)) {
- time(&timeout.tv_sec);
- timeout.tv_sec += janitor_sleep;
- timeout.tv_nsec = 0;
+ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); /* sleep until fd_cond is signalled, then re-check both conditions */
+ }
- pthread_cond_timedwait(&ctx->janitor_cond, &ctx->janitor_lock,
- &timeout);
- goto unlock;
- }
+ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
+ list_del_init(&pfd->list); /* detach the entry from the queue before handing it out */
- pfd = list_entry(ctx->janitor_fds.next, struct posix_fd, list);
+ return pfd;
+}
- list_del(ctx->janitor_fds.next);
+static void
+posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) /* close the file or directory held by pfd, then free pfd */
+{
+ THIS = xl; /* make xl the current xlator for the calls below */
+
+ if (pfd->dir == NULL) { /* no directory stream: pfd holds a plain file descriptor */
+ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
+ sys_close(pfd->fd);
+ } else {
+ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+ sys_closedir(pfd->dir);
}
-unlock:
- pthread_mutex_unlock(&ctx->janitor_lock);
- return pfd;
+ GF_FREE(pfd); /* pfd must not be used by the caller after this point */
}
static void *
posix_ctx_janitor_thread_proc(void *data)
{
- xlator_t *this = NULL;
+ xlator_t *xl;
struct posix_fd *pfd;
glusterfs_ctx_t *ctx = NULL;
- struct posix_private *priv = NULL;
- int32_t sleep_duration = 0;
+ struct posix_private *priv_fd;
- this = data;
- ctx = THIS->ctx;
- THIS = this;
+ ctx = data; /* posix_spawn_ctx_janitor_thread passes the glusterfs ctx as thread argument */
- priv = this->private;
- sleep_duration = priv->janitor_sleep_duration;
- while (1) {
- pfd = janitor_get_next_fd(ctx, sleep_duration);
- if (pfd) {
- if (pfd->dir == NULL) {
- gf_msg_trace(this->name, 0, "janitor: closing file fd=%d",
- pfd->fd);
- sys_close(pfd->fd);
- } else {
- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p",
- pfd->dir);
- sys_closedir(pfd->dir);
- }
+ pthread_mutex_lock(&ctx->fd_lock); /* fd_lock is held on every call to janitor_get_next_fd() */
- GF_FREE(pfd);
- }
+ while ((pfd = janitor_get_next_fd(ctx)) != NULL) { /* NULL: queue empty and pxl_count is 0, so the thread ends */
+ pthread_mutex_unlock(&ctx->fd_lock); /* do not hold the lock while closing */
+
+ xl = pfd->xl; /* read before posix_close_pfd() frees pfd */
+ posix_close_pfd(xl, pfd);
+
+ pthread_mutex_lock(&ctx->fd_lock); /* retake the lock before updating rel_fdcount and looping */
+
+ priv_fd = xl->private;
+ priv_fd->rel_fdcount--;
+ if (!priv_fd->rel_fdcount)
+ pthread_cond_signal(&priv_fd->fd_cond); /* posix_notify waits on fd_cond until rel_fdcount is 0 */
}
+ pthread_mutex_unlock(&ctx->fd_lock);
+
return NULL;
}
int
posix_spawn_ctx_janitor_thread(xlator_t *this)
{
- struct posix_private *priv = NULL;
int ret = 0;
glusterfs_ctx_t *ctx = NULL;
- priv = this->private;
- ctx = THIS->ctx;
+ ctx = this->ctx;
- LOCK(&priv->lock);
+ pthread_mutex_lock(&ctx->fd_lock); /* pxl_count is updated under ctx->fd_lock */
{
- if (!ctx->janitor) {
- pthread_mutex_init(&ctx->janitor_lock, NULL);
- pthread_cond_init(&ctx->janitor_cond, NULL);
- INIT_LIST_HEAD(&ctx->janitor_fds);
-
+ if (ctx->pxl_count++ == 0) { /* count this caller; only the first one creates the janitor thread */
ret = gf_thread_create(&ctx->janitor, NULL,
- posix_ctx_janitor_thread_proc, this,
+ posix_ctx_janitor_thread_proc, ctx,
"posixctxjan");
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
- "spawning janitor "
- "thread failed");
- goto unlock;
+ "spawning janitor thread failed");
+ ctx->pxl_count--; /* thread creation failed: undo the increment above */
}
}
}
-unlock:
- UNLOCK(&priv->lock);
+ pthread_mutex_unlock(&ctx->fd_lock);
+
return ret;
}
static int
-is_fresh_file(int64_t sec, int64_t ns)
+is_fresh_file(struct timespec *ts)
{
- struct timeval tv;
+ struct timespec now;
int64_t elapsed;
- gettimeofday(&tv, NULL);
+ timespec_now_realtime(&now);
+ elapsed = (int64_t)gf_tsdiff(ts, &now);
- elapsed = (tv.tv_sec - sec) * 1000000L;
- elapsed += tv.tv_usec - (ns / 1000L);
if (elapsed < 0) {
/* The file has been modified in the future !!!
* Is it fresh ? previous implementation considered this as a
@@ -1703,11 +1702,7 @@ is_fresh_file(int64_t sec, int64_t ns)
}
/* If the file is newer than a second, we consider it fresh. */
- if (elapsed < 1000000) {
- return 1;
- }
-
- return 0;
+ return elapsed < 1000000;
}
int
@@ -1770,7 +1765,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (ret != 16) {
/* TODO: This is a very hacky way of doing this, and very prone to
* errors and unexpected behavior. This should be changed. */
- if (is_fresh_file(stbuf.ia_ctime, stbuf.ia_ctime_nsec)) {
+ struct timespec ts = {.tv_sec = stbuf.ia_ctime,
+ .tv_nsec = stbuf.ia_ctime_nsec};
+ if (is_fresh_file(&ts)) {
gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
"Fresh file: %s", path);
return -ENOENT;
@@ -1784,7 +1781,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (ret != 16) {
/* TODO: This is a very hacky way of doing this, and very prone to
* errors and unexpected behavior. This should be changed. */
- if (is_fresh_file(stat.st_ctim.tv_sec, stat.st_ctim.tv_nsec)) {
+ if (is_fresh_file(&stat.st_ctim)) {
gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
"Fresh file: %s", path);
return -ENOENT;
@@ -2017,7 +2014,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
{
struct posix_private *priv = NULL;
int ret = -1;
- char timestamp[256] = {
+ char timestamp[GF_TIMESTR_SIZE] = {
0,
};
int fd = -1;
@@ -2032,9 +2029,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
int timeout = 0;
struct aiocb aiocb;
- GF_VALIDATE_OR_GOTO(this->name, this, out);
priv = this->private;
- GF_VALIDATE_OR_GOTO("posix-helpers", priv, out);
timeout = priv->health_check_timeout;
@@ -2045,7 +2040,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
goto out;
}
- time_sec = time(NULL);
+ time_sec = gf_time();
gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT);
timelen = strlen(timestamp);
@@ -2317,7 +2312,7 @@ posix_disk_space_check(xlator_t *this)
double totsz = 0;
double freesz = 0;
- GF_VALIDATE_OR_GOTO(this->name, this, out);
+ GF_VALIDATE_OR_GOTO("posix-helpers", this, out);
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, out);
@@ -2410,7 +2405,7 @@ posix_spawn_disk_space_check_thread(xlator_t *xl)
ret = gf_thread_create(&priv->disk_space_check, NULL,
posix_disk_space_check_thread_proc, xl,
- "posix_reserve");
+ "posixrsv");
if (ret) {
priv->disk_space_check_active = _gf_false;
gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED,
@@ -2490,23 +2485,8 @@ posix_fsyncer_syncfs(xlator_t *this, struct list_head *head)
stub = list_entry(head->prev, call_stub_t, list);
ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL);
- if (ret)
- return;
-
-#ifdef GF_LINUX_HOST_OS
- /* syncfs() is not "declared" in RHEL's glibc even though
- the kernel has support.
- */
-#include <sys/syscall.h>
-#include <unistd.h>
-#ifdef SYS_syncfs
- syscall(SYS_syncfs, pfd->fd);
-#else
- sync();
-#endif
-#else
- sync();
-#endif
+ if (!ret)
+ (void)gf_syncfs(pfd->fd);
}
void *
@@ -3586,3 +3566,101 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req)
}
}
}
+
+/*
+ * Compare a parent-directory xattr sent by the caller with the value that is
+ * currently stored on disk.
+ *
+ * @xdata:    request dict. GF_PREOP_PARENT_KEY names the xattr to check and
+ *            the expected value is looked up in @xdata under that name.
+ * @par_path: path of the parent directory whose xattr is read.
+ * @this:     xlator, used for logging.
+ *
+ * Returns _gf_false when @xdata does not request the check (no
+ * GF_PREOP_PARENT_KEY, or no expected value) or when the on-disk value
+ * matches. Returns _gf_true when the values differ or when the on-disk value
+ * cannot be read or verified.
+ *
+ * Once the xattr name has been found, GF_PREOP_PARENT_KEY is always removed
+ * from @xdata; the expected-value entry is removed too when it exists.
+ */
+gf_boolean_t
+posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
+{
+    ssize_t size = 0;
+    char value_buf[4096] = {
+        0,
+    };
+    data_t *arg_data = NULL;
+    char *xattr_name = NULL;
+    size_t xattr_len = 0;
+    gf_boolean_t is_stale = _gf_false;
+
+    /* Only the out-parameter matters: it stays NULL when the key is absent. */
+    (void)dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+    if (xattr_name == NULL) {
+        return _gf_false;
+    }
+
+    xattr_len = strlen(xattr_name);
+    arg_data = dict_getn(xdata, xattr_name, xattr_len);
+    if (!arg_data) {
+        dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+        return _gf_false;
+    }
+
+    size = sys_lgetxattr(par_path, xattr_name, value_buf,
+                         sizeof(value_buf) - 1);
+    if (size < 0) {
+        if (errno == ERANGE) {
+            gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED,
+                   "getxattr on key (%s) path (%s) failed due to"
+                   " buffer overflow",
+                   xattr_name, par_path);
+            /* Ask for the size that is actually required. */
+            size = sys_lgetxattr(par_path, xattr_name, NULL, 0);
+        }
+        if (size < 0) {
+            is_stale = _gf_true;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
+                   "getxattr on key (%s) failed, path : %s", xattr_name,
+                   par_path);
+            goto out;
+        }
+        if ((size_t)size > sizeof(value_buf)) {
+            /* Reading 'size' bytes would overrun value_buf. A value this
+             * large cannot be compared here, so fail the check instead of
+             * corrupting the stack.
+             * NOTE(review): an expected value of the very same (oversized)
+             * length is also reported as stale; switch to a heap buffer if
+             * such values are legitimate. */
+            is_stale = _gf_true;
+            gf_msg(this->name, GF_LOG_ERROR, ERANGE, P_MSG_PREOP_CHECK_FAILED,
+                   "on-disk value of key (%s) on path (%s) is too large "
+                   "to verify (%zd bytes)",
+                   xattr_name, par_path, size);
+            goto out;
+        }
+
+        size = sys_lgetxattr(par_path, xattr_name, value_buf, size);
+        if (size < 0) {
+            /* A check that could not be completed must not pass silently. */
+            is_stale = _gf_true;
+            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED,
+                   "getxattr on key (%s) failed (%s)", xattr_name,
+                   strerror(errno));
+            goto out;
+        }
+    }
+
+    if ((arg_data->len != size) ||
+        (memcmp(arg_data->data, value_buf, size) != 0)) {
+        gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED,
+               "failing preop as on-disk xattr value differs from argument "
+               "value for key %s",
+               xattr_name);
+        is_stale = _gf_true;
+    }
+
+out:
+    dict_deln(xdata, xattr_name, xattr_len);
+    dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+
+    return is_stale;
+}
+
+/*
+ * Delete one xattr both from the file on disk and from @dict.
+ *
+ * The signature matches the callback passed to dict_foreach_fnmatch().
+ *
+ * @dict: dictionary the key is removed from.
+ * @k:    name of the xattr to remove.
+ * @v:    value stored under @k (unused).
+ * @data: path (char *) of the file the xattr is removed from.
+ *
+ * The key is dropped from @dict even when the on-disk removal fails.
+ * Returns the result of sys_lremovexattr().
+ */
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
+{
+    int ret;
+    char *real_path = data;
+
+    ret = sys_lremovexattr(real_path, k);
+    if (ret) {
+        /* gf_msg() takes the errno before the message id. */
+        gf_msg("posix-helpers", GF_LOG_ERROR, errno, P_MSG_XATTR_NOT_REMOVED,
+               "removexattr failed. key %s path %s", k, real_path);
+    }
+
+    dict_del(dict, k);
+
+    return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index a6c2b512ef1..6d54d37e5aa 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -54,6 +54,7 @@
#include <glusterfs/events.h>
#include "posix-gfid-path.h"
#include <glusterfs/compat-uuid.h>
+#include <glusterfs/common-utils.h>
extern char *marker_xattrs[];
#define ALIGN_SIZE 4096
@@ -1360,13 +1361,28 @@ out:
return 0;
}
+/*
+ * Queue a released posix_fd for deferred cleanup.
+ *
+ * Records @this in the fd, then, under ctx->fd_lock, appends the fd to
+ * ctx->janitor_fds, increments priv->rel_fdcount and signals ctx->fd_cond.
+ */
+static void
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
+{
+    struct posix_private *posix_priv = this->private;
+    glusterfs_ctx_t *gctx = this->ctx;
+
+    pfd->xl = this;
+
+    pthread_mutex_lock(&gctx->fd_lock);
+    list_add_tail(&pfd->list, &gctx->janitor_fds);
+    posix_priv->rel_fdcount++;
+    pthread_cond_signal(&gctx->fd_cond);
+    pthread_mutex_unlock(&gctx->fd_lock);
+}
+
int32_t
posix_releasedir(xlator_t *this, fd_t *fd)
{
struct posix_fd *pfd = NULL;
uint64_t tmp_pfd = 0;
int ret = 0;
- glusterfs_ctx_t *ctx = NULL;
VALIDATE_OR_GOTO(this, out);
VALIDATE_OR_GOTO(fd, out);
@@ -1383,22 +1399,8 @@ posix_releasedir(xlator_t *this, fd_t *fd)
"pfd->dir is NULL for fd=%p", fd);
goto out;
}
+ posix_add_fd_to_cleanup(this, pfd);
- ctx = THIS->ctx;
-
- pthread_mutex_lock(&ctx->janitor_lock);
- {
- INIT_LIST_HEAD(&pfd->list);
- list_add_tail(&pfd->list, &ctx->janitor_fds);
- pthread_cond_signal(&ctx->janitor_cond);
- }
- pthread_mutex_unlock(&ctx->janitor_lock);
-
- /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_closedir(pfd->dir);
- GF_FREE(pfd);
- */
out:
return 0;
}
@@ -2305,8 +2307,7 @@ posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
flags);
if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
+ op_errno = errno;
gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
"copy_file_range failed: fd_in: %p (gfid: %s) ,"
" fd_out %p (gfid:%s)",
@@ -2521,18 +2522,13 @@ out:
int32_t
posix_release(xlator_t *this, fd_t *fd)
{
- struct posix_private *priv = NULL;
struct posix_fd *pfd = NULL;
int ret = -1;
uint64_t tmp_pfd = 0;
- glusterfs_ctx_t *ctx = NULL;
VALIDATE_OR_GOTO(this, out);
VALIDATE_OR_GOTO(fd, out);
- priv = this->private;
- ctx = THIS->ctx;
-
ret = fd_ctx_del(fd, this, &tmp_pfd);
if (ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
@@ -2546,16 +2542,7 @@ posix_release(xlator_t *this, fd_t *fd)
"pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
}
- pthread_mutex_lock(&ctx->janitor_lock);
- {
- INIT_LIST_HEAD(&pfd->list);
- list_add_tail(&pfd->list, &ctx->janitor_fds);
- pthread_cond_signal(&ctx->janitor_cond);
- }
- pthread_mutex_unlock(&ctx->janitor_lock);
-
- if (!priv)
- goto out;
+ posix_add_fd_to_cleanup(this, pfd);
out:
return 0;
@@ -2725,6 +2712,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t ret = 0;
ssize_t acl_size = 0;
dict_t *xattr = NULL;
+ dict_t *subvol_xattrs = NULL;
posix_xattr_filler_t filler = {
0,
};
@@ -2740,6 +2728,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
struct mdata_iatt mdata_iatt = {
0,
};
+ int8_t sync_backend_xattrs = _gf_false;
+ data_pair_t *custom_xattrs;
+ data_t *keyval = NULL;
+ char **xattrs_to_heal = get_xattrs_to_heal();
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID(frame->root->uid, frame->root->gid);
@@ -2922,6 +2914,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
goto out;
}
+ ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs");
+ }
+
+ if (sync_backend_xattrs) {
+ /* List all custom xattrs */
+ subvol_xattrs = dict_new();
+ if (!subvol_xattrs)
+ goto out;
+
+ ret = dict_set_int32_sizen(xdata, "list-xattr", 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "Unable to set list-xattr in dict ");
+ goto out;
+ }
+
+ subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ NULL);
+
+ /* Remove all user xattrs from the file */
+ dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr,
+ real_path);
+
+ /* Remove all custom xattrs from the file */
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]);
+ if (keyval) {
+ ret = sys_lremovexattr(real_path, xattrs_to_heal[i]);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED,
+ errno, "removexattr failed. key %s path %s",
+ xattrs_to_heal[i], loc->path);
+ goto out;
+ }
+
+ dict_del(subvol_xattrs, xattrs_to_heal[i]);
+ keyval = NULL;
+ }
+ }
+
+ /* Set custom xattrs based on info provided by DHT */
+ custom_xattrs = dict->members_list;
+
+ while (custom_xattrs != NULL) {
+ ret = sys_lsetxattr(real_path, custom_xattrs->key,
+ custom_xattrs->value->data,
+ custom_xattrs->value->len, flags);
+ if (ret) {
+ op_errno = errno;
+ gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d",
+ custom_xattrs->key, ret);
+ goto out;
+ }
+
+ custom_xattrs = custom_xattrs->next;
+ }
+ }
+
xattr = dict_new();
if (!xattr)
goto out;
@@ -3029,6 +3081,9 @@ out:
if (xattr)
dict_unref(xattr);
+ if (subvol_xattrs)
+ dict_unref(subvol_xattrs);
+
return 0;
}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
index 63e8771d3b1..d37014af93e 100644
--- a/xlators/storage/posix/src/posix-metadata.h
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -15,13 +15,15 @@
/* In memory representation posix metadata xattr */
typedef struct {
- /* version of structure, bumped up if any new member is added */
- uint8_t version;
/* flags indicates valid fields in the structure */
uint64_t flags;
struct timespec ctime;
struct timespec mtime;
struct timespec atime;
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+
+ char _pad[7]; /* manual padding: the 7 bytes following the 1-byte version */
} posix_mdata_t;
typedef struct {
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index ce4e1193639..b8db146eef2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -119,12 +119,14 @@
*/
struct posix_fd {
- int fd; /* fd returned by the kernel */
- int32_t flags; /* flags for open/creat */
- DIR *dir; /* handle returned by the kernel */
- off_t dir_eof; /* offset at dir EOF */
- int odirect;
+ int fd; /* fd returned by the kernel */
+ int32_t flags; /* flags for open/creat */
+ DIR *dir; /* handle returned by the kernel */
+ off_t dir_eof; /* offset at dir EOF */
struct list_head list; /* to add to the janitor list */
+ int odirect; /* NOTE(review): presumably set when opened with O_DIRECT - confirm */
+ xlator_t *xl; /* xlator recorded by posix_add_fd_to_cleanup() before queuing */
+ char _pad[4]; /* manual padding */
};
struct posix_private {
@@ -135,67 +137,38 @@ struct posix_private {
gf_lock_t lock;
char *hostname;
- /* Statistics, provides activity of the server */
-
- struct timeval prev_fetch_time;
- struct timeval init_time;
time_t last_landfill_check;
- int32_t janitor_sleep_duration;
gf_atomic_t read_value; /* Total read, from init */
gf_atomic_t write_value; /* Total write, from init */
- /*
- In some cases, two exported volumes may reside on the same
- partition on the server. Sending statvfs info for both
- the volumes will lead to erroneous df output at the client,
- since free space on the partition will be counted twice.
-
- In such cases, user can disable exporting statvfs info
- on one of the volumes by setting this option.
- */
- gf_boolean_t export_statfs;
-
- gf_boolean_t o_direct; /* always open files in O_DIRECT mode */
-
- /*
- decide whether posix_unlink does open (file), unlink (file), close (fd)
- instead of just unlink (file). with the former approach there is no
- lockout of access to parent directory during removal of very large files
- for the entire duration of freeing of data blocks.
- */
- gf_boolean_t background_unlink;
/* janitor task which cleans up /.trash (created by replicate) */
struct gf_tw_timer_list *janitor;
char *trash_path;
/* lock for brick dir */
- DIR *mount_lock;
+ int mount_lock;
struct stat handledir;
/* uuid of glusterd that swapned the brick process */
uuid_t glusterd_uuid;
- gf_boolean_t aio_configured;
- gf_boolean_t aio_init_done;
- gf_boolean_t aio_capable;
#ifdef HAVE_LIBAIO
io_context_t ctxp;
pthread_t aiothread;
#endif
- /* node-uuid in pathinfo xattr */
- gf_boolean_t node_uuid_pathinfo;
-
pthread_t fsyncer;
struct list_head fsyncs;
pthread_mutex_t fsync_mutex;
pthread_cond_t fsync_cond;
pthread_mutex_t janitor_mutex;
pthread_cond_t janitor_cond;
+ pthread_cond_t fd_cond;
int fsync_queue_count;
+ int32_t janitor_sleep_duration;
enum {
BATCH_NONE = 0,
@@ -206,8 +179,6 @@ struct posix_private {
} batch_fsync_mode;
uint32_t batch_fsync_delay_usec;
- gf_boolean_t update_pgfid_nlinks;
- gf_boolean_t gfid2path;
char gfid2path_sep[8];
/* seconds to sleep between health checks */
@@ -215,13 +186,10 @@ struct posix_private {
/* seconds to sleep to wait for aio write finish for health checks */
uint32_t health_check_timeout;
pthread_t health_check;
- gf_boolean_t health_check_active;
double disk_reserve;
- char disk_unit;
- uint32_t disk_space_full;
pthread_t disk_space_check;
- gf_boolean_t disk_space_check_active;
+ uint32_t disk_space_full;
#ifdef GF_DARWIN_HOST_OS
enum {
@@ -236,9 +204,6 @@ struct posix_private {
same backend. Very much usable in brick-splitting feature. */
int32_t shared_brick_count;
- /* This option is used for either to call a landfill_purge or not */
- gf_boolean_t disable_landfill_purge;
-
/*Option to set mode bit permission that will always be set on
file/directory. */
mode_t force_create_mode;
@@ -246,10 +211,47 @@ struct posix_private {
mode_t create_mask;
mode_t create_directory_mask;
uint32_t max_hardlinks;
+ int32_t arrdfd[256];
+ int dirfd;
+
+ /* This option decides whether landfill_purge is called or not */
+ gf_boolean_t disable_landfill_purge;
gf_boolean_t fips_mode_rchecksum;
gf_boolean_t ctime;
gf_boolean_t janitor_task_stop;
+
+ gf_boolean_t disk_space_check_active;
+ char disk_unit;
+ gf_boolean_t health_check_active;
+ gf_boolean_t update_pgfid_nlinks;
+ gf_boolean_t gfid2path;
+ /* node-uuid in pathinfo xattr */
+ gf_boolean_t node_uuid_pathinfo;
+ /*
+ In some cases, two exported volumes may reside on the same
+ partition on the server. Sending statvfs info for both
+ the volumes will lead to erroneous df output at the client,
+ since free space on the partition will be counted twice.
+
+ In such cases, user can disable exporting statvfs info
+ on one of the volumes by setting this option.
+ */
+ gf_boolean_t export_statfs;
+
+ gf_boolean_t o_direct; /* always open files in O_DIRECT mode */
+
+ /*
+ decide whether posix_unlink does open (file), unlink (file), close (fd)
+ instead of just unlink (file). with the former approach there is no
+ lockout of access to parent directory during removal of very large files
+ for the entire duration of freeing of data blocks.
+ */
+ gf_boolean_t background_unlink;
+ gf_boolean_t aio_configured;
+ gf_boolean_t aio_init_done;
+ gf_boolean_t aio_capable;
+ uint32_t rel_fdcount;
};
typedef struct {
@@ -263,9 +265,11 @@ typedef struct {
fd_t *fd;
int fdnum;
int flags;
- int32_t op_errno;
char *list;
size_t list_size;
+ int32_t op_errno;
+
+ char _pad[4]; /* manual padding */
} posix_xattr_filler_t;
typedef struct {
@@ -325,6 +329,7 @@ posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename,
int
posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *real_path,
struct iatt *iatt, gf_boolean_t inode_locked);
+
dict_t *
posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd,
int fdnum, dict_t *xattr, struct iatt *buf);
@@ -658,4 +663,11 @@ posix_spawn_ctx_janitor_thread(xlator_t *this);
void
posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+
+gf_boolean_t
+posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
+
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
#endif /* _POSIX_H */