summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src/posix-metadata.c
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2017-11-03 09:57:08 -0400
committerAmar Tumballi <amarts@redhat.com>2018-05-06 01:32:30 +0000
commit3e24848f1e568ed3307683a9786f33d1ee15209b (patch)
tree44ea76321366fba4f2d2c702e07cf676140770c8 /xlators/storage/posix/src/posix-metadata.c
parent80262e2984b44609e9f572cf11dbc24fb6aea4cc (diff)
posix: APIs in posix to get and set time attributes
This is part of the effort to provide consistent time across distribute and replica set for time attributes (ctime, atime, mtime) of the object. This patch contains the APIs to set and get the attributes from on disk and in inode context. Credits: Rafi KC <rkavunga@redhat.com> Updates: #208 Change-Id: I5d3cba53eef90ac252cb8299c0da42ebab3bde9f Signed-off-by: Kotresh HR <khiremat@redhat.com>
Diffstat (limited to 'xlators/storage/posix/src/posix-metadata.c')
-rw-r--r--xlators/storage/posix/src/posix-metadata.c510
1 files changed, 510 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
new file mode 100644
index 00000000000..4e75a4f1411
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.c
@@ -0,0 +1,510 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "posix-metadata.h"
+#include "posix-metadata-disk.h"
+#include "posix-handle.h"
+#include "posix-messages.h"
+#include "syscall.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+static int gf_posix_xattr_enotsup_log;
+
+/* posix_mdata_to_disk converts posix_mdata_t into network byte order to
+ * save it on disk in machine independant format
+ */
+static inline void
+posix_mdata_to_disk (posix_mdata_disk_t *out, posix_mdata_t *in)
+{
+ out->version = in->version;
+ out->flags = htobe64(in->flags);
+
+ out->ctime.tv_sec = htobe64(in->ctime.tv_sec);
+ out->ctime.tv_nsec = htobe64(in->ctime.tv_nsec);
+
+ out->mtime.tv_sec = htobe64(in->mtime.tv_sec);
+ out->mtime.tv_nsec = htobe64(in->mtime.tv_nsec);
+
+ out->atime.tv_sec = htobe64(in->atime.tv_sec);
+ out->atime.tv_nsec = htobe64(in->atime.tv_nsec);
+}
+
+/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order
+ */
+static inline void
+posix_mdata_from_disk (posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+ out->version = in->version;
+ out->flags = be64toh(in->flags);
+
+ out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
+ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
+
+ out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
+ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
+
+ out->atime.tv_sec = be64toh(in->atime.tv_sec);
+ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+static int
+posix_fetch_mdata_xattr (xlator_t *this, const char *real_path_arg, int _fd,
+ inode_t *inode, posix_mdata_t *metadata)
+{
+ size_t size = -1;
+ int op_errno = 0;
+ int op_ret = -1;
+ char *value = NULL;
+ gf_boolean_t fd_based_fop = _gf_false;
+ char gfid_str[64] = {0};
+ char *real_path = NULL;
+
+ char *key = GF_XATTR_MDATA_KEY;
+
+ if (!metadata) {
+ op_ret = -1;
+ goto out;
+ }
+
+ if (_fd != -1) {
+ fd_based_fop = _gf_true;
+ }
+ if (!(fd_based_fop || real_path_arg)) {
+ MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL);
+ if (!real_path) {
+ uuid_utoa_r (inode->gfid, gfid_str);
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_LSTAT_FAILED, "lstat on gfid %s failed",
+ gfid_str);
+ op_ret = -1;
+ goto out;
+ }
+ }
+
+ if (fd_based_fop) {
+ size = sys_fgetxattr (_fd, key, NULL, 0);
+ } else if (real_path_arg) {
+ size = sys_lgetxattr (real_path_arg, key, NULL, 0);
+ } else if (real_path) {
+ size = sys_lgetxattr (real_path, key, NULL, 0);
+ }
+
+ if (size == -1) {
+ op_errno = errno;
+ if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ } else if (op_errno == ENOATTR ||
+ op_errno == ENODATA) {
+ gf_msg_debug (this->name, 0,
+ "No such attribute:%s for file %s "
+ "gfid: %s",
+ key, real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
+ uuid_utoa(inode->gfid));
+ } else {
+ gf_msg (this->name, GF_LOG_DEBUG, op_errno,
+ P_MSG_XATTR_FAILED, "getxattr failed"
+ " on %s gfid: %s key: %s ",
+ real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
+ uuid_utoa(inode->gfid), key);
+ }
+ op_ret = -1;
+ goto out;
+ }
+
+ value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (fd_based_fop) {
+ size = sys_fgetxattr (_fd, key, value, size);
+ } else if (real_path_arg) {
+ size = sys_lgetxattr (real_path_arg, key, value, size);
+ } else if (real_path) {
+ size = sys_lgetxattr (real_path, key, value, size);
+ }
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_XATTR_FAILED, "getxattr failed on "
+ " on %s gfid: %s key: %s ",
+ real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
+ uuid_utoa(inode->gfid), key);
+ goto out;
+ }
+
+ posix_mdata_from_disk (metadata, (posix_mdata_disk_t*)value);
+
+ op_ret = 0;
+out:
+ GF_FREE (value);
+ return op_ret;
+}
+
+/* posix_store_mdata_xattr stores the posix_mdata_t on disk */
+static int
+posix_store_mdata_xattr (xlator_t *this, const char *real_path_arg, int fd,
+ inode_t *inode, posix_mdata_t *metadata)
+{
+ char *real_path = NULL;
+ int op_ret = 0;
+ gf_boolean_t fd_based_fop = _gf_false;
+ char *key = GF_XATTR_MDATA_KEY;
+ char gfid_str[64] = {0};
+ posix_mdata_disk_t disk_metadata;
+
+ if (!metadata) {
+ op_ret = -1;
+ goto out;
+ }
+
+ if (fd != -1) {
+ fd_based_fop = _gf_true;
+ }
+ if (!(fd_based_fop || real_path_arg)) {
+ MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL);
+ if (!real_path) {
+ uuid_utoa_r (inode->gfid, gfid_str);
+ gf_msg (this->name, GF_LOG_DEBUG, errno,
+ P_MSG_LSTAT_FAILED, "lstat on gfid %s failed",
+ gfid_str);
+ op_ret = -1;
+ goto out;
+ }
+ }
+
+ /* Set default version as 1 */
+ posix_mdata_to_disk (&disk_metadata, metadata);
+
+ if (fd_based_fop) {
+ op_ret = sys_fsetxattr (fd, key,
+ (void *) &disk_metadata,
+ sizeof (posix_mdata_disk_t), 0);
+ } else if (real_path_arg) {
+ op_ret = sys_lsetxattr (real_path_arg, key,
+ (void *) &disk_metadata,
+ sizeof (posix_mdata_disk_t), 0);
+ } else if (real_path) {
+ op_ret = sys_lsetxattr (real_path, key,
+ (void *) &disk_metadata,
+ sizeof (posix_mdata_disk_t), 0);
+ }
+
+#ifdef GF_DARWIN_HOST_OS
+ if (real_path_arg) {
+ posix_dump_buffer(this, real_path_arg, key, value, 0);
+ } else if (real_path) {
+ posix_dump_buffer(this, real_path, key, value, 0);
+ }
+#endif
+out:
+ if (op_ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "file: %s: gfid: %s key:%s ",
+ real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
+ uuid_utoa(inode->gfid), key);
+ }
+ return op_ret;
+}
+
+/* _posix_get_mdata_xattr gets posix_mdata_t from inode context. If it fails
+ * to get it from inode context, gets it from disk. This is with out inode lock.
+ */
+int
+__posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf)
+{
+ posix_mdata_t *mdata = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = __inode_ctx_get1 (inode, this,
+ (uint64_t *)&mdata);
+ if (ret == -1 || !mdata) {
+ mdata = GF_CALLOC (1, sizeof (posix_mdata_t),
+ gf_posix_mt_mdata_attr);
+ if (!mdata) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = posix_fetch_mdata_xattr (this, real_path, _fd, inode,
+ mdata);
+
+ if (ret == 0) {
+ /* Got mdata from disk, set it in inode ctx. This case
+ * is hit when in-memory status is lost due to brick
+ * down scenario
+ */
+ __inode_ctx_set1 (inode, this, (uint64_t *)&mdata);
+ } else {
+ /* Failed to get mdata from disk, xattr missing
+ * Even new file creation hits here first as posix_pstat
+ * is generally done before posix_set_ctime
+ */
+ if (stbuf) {
+ mdata->version = 1;
+ mdata->flags = 0;
+ mdata->ctime.tv_sec = stbuf->ia_ctime;
+ mdata->ctime.tv_nsec = stbuf->ia_ctime_nsec;
+ mdata->mtime.tv_sec = stbuf->ia_mtime;
+ mdata->mtime.tv_nsec = stbuf->ia_mtime_nsec;
+ mdata->atime.tv_sec = stbuf->ia_atime;
+ mdata->atime.tv_nsec = stbuf->ia_atime_nsec;
+ ret = posix_store_mdata_xattr (this, real_path,
+ _fd, inode,
+ mdata);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_STOREMDATA_FAILED,
+ "file: %s: gfid: %s key:%s ",
+ real_path ? real_path : "null",
+ uuid_utoa(inode->gfid),
+ GF_XATTR_MDATA_KEY);
+ goto out;
+ }
+ __inode_ctx_set1 (inode, this, (uint64_t *)&mdata);
+ } else {
+ /* This case should not be hit. If it hits, don't
+ * fail, log warning, free mdata and move on
+ */
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ P_MSG_FETCHMDATA_FAILED,
+ "file: %s: gfid: %s key:%s ",
+ real_path ? real_path : "null",
+ uuid_utoa(inode->gfid),
+ GF_XATTR_MDATA_KEY);
+ GF_FREE (mdata);
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+
+ if (ret == 0 && stbuf) {
+ stbuf->ia_ctime = mdata->ctime.tv_sec;
+ stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+ stbuf->ia_mtime = mdata->mtime.tv_sec;
+ stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+ stbuf->ia_atime = mdata->atime.tv_sec;
+ stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+ }
+
+out:
+ return ret;
+}
+
+/* posix_get_mdata_xattr gets posix_mdata_t from inode context. If it fails
+ * to get it from inode context, gets it from disk. This is with inode lock.
+ */
+int
+posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __posix_get_mdata_xattr (this, real_path, _fd, inode, stbuf);
+ }
+ UNLOCK (&inode->lock);
+
+out:
+ return ret;
+}
+
+static int
+posix_compare_timespec (struct timespec *first, struct timespec *second)
+{
+ if (first->tv_sec == second->tv_sec)
+ return first->tv_nsec - second->tv_nsec;
+ else
+ return first->tv_sec - second->tv_sec;
+}
+
+/* posix_update_mdata_xattr updates the posix_mdata_t based on the flag
+ * in inode context and stores it on disk
+ */
+int
+posix_set_mdata_xattr (xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct timespec *time,
+ struct iatt *stbuf, posix_mdata_flag_t *flag)
+{
+ posix_mdata_t *mdata = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("posix", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode->gfid, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get1 (inode, this,
+ (uint64_t *)&mdata);
+ if (ret == -1 || !mdata) {
+ /*
+ * Do we need to fetch the data from xattr
+ * If we does we can compare the value and store
+ * the largest data in inode ctx.
+ */
+ mdata = GF_CALLOC (1, sizeof (posix_mdata_t),
+ gf_posix_mt_mdata_attr);
+ if (!mdata) {
+ ret = -1;
+ goto unlock;
+ }
+
+ ret = posix_fetch_mdata_xattr (this, real_path, fd,
+ inode,
+ (void *)mdata);
+ if (ret == 0) {
+ /* Got mdata from disk, set it in inode ctx. This case
+ * is hit when in-memory status is lost due to brick
+ * down scenario
+ */
+ __inode_ctx_set1 (inode, this,
+ (uint64_t *)&mdata);
+ } else if (ret && stbuf) {
+ /*
+ * This is the first time creating the time
+ * attr. This happens when you activate this
+ * feature, and the legacy file will not have
+ * any xattr set.
+ *
+ * New files will create extended attributes.
+ */
+
+ /*
+ * TODO: This is wrong approach, because before
+ * creating fresh xattr, we should consult
+ * to all replica and/or distribution set.
+ *
+ * We should contact the time management
+ * xlators, and ask them to create an xattr.
+ */
+ mdata->version = 1;
+ mdata->flags = 0;
+ mdata->ctime.tv_sec = stbuf->ia_ctime;
+ mdata->ctime.tv_nsec = stbuf->ia_ctime_nsec;
+ mdata->atime.tv_sec = stbuf->ia_atime;
+ mdata->atime.tv_nsec = stbuf->ia_atime_nsec;
+ mdata->mtime.tv_sec = stbuf->ia_mtime;
+ mdata->mtime.tv_nsec = stbuf->ia_mtime_nsec;
+
+ __inode_ctx_set1 (inode, this,
+ (uint64_t *)&mdata);
+ }
+ }
+ if (flag->ctime &&
+ posix_compare_timespec (time, &mdata->ctime) > 0) {
+ mdata->ctime = *time;
+ }
+ if (flag->mtime &&
+ posix_compare_timespec (time, &mdata->mtime) > 0) {
+ mdata->mtime = *time;
+ }
+ if (flag->atime &&
+ posix_compare_timespec (time, &mdata->atime) > 0) {
+ mdata->atime = *time;
+ }
+
+ if (inode->ia_type == IA_INVAL) {
+ /*
+ * TODO: This is non-linked inode. So we have to sync the
+ * data into backend. Because inode_link may return
+ * a different inode.
+ */
+ /* ret = posix_store_mdata_xattr (this, loc, fd,
+ * mdata); */
+ }
+ /*
+ * With this patch set, we are setting the xattr for each update
+ * We should evaluate the performance, and based on that we can
+ * decide on asynchronous updation.
+ */
+ ret = posix_store_mdata_xattr (this, real_path, fd, inode,
+ mdata);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_STOREMDATA_FAILED,
+ "file: %s: gfid: %s key:%s ",
+ real_path ? real_path : "null",
+ uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY);
+ goto out;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+out:
+ if (ret == 0 && stbuf) {
+ stbuf->ia_ctime = mdata->ctime.tv_sec;
+ stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+ stbuf->ia_mtime = mdata->mtime.tv_sec;
+ stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+ stbuf->ia_atime = mdata->atime.tv_sec;
+ stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+ }
+
+ return ret;
+}
+
+/* posix_update_utime_in_mdata updates the posix_mdata_t when mtime/atime
+ * is modified using syscall
+ */
+int
+posix_update_utime_in_mdata (xlator_t *this, const char *real_path, int fd,
+ inode_t *inode,
+ struct iatt *stbuf, int valid)
+{
+ int32_t ret = -1;
+#if defined(HAVE_UTIMENSAT)
+ struct timespec tv = {0, };
+#else
+ struct timeval tv = {0, };
+#endif
+ posix_mdata_flag_t flag = {0, };
+
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+ tv.tv_sec = stbuf->ia_atime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec);
+
+ flag.ctime = 0;
+ flag.mtime = 0;
+ flag.atime = 1;
+ }
+
+ if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+ tv.tv_sec = stbuf->ia_mtime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec);
+ flag.ctime = 1;
+ flag.mtime = 1;
+ flag.atime = 0;
+ }
+
+ ret = posix_set_mdata_xattr (this, real_path, -1, inode, &tv, NULL,
+ &flag);
+ return ret;
+}