summaryrefslogtreecommitdiffstats
path: root/xlators/storage
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2017-11-03 09:57:08 -0400
committerAmar Tumballi <amarts@redhat.com>2018-05-06 01:32:30 +0000
commit3e24848f1e568ed3307683a9786f33d1ee15209b (patch)
tree44ea76321366fba4f2d2c702e07cf676140770c8 /xlators/storage
parent80262e2984b44609e9f572cf11dbc24fb6aea4cc (diff)
posix: APIs in posix to get and set time attributes
This is part of the effort to provide consistent time across distribute and replica set for time attributes (ctime, atime, mtime) of the object. This patch contains the APIs to set and get the attributes from on disk and in inode context. Credits: Rafi KC <rkavunga@redhat.com> Updates: #208 Change-Id: I5d3cba53eef90ac252cb8299c0da42ebab3bde9f Signed-off-by: Kotresh HR <khiremat@redhat.com>
Diffstat (limited to 'xlators/storage')
-rw-r--r--xlators/storage/posix/src/Makefile.am5
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c18
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h1
-rw-r--r--xlators/storage/posix/src/posix-messages.h4
-rw-r--r--xlators/storage/posix/src/posix-metadata-disk.h31
-rw-r--r--xlators/storage/posix/src/posix-metadata.c510
-rw-r--r--xlators/storage/posix/src/posix-metadata.h49
-rw-r--r--xlators/storage/posix/src/posix.h17
8 files changed, 614 insertions, 21 deletions
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index 59d462336d1..d8af6221e4e 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -7,12 +7,13 @@ posix_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c \
posix-gfid-path.c posix-entry-ops.c posix-inode-fd-ops.c \
- posix-common.c
+ posix-common.c posix-metadata.c
posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO) \
$(ACL_LIBS)
noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \
- posix-messages.h posix-gfid-path.h posix-inode-handle.h
+ posix-messages.h posix-gfid-path.h posix-inode-handle.h \
+ posix-metadata.h posix-metadata-disk.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index 0abe380ee43..438fbe509e4 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -88,24 +88,6 @@ extern char *marker_xattrs[];
#endif
-/* Setting microseconds or nanoseconds depending on what's supported:
- The passed in `tv` can be
- struct timespec
- if supported (better, because it supports nanosecond resolution) or
- struct timeval
- otherwise. */
-#if HAVE_UTIMENSAT
-#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
- tv.tv_nsec = nanosecs
-#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
- (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW))
-#else
-#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
- tv.tv_usec = nanosecs / 1000
-#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
- (lutimes (path, tv))
-#endif
-
gf_boolean_t
posix_symlinks_match (xlator_t *this, loc_t *loc, uuid_t gfid)
{
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index b463c086be5..0180900ee8e 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -22,6 +22,7 @@ enum gf_posix_mem_types_ {
gf_posix_mt_trash_path,
gf_posix_mt_paiocb,
gf_posix_mt_inode_ctx_t,
+ gf_posix_mt_mdata_attr,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index a05f6739958..6b5332b6d09 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -136,7 +136,9 @@ GLFS_MSGID(POSIX,
P_MSG_LEASE_DISABLED,
P_MSG_ANCESTORY_FAILED,
P_MSG_DISK_SPACE_CHECK_FAILED,
- P_MSG_FALLOCATE_FAILED
+ P_MSG_FALLOCATE_FAILED,
+ P_MSG_STOREMDATA_FAILED,
+ P_MSG_FETCHMDATA_FAILED
);
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix-metadata-disk.h b/xlators/storage/posix/src/posix-metadata-disk.h
new file mode 100644
index 00000000000..b25ad04a633
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata-disk.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_METADATA_DISK_H
+#define _POSIX_METADATA_DISK_H
+
+typedef struct gf_timespec_disk { /* one timestamp as laid out inside the mdata xattr */
+ uint64_t tv_sec; /* seconds; posix_mdata_to_disk() stores it via htobe64() */
+ uint64_t tv_nsec; /* nanoseconds; posix_mdata_to_disk() stores it via htobe64() */
+} gf_timespec_disk_t;
+
+/* posix_mdata_t on disk structure: the packed image that posix-metadata.c
+ * writes to and reads from the GF_XATTR_MDATA_KEY extended attribute. The
+ * 64-bit members are converted with htobe64()/be64toh() by
+ * posix_mdata_to_disk()/posix_mdata_from_disk(); the one-byte version is
+ * copied as is.
+ */
+
+typedef struct __attribute__ ((__packed__)) posix_mdata_disk {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicate which fields in the structure are valid */
+ uint64_t flags;
+ gf_timespec_disk_t ctime; /* serialized counterpart of posix_mdata_t.ctime */
+ gf_timespec_disk_t mtime; /* serialized counterpart of posix_mdata_t.mtime */
+ gf_timespec_disk_t atime; /* serialized counterpart of posix_mdata_t.atime */
+} posix_mdata_disk_t;
+
+#endif /* _POSIX_METADATA_DISK_H */
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
new file mode 100644
index 00000000000..4e75a4f1411
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.c
@@ -0,0 +1,510 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "posix-metadata.h"
+#include "posix-metadata-disk.h"
+#include "posix-handle.h"
+#include "posix-messages.h"
+#include "syscall.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+static int gf_posix_xattr_enotsup_log;
+
+/* posix_mdata_to_disk serializes the in-memory posix_mdata_t into its on-disk
+ * image. Every 64-bit member is converted to big-endian (network byte order)
+ * so that the stored xattr is machine independent; the one-byte version is
+ * copied unchanged.
+ */
+static inline void
+posix_mdata_to_disk (posix_mdata_disk_t *out, posix_mdata_t *in)
+{
+        /* "in" is an ordinary (unpacked) structure, so pointing at its
+         * members is safe; "out" is packed and is only assigned to.
+         */
+        const struct timespec *changed  = &in->ctime;
+        const struct timespec *modified = &in->mtime;
+        const struct timespec *accessed = &in->atime;
+
+        out->atime.tv_nsec = htobe64 (accessed->tv_nsec);
+        out->atime.tv_sec  = htobe64 (accessed->tv_sec);
+
+        out->mtime.tv_nsec = htobe64 (modified->tv_nsec);
+        out->mtime.tv_sec  = htobe64 (modified->tv_sec);
+
+        out->ctime.tv_nsec = htobe64 (changed->tv_nsec);
+        out->ctime.tv_sec  = htobe64 (changed->tv_sec);
+
+        out->flags   = htobe64 (in->flags);
+        out->version = in->version;
+}
+
+/* posix_mdata_from_disk decodes the on-disk image back into the in-memory
+ * posix_mdata_t: every 64-bit member is converted from big-endian to host
+ * byte order, the one-byte version is copied unchanged.
+ */
+static inline void
+posix_mdata_from_disk (posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+        /* Copy the packed members by value first instead of taking their
+         * addresses.
+         */
+        gf_timespec_disk_t changed  = in->ctime;
+        gf_timespec_disk_t modified = in->mtime;
+        gf_timespec_disk_t accessed = in->atime;
+
+        out->atime.tv_nsec = be64toh (accessed.tv_nsec);
+        out->atime.tv_sec  = be64toh (accessed.tv_sec);
+
+        out->mtime.tv_nsec = be64toh (modified.tv_nsec);
+        out->mtime.tv_sec  = be64toh (modified.tv_sec);
+
+        out->ctime.tv_nsec = be64toh (changed.tv_nsec);
+        out->ctime.tv_sec  = be64toh (changed.tv_sec);
+
+        out->flags   = be64toh (in->flags);
+        out->version = in->version;
+}
+
+/* Issues the getxattr for the metadata key: through the descriptor when fd is
+ * not -1, through the path otherwise. Returns what the underlying call
+ * returns.
+ */
+static ssize_t
+posix_mdata_getxattr (int fd, const char *path, const char *key, void *buf,
+                      size_t len)
+{
+        if (fd != -1)
+                return sys_fgetxattr (fd, key, buf, len);
+
+        return sys_lgetxattr (path, key, buf, len);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk.
+ *
+ * The object is addressed by @_fd when it is not -1, else by @real_path_arg,
+ * else by the handle path built from @inode->gfid.
+ *
+ * Returns 0 with @metadata filled in on success. Returns -1 when @metadata is
+ * NULL, the handle path cannot be built, getxattr fails, memory cannot be
+ * allocated, or the stored value is too short to be a posix_mdata_disk_t.
+ */
+static int
+posix_fetch_mdata_xattr (xlator_t *this, const char *real_path_arg, int _fd,
+                         inode_t *inode, posix_mdata_t *metadata)
+{
+        /* getxattr reports failure as -1, so the length must be signed */
+        ssize_t       size         = -1;
+        int           op_errno     = 0;
+        int           op_ret       = -1;
+        char         *value        = NULL;
+        char          gfid_str[64] = {0};
+        char         *real_path    = NULL;
+        const char   *path         = real_path_arg;
+        const char   *key          = GF_XATTR_MDATA_KEY;
+
+        if (!metadata)
+                goto out;
+
+        if ((_fd == -1) && !path) {
+                /* NOTE(review): real_path is never freed in this function,
+                 * so MAKE_HANDLE_PATH presumably hands out stack memory and
+                 * has to stay in this frame -- confirm before refactoring.
+                 */
+                MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL);
+                if (!real_path) {
+                        uuid_utoa_r (inode->gfid, gfid_str);
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                P_MSG_LSTAT_FAILED, "lstat on gfid %s failed",
+                                gfid_str);
+                        goto out;
+                }
+                path = real_path;
+        }
+
+        /* First call only asks for the length of the stored value */
+        size = posix_mdata_getxattr (_fd, path, key, NULL, 0);
+        if (size < 0) {
+                op_errno = errno;
+                if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+                                             this->name, GF_LOG_WARNING,
+                                             "Extended attributes not "
+                                             "supported (try remounting"
+                                             " brick with 'user_xattr' "
+                                             "flag)");
+                } else if (op_errno == ENOATTR ||
+                           op_errno == ENODATA) {
+                        gf_msg_debug (this->name, 0,
+                                      "No such attribute:%s for file %s "
+                                      "gfid: %s",
+                                      key, path ? path : "null",
+                                      uuid_utoa (inode->gfid));
+                } else {
+                        gf_msg (this->name, GF_LOG_DEBUG, op_errno,
+                                P_MSG_XATTR_FAILED, "getxattr failed"
+                                " on %s gfid: %s key: %s ",
+                                path ? path : "null",
+                                uuid_utoa (inode->gfid), key);
+                }
+                goto out;
+        }
+
+        value = GF_CALLOC (size + 1, sizeof (char), gf_posix_mt_char);
+        if (!value) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        size = posix_mdata_getxattr (_fd, path, key, value, size);
+        if (size < 0) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        P_MSG_XATTR_FAILED, "getxattr failed"
+                        " on %s gfid: %s key: %s ",
+                        path ? path : "null",
+                        uuid_utoa (inode->gfid), key);
+                goto out;
+        }
+
+        /* Never decode past the end of what was actually read: a truncated
+         * or foreign value must not be interpreted as a full structure.
+         */
+        if ((size_t)size < sizeof (posix_mdata_disk_t)) {
+                op_errno = EINVAL;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        P_MSG_FETCHMDATA_FAILED, "xattr value too short "
+                        "(%zd bytes) on %s gfid: %s key: %s ", size,
+                        path ? path : "null",
+                        uuid_utoa (inode->gfid), key);
+                goto out;
+        }
+
+        posix_mdata_from_disk (metadata, (posix_mdata_disk_t *)value);
+
+        op_ret = 0;
+out:
+        GF_FREE (value);
+        return op_ret;
+}
+
+/* posix_store_mdata_xattr stores the posix_mdata_t on disk.
+ *
+ * @metadata is serialized with posix_mdata_to_disk() and written to the
+ * GF_XATTR_MDATA_KEY xattr. The object is addressed by @fd when it is not -1,
+ * else by @real_path_arg, else by the handle path built from @inode->gfid.
+ *
+ * Returns the result of the setxattr call (0 on success), or -1 when
+ * @metadata is NULL or the handle path cannot be built. Every failure is
+ * logged here.
+ */
+static int
+posix_store_mdata_xattr (xlator_t *this, const char *real_path_arg, int fd,
+                         inode_t *inode, posix_mdata_t *metadata)
+{
+        char               *real_path    = NULL;
+        const char         *path         = real_path_arg;
+        const char         *key          = GF_XATTR_MDATA_KEY;
+        int                 op_ret       = 0;
+        int                 op_errno     = 0;
+        char                gfid_str[64] = {0};
+        posix_mdata_disk_t  disk_metadata;
+
+        if (!metadata) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if ((fd == -1) && !path) {
+                /* NOTE(review): real_path is never freed in this function,
+                 * so MAKE_HANDLE_PATH presumably hands out stack memory and
+                 * has to stay in this frame -- confirm before refactoring.
+                 */
+                MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL);
+                if (!real_path) {
+                        op_errno = errno;
+                        uuid_utoa_r (inode->gfid, gfid_str);
+                        gf_msg (this->name, GF_LOG_DEBUG, op_errno,
+                                P_MSG_LSTAT_FAILED, "lstat on gfid %s failed",
+                                gfid_str);
+                        op_ret = -1;
+                        goto out;
+                }
+                path = real_path;
+        }
+
+        /* Convert to the byte-order independent on-disk layout */
+        posix_mdata_to_disk (&disk_metadata, metadata);
+
+        if (fd != -1) {
+                op_ret = sys_fsetxattr (fd, key, (void *) &disk_metadata,
+                                        sizeof (posix_mdata_disk_t), 0);
+        } else {
+                op_ret = sys_lsetxattr (path, key, (void *) &disk_metadata,
+                                        sizeof (posix_mdata_disk_t), 0);
+        }
+        if (op_ret < 0) {
+                /* capture errno before any later call can overwrite it */
+                op_errno = errno;
+        }
+
+        /* The GF_DARWIN_HOST_OS-only posix_dump_buffer() call that used to
+         * follow referenced a variable ("value") that does not exist in this
+         * function and therefore could not compile; it has been dropped.
+         */
+out:
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+                        "file: %s: gfid: %s key:%s ",
+                        path ? path : "null",
+                        uuid_utoa (inode->gfid), key);
+        }
+        return op_ret;
+}
+
+/* __posix_get_mdata_xattr gets posix_mdata_t from inode context. If it is not
+ * there, it is read from disk and cached in the inode context. This variant
+ * does not take inode->lock itself; posix_get_mdata_xattr() is the wrapper
+ * that does.
+ *
+ * When the xattr cannot be read and @stbuf is given, the metadata is seeded
+ * from @stbuf, written to disk and cached. On success the ctime/mtime/atime
+ * fields of @stbuf (if not NULL) are overwritten with the metadata values.
+ *
+ * Returns 0 on success (including the "no xattr and no stbuf" case, which is
+ * only logged) and -1 on a NULL inode, allocation failure or store failure.
+ */
+int
+__posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+                         inode_t *inode, struct iatt *stbuf)
+{
+        posix_mdata_t *mdata  = NULL;
+        /* _gf_true while the inode context owns mdata */
+        gf_boolean_t   cached = _gf_true;
+        int            ret    = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ret = __inode_ctx_get1 (inode, this,
+                                (uint64_t *)&mdata);
+        if (ret == -1 || !mdata) {
+                mdata = GF_CALLOC (1, sizeof (posix_mdata_t),
+                                   gf_posix_mt_mdata_attr);
+                if (!mdata) {
+                        ret = -1;
+                        goto out;
+                }
+                cached = _gf_false;
+
+                ret = posix_fetch_mdata_xattr (this, real_path, _fd, inode,
+                                               mdata);
+                if (ret != 0) {
+                        /* Failed to get mdata from disk, xattr missing.
+                         * Even new file creation hits here first as
+                         * posix_pstat is generally done before
+                         * posix_set_ctime
+                         */
+                        if (!stbuf) {
+                                /* This case should not be hit. If it hits,
+                                 * don't fail, log warning, free mdata and
+                                 * move on
+                                 */
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        P_MSG_FETCHMDATA_FAILED,
+                                        "file: %s: gfid: %s key:%s ",
+                                        real_path ? real_path : "null",
+                                        uuid_utoa (inode->gfid),
+                                        GF_XATTR_MDATA_KEY);
+                                GF_FREE (mdata);
+                                ret = 0;
+                                goto out;
+                        }
+
+                        mdata->version = 1;
+                        mdata->flags = 0;
+                        mdata->ctime.tv_sec = stbuf->ia_ctime;
+                        mdata->ctime.tv_nsec = stbuf->ia_ctime_nsec;
+                        mdata->mtime.tv_sec = stbuf->ia_mtime;
+                        mdata->mtime.tv_nsec = stbuf->ia_mtime_nsec;
+                        mdata->atime.tv_sec = stbuf->ia_atime;
+                        mdata->atime.tv_nsec = stbuf->ia_atime_nsec;
+
+                        ret = posix_store_mdata_xattr (this, real_path, _fd,
+                                                       inode, mdata);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_STOREMDATA_FAILED,
+                                        "file: %s: gfid: %s key:%s ",
+                                        real_path ? real_path : "null",
+                                        uuid_utoa (inode->gfid),
+                                        GF_XATTR_MDATA_KEY);
+                                /* not cached anywhere: release it */
+                                GF_FREE (mdata);
+                                goto out;
+                        }
+                }
+
+                /* Reached both when mdata came from disk (in-memory state
+                 * was lost, e.g. brick down scenario) and when it was just
+                 * seeded from stbuf. Ownership moves to the inode context
+                 * only if the set succeeds.
+                 */
+                if (__inode_ctx_set1 (inode, this, (uint64_t *)&mdata) == 0)
+                        cached = _gf_true;
+        }
+
+        ret = 0;
+
+        if (stbuf) {
+                stbuf->ia_ctime = mdata->ctime.tv_sec;
+                stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+                stbuf->ia_mtime = mdata->mtime.tv_sec;
+                stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+                stbuf->ia_atime = mdata->atime.tv_sec;
+                stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+        }
+
+        if (!cached)
+                GF_FREE (mdata);
+
+out:
+        return ret;
+}
+
+/* posix_get_mdata_xattr is the locking front end of
+ * __posix_get_mdata_xattr: it holds inode->lock around the lookup of the
+ * posix_mdata_t (inode context first, disk as fallback) and hands back the
+ * inner call's result. The initial -1 is returned when the inode validation
+ * jumps out before the lock is taken.
+ */
+int
+posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+                       inode_t *inode, struct iatt *stbuf)
+{
+        int op_ret = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, done);
+
+        LOCK (&inode->lock);
+        {
+                op_ret = __posix_get_mdata_xattr (this, real_path, _fd,
+                                                  inode, stbuf);
+        }
+        UNLOCK (&inode->lock);
+
+done:
+        return op_ret;
+}
+
+/* posix_compare_timespec orders two timestamps.
+ *
+ * Returns 1 when @first is later than @second, -1 when it is earlier and 0
+ * when both are equal. The result is derived from comparisons instead of a
+ * subtraction: a time_t/long difference narrowed to int can be truncated and
+ * end up with the wrong sign (or zero) for timestamps that are far apart.
+ */
+static int
+posix_compare_timespec (struct timespec *first, struct timespec *second)
+{
+        if (first->tv_sec != second->tv_sec)
+                return (first->tv_sec > second->tv_sec) ? 1 : -1;
+
+        if (first->tv_nsec != second->tv_nsec)
+                return (first->tv_nsec > second->tv_nsec) ? 1 : -1;
+
+        return 0;
+}
+
+/* posix_set_mdata_xattr updates the posix_mdata_t kept in the inode context
+ * and stores it on disk.
+ *
+ * For every field selected in @flag (ctime/mtime/atime) the cached value is
+ * replaced by @time when @time is later. If nothing is cached yet the
+ * metadata is read from the xattr; if that fails it is seeded from @stbuf,
+ * or from @time when @stbuf is NULL. The whole update runs under inode->lock.
+ *
+ * On success the resulting times are copied into @stbuf (if not NULL).
+ * Returns 0 on success, -1 on invalid arguments, allocation failure or when
+ * the xattr cannot be written.
+ */
+int
+posix_set_mdata_xattr (xlator_t *this, const char *real_path, int fd,
+                       inode_t *inode, struct timespec *time,
+                       struct iatt *stbuf, posix_mdata_flag_t *flag)
+{
+        posix_mdata_t *mdata  = NULL;
+        /* _gf_true while the inode context owns mdata */
+        gf_boolean_t   cached = _gf_true;
+        int            ret    = -1;
+
+        GF_VALIDATE_OR_GOTO ("posix", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode->gfid, out);
+        /* both are dereferenced unconditionally below */
+        GF_VALIDATE_OR_GOTO (this->name, time, out);
+        GF_VALIDATE_OR_GOTO (this->name, flag, out);
+
+        LOCK (&inode->lock);
+        {
+                ret = __inode_ctx_get1 (inode, this,
+                                        (uint64_t *)&mdata);
+                if (ret == -1 || !mdata) {
+                        mdata = GF_CALLOC (1, sizeof (posix_mdata_t),
+                                           gf_posix_mt_mdata_attr);
+                        if (!mdata) {
+                                ret = -1;
+                                goto unlock;
+                        }
+                        cached = _gf_false;
+
+                        ret = posix_fetch_mdata_xattr (this, real_path, fd,
+                                                       inode, mdata);
+                        if (ret != 0) {
+                                /*
+                                 * This is the first time creating the time
+                                 * attr. This happens when you activate this
+                                 * feature, and the legacy file will not have
+                                 * any xattr set.
+                                 *
+                                 * New files will create extended attributes.
+                                 *
+                                 * TODO: This is wrong approach, because
+                                 * before creating fresh xattr, we should
+                                 * consult all replica and/or distribution
+                                 * set. We should contact the time management
+                                 * xlators, and ask them to create an xattr.
+                                 */
+                                mdata->version = 1;
+                                mdata->flags = 0;
+                                if (stbuf) {
+                                        mdata->ctime.tv_sec = stbuf->ia_ctime;
+                                        mdata->ctime.tv_nsec =
+                                                stbuf->ia_ctime_nsec;
+                                        mdata->atime.tv_sec = stbuf->ia_atime;
+                                        mdata->atime.tv_nsec =
+                                                stbuf->ia_atime_nsec;
+                                        mdata->mtime.tv_sec = stbuf->ia_mtime;
+                                        mdata->mtime.tv_nsec =
+                                                stbuf->ia_mtime_nsec;
+                                } else {
+                                        /* No stat to seed from: start every
+                                         * field at the supplied time instead
+                                         * of persisting epoch-zero values for
+                                         * the fields not selected in flag.
+                                         */
+                                        mdata->ctime = *time;
+                                        mdata->atime = *time;
+                                        mdata->mtime = *time;
+                                }
+                        }
+
+                        /* Reached both when mdata came from disk (in-memory
+                         * state was lost, e.g. brick down scenario) and when
+                         * it was freshly seeded. Ownership moves to the
+                         * inode context only if the set succeeds.
+                         */
+                        if (__inode_ctx_set1 (inode, this,
+                                              (uint64_t *)&mdata) == 0)
+                                cached = _gf_true;
+                }
+
+                if (flag->ctime &&
+                    posix_compare_timespec (time, &mdata->ctime) > 0) {
+                        mdata->ctime = *time;
+                }
+                if (flag->mtime &&
+                    posix_compare_timespec (time, &mdata->mtime) > 0) {
+                        mdata->mtime = *time;
+                }
+                if (flag->atime &&
+                    posix_compare_timespec (time, &mdata->atime) > 0) {
+                        mdata->atime = *time;
+                }
+
+                /*
+                 * TODO: for a non-linked inode (ia_type == IA_INVAL) the
+                 * data has to be synced to the backend, because inode_link
+                 * may return a different inode.
+                 *
+                 * With this patch set, we are setting the xattr for each
+                 * update. We should evaluate the performance, and based on
+                 * that we can decide on asynchronous updation.
+                 */
+                ret = posix_store_mdata_xattr (this, real_path, fd, inode,
+                                               mdata);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_STOREMDATA_FAILED,
+                                "file: %s: gfid: %s key:%s ",
+                                real_path ? real_path : "null",
+                                uuid_utoa (inode->gfid), GF_XATTR_MDATA_KEY);
+                } else if (stbuf) {
+                        /* mdata is shared through the inode context, so it
+                         * is read back while the lock is still held
+                         */
+                        stbuf->ia_ctime = mdata->ctime.tv_sec;
+                        stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+                        stbuf->ia_mtime = mdata->mtime.tv_sec;
+                        stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+                        stbuf->ia_atime = mdata->atime.tv_sec;
+                        stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+                }
+
+                if (!cached)
+                        GF_FREE (mdata);
+        }
+unlock:
+        /* every path that took the lock leaves through here */
+        UNLOCK (&inode->lock);
+out:
+        return ret;
+}
+
+/* posix_update_utime_in_mdata updates the posix_mdata_t when mtime/atime
+ * is modified using syscall.
+ *
+ * @valid selects what to apply from @stbuf: GF_SET_ATTR_ATIME updates atime
+ * only, GF_SET_ATTR_MTIME updates mtime and ctime. Each requested timestamp
+ * is applied with its own posix_set_mdata_xattr() call, so asking for both
+ * in one request updates both.
+ *
+ * Returns 0 when nothing was requested or every update succeeded, otherwise
+ * the result of the first failing posix_set_mdata_xattr() call.
+ *
+ * NOTE(review): @fd is not forwarded (-1 is passed on, as before), so the
+ * object is always addressed by @real_path or by gfid -- confirm intended.
+ */
+int
+posix_update_utime_in_mdata (xlator_t *this, const char *real_path, int fd,
+                             inode_t *inode,
+                             struct iatt *stbuf, int valid)
+{
+        int32_t            ret  = 0;
+        /* posix_set_mdata_xattr() takes a struct timespec on every platform */
+        struct timespec    tv   = {0, };
+        posix_mdata_flag_t flag = {0, };
+
+        if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+                tv.tv_sec = stbuf->ia_atime;
+                tv.tv_nsec = stbuf->ia_atime_nsec;
+#if !defined(HAVE_UTIMENSAT)
+                /* keep the microsecond granularity used on this build */
+                tv.tv_nsec = (tv.tv_nsec / 1000) * 1000;
+#endif
+                flag.ctime = 0;
+                flag.mtime = 0;
+                flag.atime = 1;
+
+                ret = posix_set_mdata_xattr (this, real_path, -1, inode, &tv,
+                                             NULL, &flag);
+                if (ret)
+                        return ret;
+        }
+
+        if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+                tv.tv_sec = stbuf->ia_mtime;
+                tv.tv_nsec = stbuf->ia_mtime_nsec;
+#if !defined(HAVE_UTIMENSAT)
+                /* keep the microsecond granularity used on this build */
+                tv.tv_nsec = (tv.tv_nsec / 1000) * 1000;
+#endif
+                flag.ctime = 1;
+                flag.mtime = 1;
+                flag.atime = 0;
+
+                ret = posix_set_mdata_xattr (this, real_path, -1, inode, &tv,
+                                             NULL, &flag);
+        }
+
+        return ret;
+}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
new file mode 100644
index 00000000000..b654c83230c
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_METADATA_H
+#define _POSIX_METADATA_H
+
+#include "posix-metadata-disk.h"
+
+/* In-memory representation of the posix metadata xattr. This is the host
+ * byte order counterpart of posix_mdata_disk_t; posix-metadata.c stores a
+ * pointer to it in the inode context and converts it to/from the on-disk
+ * layout when the xattr is written or read.
+ */
+typedef struct {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicate which fields in the structure are valid */
+ uint64_t flags;
+ struct timespec ctime; /* change time */
+ struct timespec mtime; /* modification time */
+ struct timespec atime; /* access time */
+} posix_mdata_t;
+
+typedef struct { /* selects which timestamps posix_set_mdata_xattr() may update */
+ unsigned short ctime : 1; /* 1: consider the supplied time for ctime */
+ unsigned short mtime : 1; /* 1: consider the supplied time for mtime */
+ unsigned short atime : 1; /* 1: consider the supplied time for atime */
+} posix_mdata_flag_t;
+
+/* Takes inode->lock itself; call it without the lock held */
+int
+posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+/* Does not take inode->lock; posix_get_mdata_xattr() calls it with the lock held */
+int
+__posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+int
+posix_set_mdata_xattr (xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct timespec *time,
+ struct iatt *stbuf, posix_mdata_flag_t *flag);
+int
+posix_update_utime_in_mdata (xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct iatt *stbuf, int valid);
+
+#endif /* _POSIX_METADATA_H */
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 5f28be9e414..60a7132ddcb 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -77,6 +77,23 @@
} \
} while (0)
+/* Setting microseconds or nanoseconds depending on what's supported:
+   The passed in `tv` can be
+       struct timespec
+   if supported (better, because it supports nanosecond resolution) or
+       struct timeval
+   otherwise.
+   Arguments and expansions are parenthesized so that compound expressions
+   (e.g. `a + b` as nanosecs) are evaluated as a whole before the assignment
+   or the division by 1000. */
+#if HAVE_UTIMENSAT
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+        ((tv).tv_nsec = (nanosecs))
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+        (sys_utimensat (AT_FDCWD, (path), (tv), AT_SYMLINK_NOFOLLOW))
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+        ((tv).tv_usec = (nanosecs) / 1000)
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+        (lutimes ((path), (tv)))
+#endif
#define GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xattr_req, op_ret, \
op_errno, out) \